diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h --- a/llvm/lib/Target/AMDGPU/AMDGPU.h +++ b/llvm/lib/Target/AMDGPU/AMDGPU.h @@ -176,8 +176,8 @@ void initializeSIFixSGPRCopiesPass(PassRegistry &); extern char &SIFixSGPRCopiesID; -void initializeSIFixVGPRCopiesPass(PassRegistry &); -extern char &SIFixVGPRCopiesID; +void initializeSISimplifyPredicatedCopiesPass(PassRegistry &); +extern char &SISimplifyPredicatedCopiesID; void initializeSILowerI1CopiesPass(PassRegistry &); extern char &SILowerI1CopiesID; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.cpp @@ -79,6 +79,7 @@ switch (MI.getOpcode()) { case AMDGPU::COPY: + case AMDGPU::PRED_COPY: case AMDGPU::G_SELECT: case AMDGPU::G_FDIV: case AMDGPU::G_FREM: diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.cpp b/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.cpp @@ -38,7 +38,8 @@ return std::pair(Def->getOperand(1).getReg(), Offset); // FIXME: matcher should ignore copies - if (mi_match(Def->getOperand(2).getReg(), MRI, m_Copy(m_ICst(Offset)))) + if (mi_match(Def->getOperand(2).getReg(), MRI, m_Copy(m_ICst(Offset))) || + mi_match(Def->getOperand(2).getReg(), MRI, m_Pred_Copy(m_ICst(Offset)))) return std::pair(Def->getOperand(1).getReg(), Offset); } diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -120,7 +120,7 @@ bool AMDGPUInstructionSelector::selectCOPY(MachineInstr &I) const { const DebugLoc &DL = I.getDebugLoc(); MachineBasicBlock *BB = I.getParent(); - I.setDesc(TII.get(TargetOpcode::COPY)); + I.setDesc(TII.get(TII.getCopyOpcode())); const MachineOperand &Src = I.getOperand(1); MachineOperand &Dst = I.getOperand(0); @@ -725,7 +725,7 @@ // (build_vector $src0, undef) -> copy $src0 MachineInstr *Src1Def = getDefIgnoringCopies(Src1, *MRI); if (Src1Def->getOpcode() == AMDGPU::G_IMPLICIT_DEF) { - MI.setDesc(TII.get(AMDGPU::COPY)); + MI.setDesc(TII.get(TII.getCopyOpcode())); MI.removeOperand(2); const auto &RC = IsVector ? 
AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass; @@ -2241,7 +2241,7 @@ I.getOperand(1).setSubReg(SubRegIdx); } - I.setDesc(TII.get(TargetOpcode::COPY)); + I.setDesc(TII.get(TII.getCopyOpcode())); return true; } @@ -3083,8 +3083,8 @@ MachineBasicBlock *MBB = MI.getParent(); const DebugLoc &DL = MI.getDebugLoc(); - BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::COPY), AMDGPU::M0) - .add(MI.getOperand(2)); + BuildMI(*MBB, &MI, DL, TII.get(TII.getCopyOpcode()), AMDGPU::M0) + .add(MI.getOperand(2)); auto MIB = BuildMI(*MBB, &MI, DL, TII.get(Opc)); @@ -3172,8 +3172,8 @@ MachineBasicBlock *MBB = MI.getParent(); const DebugLoc &DL = MI.getDebugLoc(); - BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::COPY), AMDGPU::M0) - .add(MI.getOperand(2)); + BuildMI(*MBB, &MI, DL, TII.get(TII.getCopyOpcode()), AMDGPU::M0) + .add(MI.getOperand(2)); Register Addr = MI.getOperand(1).getReg(); Register VOffset; @@ -4307,7 +4307,9 @@ // FIXME: Copy check is a hack Register BasePtr; - if (mi_match(Reg, *MRI, m_GPtrAdd(m_Reg(BasePtr), m_Copy(m_ICst(Offset))))) { + if (mi_match(Reg, *MRI, m_GPtrAdd(m_Reg(BasePtr), m_Copy(m_ICst(Offset)))) || + mi_match(Reg, *MRI, + m_GPtrAdd(m_Reg(BasePtr), m_Pred_Copy(m_ICst(Offset))))) { if (!SIInstrInfo::isLegalMUBUFImmOffset(Offset)) return {}; const MachineInstr *BasePtrDef = MRI->getVRegDef(BasePtr); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -351,10 +351,10 @@ initializeSILowerI1CopiesPass(*PR); initializeSILowerSGPRSpillsPass(*PR); initializeSIFixSGPRCopiesPass(*PR); - initializeSIFixVGPRCopiesPass(*PR); initializeSIFoldOperandsPass(*PR); initializeSIPeepholeSDWAPass(*PR); initializeSIShrinkInstructionsPass(*PR); + initializeSISimplifyPredicatedCopiesPass(*PR); initializeSIOptimizeExecMaskingPreRAPass(*PR); initializeSIOptimizeVGPRLiveRangePass(*PR); initializeSILoadStoreOptimizerPass(*PR); @@ -1284,6 +1284,8 @@ bool GCNPassConfig::addPreRewrite() { if (EnableRegReassign) addPass(&GCNNSAReassignID); + + addPass(&SISimplifyPredicatedCopiesID); return true; } @@ -1334,6 +1336,7 @@ addPass(&SILowerSGPRSpillsID); addPass(createVGPRAllocPass(false)); + addPass(&SISimplifyPredicatedCopiesID); return true; } @@ -1361,7 +1364,6 @@ } void GCNPassConfig::addPostRegAlloc() { - addPass(&SIFixVGPRCopiesID); if (getOptLevel() > CodeGenOpt::None) addPass(&SIOptimizeExecMaskingID); TargetPassConfig::addPostRegAlloc(); diff --git a/llvm/lib/Target/AMDGPU/CMakeLists.txt b/llvm/lib/Target/AMDGPU/CMakeLists.txt --- a/llvm/lib/Target/AMDGPU/CMakeLists.txt +++ b/llvm/lib/Target/AMDGPU/CMakeLists.txt @@ -134,7 +134,6 @@ R600TargetTransformInfo.cpp SIAnnotateControlFlow.cpp SIFixSGPRCopies.cpp - SIFixVGPRCopies.cpp SIFoldOperands.cpp SIFormMemoryClauses.cpp SIFrameLowering.cpp @@ -161,6 +160,7 @@ SIProgramInfo.cpp SIRegisterInfo.cpp SIShrinkInstructions.cpp + SISimplifyPredicatedCopies.cpp SIWholeQuadMode.cpp LINK_COMPONENTS diff --git a/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp b/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp --- a/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp +++ b/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp @@ -178,6 +178,7 @@ case AMDGPU::IMPLICIT_DEF: return nullptr; case AMDGPU::COPY: + case AMDGPU::PRED_COPY: case AMDGPU::V_MOV_B32_e32: case AMDGPU::V_MOV_B64_PSEUDO: case AMDGPU::V_MOV_B64_e32: diff --git a/llvm/lib/Target/AMDGPU/GCNPreRAOptimizations.cpp b/llvm/lib/Target/AMDGPU/GCNPreRAOptimizations.cpp --- 
a/llvm/lib/Target/AMDGPU/GCNPreRAOptimizations.cpp +++ b/llvm/lib/Target/AMDGPU/GCNPreRAOptimizations.cpp @@ -96,7 +96,8 @@ return false; case AMDGPU::V_ACCVGPR_WRITE_B32_e64: break; - case AMDGPU::COPY: { + case AMDGPU::COPY: + case AMDGPU::PRED_COPY: { // Some subtargets cannot do an AGPR to AGPR copy directly, and need an // intermdiate temporary VGPR register. Try to find the defining // accvgpr_write to avoid temporary registers. diff --git a/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp b/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp --- a/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp +++ b/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp @@ -311,15 +311,16 @@ Register TmpReg = MRI.createVirtualRegister(NewSrcRC); - BuildMI(*MI.getParent(), &MI, MI.getDebugLoc(), TII->get(AMDGPU::COPY), - TmpReg) + BuildMI(*MI.getParent(), &MI, MI.getDebugLoc(), + TII->get(TII->getCopyOpcode()), TmpReg) .add(MI.getOperand(I)); if (IsAGPR) { const TargetRegisterClass *NewSrcRC = TRI->getEquivalentAGPRClass(SrcRC); Register TmpAReg = MRI.createVirtualRegister(NewSrcRC); - unsigned Opc = NewSrcRC == &AMDGPU::AGPR_32RegClass ? - AMDGPU::V_ACCVGPR_WRITE_B32_e64 : AMDGPU::COPY; + unsigned Opc = NewSrcRC == &AMDGPU::AGPR_32RegClass + ? AMDGPU::V_ACCVGPR_WRITE_B32_e64 + : TII->getCopyOpcode(); BuildMI(*MI.getParent(), &MI, MI.getDebugLoc(), TII->get(Opc), TmpAReg) .addReg(TmpReg, RegState::Kill); @@ -618,6 +619,7 @@ default: continue; case AMDGPU::COPY: + case AMDGPU::PRED_COPY: case AMDGPU::WQM: case AMDGPU::STRICT_WQM: case AMDGPU::SOFT_WQM: @@ -732,7 +734,7 @@ // Haven't managed to resolve by replacing an SGPR with an immediate // Move src1 to be in M0 BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), - TII->get(AMDGPU::COPY), AMDGPU::M0) + TII->get(TII->getCopyOpcode()), AMDGPU::M0) .add(Src1); Src1.ChangeToRegister(AMDGPU::M0, false); } diff --git a/llvm/lib/Target/AMDGPU/SIFixVGPRCopies.cpp b/llvm/lib/Target/AMDGPU/SIFixVGPRCopies.cpp deleted file mode 100644 --- a/llvm/lib/Target/AMDGPU/SIFixVGPRCopies.cpp +++ /dev/null @@ -1,71 +0,0 @@ -//===-- SIFixVGPRCopies.cpp - Fix VGPR Copies after regalloc --------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -/// \file -/// Add implicit use of exec to vector register copies. -/// -//===----------------------------------------------------------------------===// - -#include "AMDGPU.h" -#include "GCNSubtarget.h" -#include "MCTargetDesc/AMDGPUMCTargetDesc.h" -#include "llvm/CodeGen/MachineFunctionPass.h" - -using namespace llvm; - -#define DEBUG_TYPE "si-fix-vgpr-copies" - -namespace { - -class SIFixVGPRCopies : public MachineFunctionPass { -public: - static char ID; - -public: - SIFixVGPRCopies() : MachineFunctionPass(ID) { - initializeSIFixVGPRCopiesPass(*PassRegistry::getPassRegistry()); - } - - bool runOnMachineFunction(MachineFunction &MF) override; - - StringRef getPassName() const override { return "SI Fix VGPR copies"; } -}; - -} // End anonymous namespace. 
- -INITIALIZE_PASS(SIFixVGPRCopies, DEBUG_TYPE, "SI Fix VGPR copies", false, false) - -char SIFixVGPRCopies::ID = 0; - -char &llvm::SIFixVGPRCopiesID = SIFixVGPRCopies::ID; - -bool SIFixVGPRCopies::runOnMachineFunction(MachineFunction &MF) { - const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); - const SIRegisterInfo *TRI = ST.getRegisterInfo(); - const SIInstrInfo *TII = ST.getInstrInfo(); - bool Changed = false; - - for (MachineBasicBlock &MBB : MF) { - for (MachineInstr &MI : MBB) { - switch (MI.getOpcode()) { - case AMDGPU::COPY: - if (TII->isVGPRCopy(MI) && !MI.readsRegister(AMDGPU::EXEC, TRI)) { - MI.addOperand(MF, - MachineOperand::CreateReg(AMDGPU::EXEC, false, true)); - LLVM_DEBUG(dbgs() << "Add exec use to " << MI); - Changed = true; - } - break; - default: - break; - } - } - } - - return Changed; -} diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp --- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp +++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp @@ -766,7 +766,8 @@ CopyToVGPR = Src; } else { auto Tmp = MRI->createVirtualRegister(&AMDGPU::AGPR_32RegClass); - BuildMI(MBB, UseMI, DL, TII->get(AMDGPU::COPY), Tmp).add(*Def); + BuildMI(MBB, UseMI, DL, TII->get(TII->getCopyOpcode()), Tmp) + .add(*Def); B.addReg(Tmp); } } @@ -777,7 +778,8 @@ Vgpr = VGPRCopies[CopyToVGPR]; } else { Vgpr = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass); - BuildMI(MBB, UseMI, DL, TII->get(AMDGPU::COPY), Vgpr).add(*Def); + BuildMI(MBB, UseMI, DL, TII->get(TII->getCopyOpcode()), Vgpr) + .add(*Def); VGPRCopies[CopyToVGPR] = Vgpr; } auto Tmp = MRI->createVirtualRegister(&AMDGPU::AGPR_32RegClass); @@ -844,7 +846,7 @@ // %sgpr1 = V_READFIRSTLANE_B32 %vgpr // => // %sgpr1 = COPY %sgpr0 - UseMI->setDesc(TII->get(AMDGPU::COPY)); + UseMI->setDesc(TII->get(TII->getCopyOpcode())); UseMI->getOperand(1).setReg(OpToFold.getReg()); UseMI->getOperand(1).setSubReg(OpToFold.getSubReg()); UseMI->getOperand(1).setIsKill(false); @@ -1076,7 +1078,7 @@ if (Src1Val == 0) { // y = or x, 0 => y = copy x MI->removeOperand(Src1Idx); - mutateCopyOp(*MI, TII->get(AMDGPU::COPY)); + mutateCopyOp(*MI, TII->get(TII->getCopyOpcode())); } else if (Src1Val == -1) { // y = or x, -1 => y = v_mov_b32 -1 MI->removeOperand(Src1Idx); @@ -1096,7 +1098,7 @@ } else if (Src1Val == -1) { // y = and x, -1 => y = copy x MI->removeOperand(Src1Idx); - mutateCopyOp(*MI, TII->get(AMDGPU::COPY)); + mutateCopyOp(*MI, TII->get(TII->getCopyOpcode())); } else return false; @@ -1108,7 +1110,7 @@ if (Src1Val == 0) { // y = xor x, 0 => y = copy x MI->removeOperand(Src1Idx); - mutateCopyOp(*MI, TII->get(AMDGPU::COPY)); + mutateCopyOp(*MI, TII->get(TII->getCopyOpcode())); return true; } } @@ -1142,7 +1144,7 @@ LLVM_DEBUG(dbgs() << "Folded " << MI << " into "); auto &NewDesc = - TII->get(Src0->isReg() ? (unsigned)AMDGPU::COPY : getMovOpc(false)); + TII->get(Src0->isReg() ?
TII->getCopyOpcode() : getMovOpc(false)); int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2); if (Src2Idx != -1) MI.removeOperand(Src2Idx); diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -4287,8 +4287,8 @@ const auto *CondRC = TRI->getRegClass(AMDGPU::SReg_1_XEXECRegClassID); Register SrcCondCopy = MRI.createVirtualRegister(CondRC); - BuildMI(*BB, MI, DL, TII->get(AMDGPU::COPY), SrcCondCopy) - .addReg(SrcCond); + BuildMI(*BB, MI, DL, TII->get(TII->getCopyOpcode()), SrcCondCopy) + .addReg(SrcCond); BuildMI(*BB, MI, DL, TII->get(AMDGPU::V_CNDMASK_B32_e64), DstLo) .addImm(0) .addReg(Src0, 0, AMDGPU::sub0) @@ -11850,6 +11850,7 @@ MachineSDNode *NewNode = DAG.getMachineNode(NewOpcode, SDLoc(Node), NewVTList, Ops); + const SIInstrInfo *TII = getSubtarget()->getInstrInfo(); if (HasChain) { // Update chain. @@ -11859,9 +11860,9 @@ if (NewChannels == 1) { assert(Node->hasNUsesOfValue(1, 0)); - SDNode *Copy = DAG.getMachineNode(TargetOpcode::COPY, - SDLoc(Node), Users[Lane]->getValueType(0), - SDValue(NewNode, 0)); + SDNode *Copy = + DAG.getMachineNode(TII->getCopyOpcode(), SDLoc(Node), + Users[Lane]->getValueType(0), SDValue(NewNode, 0)); DAG.ReplaceAllUsesWith(Users[Lane], Copy); return nullptr; } diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h @@ -248,7 +248,7 @@ // Returns an opcode that can be used to move a value to a \p DstRC // register. If there is no hardware instruction that can store to \p - // DstRC, then AMDGPU::COPY is returned. + // DstRC, then getCopyOpcode() is returned. 
unsigned getMovOpcode(const TargetRegisterClass *DstRC) const; const MCInstrDesc &getIndirectRegWriteMovRelPseudo(unsigned VecSize, diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -62,8 +62,10 @@ cl::ReallyHidden); SIInstrInfo::SIInstrInfo(const GCNSubtarget &ST) - : AMDGPUGenInstrInfo(AMDGPU::ADJCALLSTACKUP, AMDGPU::ADJCALLSTACKDOWN), - RI(ST), ST(ST) { + : AMDGPUGenInstrInfo(AMDGPU::ADJCALLSTACKUP, AMDGPU::ADJCALLSTACKDOWN, + /* CatchRetOpcode */ ~0u, /* ReturnOpcode */ ~0u, + AMDGPU::PRED_COPY), + RI(ST), ST(ST) { SchedModel.init(&ST); } @@ -1140,8 +1142,7 @@ if (Cond.size() == 1) { Register SReg = MRI.createVirtualRegister(BoolXExecRC); - BuildMI(MBB, I, DL, get(AMDGPU::COPY), SReg) - .add(Cond[0]); + BuildMI(MBB, I, DL, get(getCopyOpcode()), SReg).add(Cond[0]); BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg) .addImm(0) .addReg(FalseReg) @@ -1183,8 +1184,7 @@ MachineOperand RegOp = Cond[1]; RegOp.setImplicit(false); Register SReg = MRI.createVirtualRegister(BoolXExecRC); - BuildMI(MBB, I, DL, get(AMDGPU::COPY), SReg) - .add(RegOp); + BuildMI(MBB, I, DL, get(getCopyOpcode()), SReg).add(RegOp); BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg) .addImm(0) .addReg(FalseReg) @@ -1197,8 +1197,7 @@ MachineOperand RegOp = Cond[1]; RegOp.setImplicit(false); Register SReg = MRI.createVirtualRegister(BoolXExecRC); - BuildMI(MBB, I, DL, get(AMDGPU::COPY), SReg) - .add(RegOp); + BuildMI(MBB, I, DL, get(getCopyOpcode()), SReg).add(RegOp); BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg) .addImm(0) .addReg(TrueReg) @@ -1281,7 +1280,7 @@ unsigned SIInstrInfo::getMovOpcode(const TargetRegisterClass *DstRC) const { if (RI.isAGPRClass(DstRC)) - return AMDGPU::COPY; + return getCopyOpcode(); if (RI.getRegSizeInBits(*DstRC) == 32) { return RI.isSGPRClass(DstRC) ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32; } else if (RI.getRegSizeInBits(*DstRC) == 64 && RI.isSGPRClass(DstRC)) { @@ -1289,7 +1288,7 @@ } else if (RI.getRegSizeInBits(*DstRC) == 64 && !RI.isSGPRClass(DstRC)) { return AMDGPU::V_MOV_B64_PSEUDO; } - return AMDGPU::COPY; + return getCopyOpcode(); } const MCInstrDesc & @@ -3004,6 +3003,7 @@ case AMDGPU::S_MOV_B32: case AMDGPU::S_MOV_B64: case AMDGPU::COPY: + case AMDGPU::PRED_COPY: case AMDGPU::V_ACCVGPR_WRITE_B32_e64: case AMDGPU::V_ACCVGPR_READ_B32_e64: case AMDGPU::V_ACCVGPR_MOV_B32: @@ -4805,6 +4805,8 @@ default: return AMDGPU::INSTRUCTION_LIST_END; case AMDGPU::REG_SEQUENCE: return AMDGPU::REG_SEQUENCE; case AMDGPU::COPY: return AMDGPU::COPY; + case AMDGPU::PRED_COPY: + return AMDGPU::PRED_COPY; case AMDGPU::PHI: return AMDGPU::PHI; case AMDGPU::INSERT_SUBREG: return AMDGPU::INSERT_SUBREG; case AMDGPU::WQM: return AMDGPU::WQM; @@ -4813,9 +4815,9 @@ case AMDGPU::STRICT_WQM: return AMDGPU::STRICT_WQM; case AMDGPU::S_MOV_B32: { const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); - return MI.getOperand(1).isReg() || - RI.isAGPR(MRI, MI.getOperand(0).getReg()) ? - AMDGPU::COPY : AMDGPU::V_MOV_B32_e32; + return MI.getOperand(1).isReg() || RI.isAGPR(MRI, MI.getOperand(0).getReg()) + ? getCopyOpcode() + : AMDGPU::V_MOV_B32_e32; } case AMDGPU::S_ADD_I32: return ST.hasAddNoCarry() ? AMDGPU::V_ADD_U32_e64 : AMDGPU::V_ADD_CO_U32_e32; @@ -4974,7 +4976,7 @@ unsigned Size = RI.getRegSizeInBits(*RC); unsigned Opcode = (Size == 64) ? 
AMDGPU::V_MOV_B64_PSEUDO : AMDGPU::V_MOV_B32_e32; if (MO.isReg()) - Opcode = AMDGPU::COPY; + Opcode = getCopyOpcode(); else if (RI.isSGPRClass(RC)) Opcode = (Size == 64) ? AMDGPU::S_MOV_B64 : AMDGPU::S_MOV_B32; @@ -5556,7 +5558,7 @@ return; Register DstReg = MRI.createVirtualRegister(DstRC); - auto Copy = BuildMI(InsertMBB, I, DL, get(AMDGPU::COPY), DstReg).add(Op); + auto Copy = BuildMI(InsertMBB, I, DL, get(getCopyOpcode()), DstReg).add(Op); Op.setReg(DstReg); Op.setSubReg(0); @@ -7178,6 +7180,7 @@ switch (UseMI.getOpcode()) { case AMDGPU::COPY: + case AMDGPU::PRED_COPY: case AMDGPU::WQM: case AMDGPU::SOFT_WQM: case AMDGPU::STRICT_WWM: @@ -7347,6 +7350,7 @@ // class associated with the operand, so we need to find an equivalent VGPR // register class in order to move the instruction to the VALU. case AMDGPU::COPY: + case AMDGPU::PRED_COPY: case AMDGPU::PHI: case AMDGPU::REG_SEQUENCE: case AMDGPU::INSERT_SUBREG: @@ -8155,6 +8159,7 @@ DefInst = nullptr; switch (MI->getOpcode()) { case AMDGPU::COPY: + case AMDGPU::PRED_COPY: case AMDGPU::V_MOV_B32_e32: { auto &Op1 = MI->getOperand(1); if (Op1.isReg() && Op1.getReg().isVirtual()) { diff --git a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp --- a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp +++ b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp @@ -1213,7 +1213,7 @@ (void)Read2; - const MCInstrDesc &CopyDesc = TII->get(TargetOpcode::COPY); + const MCInstrDesc &CopyDesc = TII->get(TII->getCopyOpcode()); // Copy to the old destination registers. BuildMI(*MBB, InsertBefore, DL, CopyDesc) @@ -1345,7 +1345,7 @@ std::tie(SubRegIdx0, SubRegIdx1) = getSubRegIdxs(CI, Paired); // Copy to the old destination registers. - const MCInstrDesc &CopyDesc = TII->get(TargetOpcode::COPY); + const MCInstrDesc &CopyDesc = TII->get(TII->getCopyOpcode()); const auto *Dest0 = TII->getNamedOperand(*CI.I, AMDGPU::OpName::vdata); const auto *Dest1 = TII->getNamedOperand(*Paired.I, AMDGPU::OpName::vdata); @@ -1394,7 +1394,7 @@ const unsigned SubRegIdx1 = std::get<1>(SubRegIdx); // Copy to the old destination registers. - const MCInstrDesc &CopyDesc = TII->get(TargetOpcode::COPY); + const MCInstrDesc &CopyDesc = TII->get(TII->getCopyOpcode()); const auto *Dest0 = TII->getNamedOperand(*CI.I, AMDGPU::OpName::sdst); const auto *Dest1 = TII->getNamedOperand(*Paired.I, AMDGPU::OpName::sdst); @@ -1449,7 +1449,7 @@ const unsigned SubRegIdx1 = std::get<1>(SubRegIdx); // Copy to the old destination registers. - const MCInstrDesc &CopyDesc = TII->get(TargetOpcode::COPY); + const MCInstrDesc &CopyDesc = TII->get(TII->getCopyOpcode()); const auto *Dest0 = TII->getNamedOperand(*CI.I, AMDGPU::OpName::vdata); const auto *Dest1 = TII->getNamedOperand(*Paired.I, AMDGPU::OpName::vdata); @@ -1508,7 +1508,7 @@ const unsigned SubRegIdx1 = std::get<1>(SubRegIdx); // Copy to the old destination registers. - const MCInstrDesc &CopyDesc = TII->get(TargetOpcode::COPY); + const MCInstrDesc &CopyDesc = TII->get(TII->getCopyOpcode()); const auto *Dest0 = TII->getNamedOperand(*CI.I, AMDGPU::OpName::vdata); const auto *Dest1 = TII->getNamedOperand(*Paired.I, AMDGPU::OpName::vdata); @@ -1606,7 +1606,7 @@ const unsigned SubRegIdx1 = std::get<1>(SubRegIdx); // Copy to the old destination registers. 
- const MCInstrDesc &CopyDesc = TII->get(TargetOpcode::COPY); + const MCInstrDesc &CopyDesc = TII->get(TII->getCopyOpcode()); const auto *Dest0 = TII->getNamedOperand(*CI.I, AMDGPU::OpName::vdst); const auto *Dest1 = TII->getNamedOperand(*Paired.I, AMDGPU::OpName::vdst); diff --git a/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp b/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp --- a/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp +++ b/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp @@ -93,6 +93,7 @@ Register SIOptimizeExecMasking::isCopyFromExec(const MachineInstr &MI) const { switch (MI.getOpcode()) { case AMDGPU::COPY: + case AMDGPU::PRED_COPY: case AMDGPU::S_MOV_B64: case AMDGPU::S_MOV_B64_term: case AMDGPU::S_MOV_B32: @@ -110,6 +111,7 @@ Register SIOptimizeExecMasking::isCopyToExec(const MachineInstr &MI) const { switch (MI.getOpcode()) { case AMDGPU::COPY: + case AMDGPU::PRED_COPY: case AMDGPU::S_MOV_B64: case AMDGPU::S_MOV_B32: { const MachineOperand &Dst = MI.getOperand(0); @@ -211,12 +213,12 @@ switch (MI.getOpcode()) { case AMDGPU::S_MOV_B32_term: { bool RegSrc = MI.getOperand(1).isReg(); - MI.setDesc(TII->get(RegSrc ? AMDGPU::COPY : AMDGPU::S_MOV_B32)); + MI.setDesc(TII->get(RegSrc ? TII->getCopyOpcode() : AMDGPU::S_MOV_B32)); return true; } case AMDGPU::S_MOV_B64_term: { bool RegSrc = MI.getOperand(1).isReg(); - MI.setDesc(TII->get(RegSrc ? AMDGPU::COPY : AMDGPU::S_MOV_B64)); + MI.setDesc(TII->get(RegSrc ? TII->getCopyOpcode() : AMDGPU::S_MOV_B64)); return true; } case AMDGPU::S_XOR_B64_term: { diff --git a/llvm/lib/Target/AMDGPU/SISimplifyPredicatedCopies.cpp b/llvm/lib/Target/AMDGPU/SISimplifyPredicatedCopies.cpp new file mode 100644 --- /dev/null +++ b/llvm/lib/Target/AMDGPU/SISimplifyPredicatedCopies.cpp @@ -0,0 +1,111 @@ +//===-- SISimplifyPredicatedCopies.cpp - Simplify Copies after regalloc --===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +/// \file +/// Simplify the predicated COPY (PRED_COPY) instructions for various register +/// classes. AMDGPU vector register copies have a predicated dependence on the +/// EXEC register, so after register allocation they must carry EXEC as an +/// implicit operand. Scalar register copies have no such dependence, so the +/// regular COPY opcode suffices for them. AMDGPU uses the PRED_COPY opcode +/// from instruction selection onwards; this pass reverts scalar PRED_COPYs to +/// plain COPYs and adds the implicit EXEC use to vector copies.
+// +//===----------------------------------------------------------------------===// + +#include "AMDGPU.h" +#include "GCNSubtarget.h" +#include "MCTargetDesc/AMDGPUMCTargetDesc.h" +#include "SIMachineFunctionInfo.h" +#include "llvm/CodeGen/LiveIntervals.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/InitializePasses.h" + +using namespace llvm; + +#define DEBUG_TYPE "si-simplify-predicated-copies" + +namespace { + +class SISimplifyPredicatedCopies : public MachineFunctionPass { +public: + static char ID; + + SISimplifyPredicatedCopies() : MachineFunctionPass(ID) { + initializeSISimplifyPredicatedCopiesPass(*PassRegistry::getPassRegistry()); + } + + bool runOnMachineFunction(MachineFunction &MF) override; + + StringRef getPassName() const override { + return "SI Simplify Predicated Copies"; + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesAll(); + MachineFunctionPass::getAnalysisUsage(AU); + } + +private: + const SIRegisterInfo *TRI; + const MachineRegisterInfo *MRI; + SIMachineFunctionInfo *MFI; +}; + +} // End anonymous namespace. + +INITIALIZE_PASS_BEGIN(SISimplifyPredicatedCopies, DEBUG_TYPE, + "SI Simplify Predicated Copies", false, false) +INITIALIZE_PASS_END(SISimplifyPredicatedCopies, DEBUG_TYPE, + "SI Simplify Predicated Copies", false, false) + +char SISimplifyPredicatedCopies::ID = 0; + +char &llvm::SISimplifyPredicatedCopiesID = SISimplifyPredicatedCopies::ID; + +bool SISimplifyPredicatedCopies::runOnMachineFunction(MachineFunction &MF) { + const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); + const SIInstrInfo *TII = ST.getInstrInfo(); + + MFI = MF.getInfo<SIMachineFunctionInfo>(); + TRI = ST.getRegisterInfo(); + MRI = &MF.getRegInfo(); + bool Changed = false; + + for (MachineBasicBlock &MBB : MF) { + for (MachineInstr &MI : MBB) { + unsigned Opcode = MI.getOpcode(); + switch (Opcode) { + case AMDGPU::COPY: + case AMDGPU::PRED_COPY: + if (!TII->isVGPRCopy(MI) && + TRI->isSGPRReg(*MRI, MI.getOperand(1).getReg())) { + // For PRED_COPY with SGPR regclass, change the opcode back to the + // regular COPY. + if (Opcode == AMDGPU::PRED_COPY) { + LLVM_DEBUG(dbgs() << MI << " to use COPY opcode"); + MI.setDesc(TII->get(AMDGPU::COPY)); + Changed = true; + } + } else { + // For vector registers, add implicit exec use.
+ if (!MI.readsRegister(AMDGPU::EXEC, TRI)) { + MI.addOperand(MF, + MachineOperand::CreateReg(AMDGPU::EXEC, false, true)); + LLVM_DEBUG(dbgs() << "Add exec use to " << MI); + Changed = true; + } + } + break; + default: + break; + } + } + } + + return Changed; +} diff --git a/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp b/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp --- a/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp +++ b/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp @@ -1517,7 +1517,7 @@ MI->removeOperand(Index); Index = MI->findRegisterUseOperandIdx(AMDGPU::EXEC); } - MI->setDesc(TII->get(AMDGPU::COPY)); + MI->setDesc(TII->get(TII->getCopyOpcode())); LLVM_DEBUG(dbgs() << " -> " << *MI); } } @@ -1535,7 +1535,7 @@ assert(MI->getNumExplicitOperands() == 2); } - MI->setDesc(TII->get(AMDGPU::COPY)); + MI->setDesc(TII->get(TII->getCopyOpcode())); } } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-extract.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-extract.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-extract.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-extract.mir @@ -8,8 +8,8 @@ bb.0: ; CHECK-LABEL: name: extract_s32_merge_s64_s32_s32_offset0 ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[PRED_COPY]](s32) %0:_(s32) = G_CONSTANT i32 0 %1:_(s32) = G_CONSTANT i32 1 %2:_(s64) = G_MERGE_VALUES %0, %1 @@ -24,8 +24,8 @@ bb.0: ; CHECK-LABEL: name: extract_s32_merge_s64_s32_s32_offset32 ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[PRED_COPY]](s32) %0:_(s32) = G_CONSTANT i32 0 %1:_(s32) = G_CONSTANT i32 1 %2:_(s64) = G_MERGE_VALUES %0, %1 @@ -40,8 +40,8 @@ bb.0: ; CHECK-LABEL: name: extract_s64_merge_s128_s64_s64_offset0 ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY [[C]](s64) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[COPY]](s64) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s64) = PRED_COPY [[C]](s64) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[PRED_COPY]](s64) %0:_(s64) = G_CONSTANT i64 0 %1:_(s64) = G_CONSTANT i64 1 %2:_(s128) = G_MERGE_VALUES %0, %1 @@ -56,8 +56,8 @@ bb.0: ; CHECK-LABEL: name: extract_s64_merge_s128_s64_s64_offset64 ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY [[C]](s64) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[COPY]](s64) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s64) = PRED_COPY [[C]](s64) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[PRED_COPY]](s64) %0:_(s64) = G_CONSTANT i64 0 %1:_(s64) = G_CONSTANT i64 1 %2:_(s128) = G_MERGE_VALUES %0, %1 @@ -217,8 +217,8 @@ bb.0: ; CHECK-LABEL: name: extract_s32_merge_s96_s32_s32_s32_offset0 ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[PRED_COPY]](s32) %0:_(s32) = G_CONSTANT i32 0 %1:_(s32) = G_CONSTANT i32 1 %2:_(s32) = G_CONSTANT i32 1 @@ -234,8 +234,8 @@ bb.0: ; CHECK-LABEL: name: extract_s32_merge_s96_s32_s32_s32_offset64 ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: 
[[COPY:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[PRED_COPY]](s32) %0:_(s32) = G_CONSTANT i32 0 %1:_(s32) = G_CONSTANT i32 1 %2:_(s32) = G_CONSTANT i32 1 @@ -292,8 +292,8 @@ bb.0: ; CHECK-LABEL: name: extract_s64_build_vector_v2s64_s64_s64_offset0 ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY [[C]](s64) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[COPY]](s64) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s64) = PRED_COPY [[C]](s64) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[PRED_COPY]](s64) %0:_(s64) = G_CONSTANT i64 0 %1:_(s64) = G_CONSTANT i64 1 %2:_(<2 x s64>) = G_BUILD_VECTOR %0, %1 @@ -308,8 +308,8 @@ bb.0: ; CHECK-LABEL: name: extract_s64_build_vector_v2s64_s64_s64_offset64 ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY [[C]](s64) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[COPY]](s64) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s64) = PRED_COPY [[C]](s64) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[PRED_COPY]](s64) %0:_(s64) = G_CONSTANT i64 0 %1:_(s64) = G_CONSTANT i64 1 %2:_(<2 x s64>) = G_BUILD_VECTOR %0, %1 @@ -369,8 +369,8 @@ ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY [[COPY]](<2 x s16>) - ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](<2 x s16>) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(<2 x s16>) = PRED_COPY [[COPY]](<2 x s16>) + ; CHECK-NEXT: $vgpr0 = COPY [[PRED_COPY]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s16>) = COPY $vgpr1 %2:_(<4 x s16>) = G_CONCAT_VECTORS %0, %1 @@ -388,8 +388,8 @@ ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY [[COPY]](<2 x s16>) - ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](<2 x s16>) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(<2 x s16>) = PRED_COPY [[COPY]](<2 x s16>) + ; CHECK-NEXT: $vgpr0 = COPY [[PRED_COPY]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s16>) = COPY $vgpr1 %2:_(<4 x s16>) = G_CONCAT_VECTORS %0, %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-zext.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-zext.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-zext.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-zext.mir @@ -56,10 +56,10 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY1]], [[SHL]] + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY]], [[SHL]] ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[COPY]](<2 x s32>) ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<2 x s16>) = G_AND [[TRUNC]], [[BITCAST]] diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.f32-no-rtn.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.f32-no-rtn.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.f32-no-rtn.ll +++ 
b/llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.f32-no-rtn.ll @@ -9,27 +9,27 @@ ; GFX908_GFX11: bb.1 (%ir-block.0): ; GFX908_GFX11-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0 ; GFX908_GFX11-NEXT: {{ $}} - ; GFX908_GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX908_GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX908_GFX11-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX908_GFX11-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX908_GFX11-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX908_GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX908_GFX11-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX908_GFX11-NEXT: BUFFER_ATOMIC_ADD_F32_OFFSET [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) + ; GFX908_GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX908_GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX908_GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX908_GFX11-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX908_GFX11-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX908_GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; GFX908_GFX11-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX908_GFX11-NEXT: BUFFER_ATOMIC_ADD_F32_OFFSET [[PRED_COPY]], [[REG_SEQUENCE]], [[PRED_COPY5]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) ; GFX908_GFX11-NEXT: S_ENDPGM 0 ; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_f32_offset_no_rtn ; GFX90A_GFX940: bb.1 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_ADD_F32_OFFSET [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; GFX90A_GFX940-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_ADD_F32_OFFSET [[PRED_COPY]], [[REG_SEQUENCE]], [[PRED_COPY5]], 0, 0, implicit $exec :: (volatile dereferenceable load store 
(s32), align 1, addrspace 7) ; GFX90A_GFX940-NEXT: S_ENDPGM 0 %ret = call float @llvm.amdgcn.raw.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 0, i32 %soffset, i32 0) ret void @@ -40,29 +40,29 @@ ; GFX908_GFX11: bb.1 (%ir-block.0): ; GFX908_GFX11-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1 ; GFX908_GFX11-NEXT: {{ $}} - ; GFX908_GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX908_GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX908_GFX11-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX908_GFX11-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX908_GFX11-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX908_GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX908_GFX11-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX908_GFX11-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX908_GFX11-NEXT: BUFFER_ATOMIC_ADD_F32_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) + ; GFX908_GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX908_GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX908_GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX908_GFX11-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX908_GFX11-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX908_GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; GFX908_GFX11-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX908_GFX11-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX908_GFX11-NEXT: BUFFER_ATOMIC_ADD_F32_OFFEN [[PRED_COPY]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) ; GFX908_GFX11-NEXT: S_ENDPGM 0 ; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_f32_offen_no_rtn ; GFX90A_GFX940: bb.1 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_ADD_F32_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = 
REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; GFX90A_GFX940-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_ADD_F32_OFFEN [[PRED_COPY]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) ; GFX90A_GFX940-NEXT: S_ENDPGM 0 %ret = call float @llvm.amdgcn.raw.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) ret void @@ -73,29 +73,29 @@ ; GFX908_GFX11: bb.1 (%ir-block.0): ; GFX908_GFX11-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1 ; GFX908_GFX11-NEXT: {{ $}} - ; GFX908_GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX908_GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX908_GFX11-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX908_GFX11-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX908_GFX11-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX908_GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX908_GFX11-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX908_GFX11-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX908_GFX11-NEXT: BUFFER_ATOMIC_ADD_F32_IDXEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) + ; GFX908_GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX908_GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX908_GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX908_GFX11-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX908_GFX11-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX908_GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; GFX908_GFX11-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX908_GFX11-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX908_GFX11-NEXT: BUFFER_ATOMIC_ADD_F32_IDXEN [[PRED_COPY]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) ; GFX908_GFX11-NEXT: S_ENDPGM 0 ; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_f32_idxen_no_rtn ; GFX90A_GFX940: bb.1 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_ADD_F32_IDXEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load 
store (s32), align 1, addrspace 7) + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; GFX90A_GFX940-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_ADD_F32_IDXEN [[PRED_COPY]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) ; GFX90A_GFX940-NEXT: S_ENDPGM 0 %ret = call float @llvm.amdgcn.struct.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 %soffset, i32 0) ret void @@ -106,33 +106,33 @@ ; GFX908_GFX11: bb.1 (%ir-block.0): ; GFX908_GFX11-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1, $vgpr2 ; GFX908_GFX11-NEXT: {{ $}} - ; GFX908_GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX908_GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX908_GFX11-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX908_GFX11-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX908_GFX11-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX908_GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX908_GFX11-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX908_GFX11-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX908_GFX11-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX908_GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; GFX908_GFX11-NEXT: BUFFER_ATOMIC_ADD_F32_BOTHEN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) + ; GFX908_GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX908_GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX908_GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX908_GFX11-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX908_GFX11-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX908_GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; GFX908_GFX11-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX908_GFX11-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX908_GFX11-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX908_GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY5]], %subreg.sub0, [[PRED_COPY6]], %subreg.sub1 + ; GFX908_GFX11-NEXT: BUFFER_ATOMIC_ADD_F32_BOTHEN [[PRED_COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[PRED_COPY7]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) ; GFX908_GFX11-NEXT: S_ENDPGM 0 ; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_f32_bothen_no_rtn ; GFX90A_GFX940: bb.1 (%ir-block.0): ; 
GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1, $vgpr2 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_ADD_F32_BOTHEN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; GFX90A_GFX940-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[PRED_COPY5]], %subreg.sub0, [[PRED_COPY6]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_ADD_F32_BOTHEN [[PRED_COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[PRED_COPY7]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) ; GFX90A_GFX940-NEXT: S_ENDPGM 0 %ret = call float @llvm.amdgcn.struct.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 2) ret void diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.f32-rtn.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.f32-rtn.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.f32-rtn.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.f32-rtn.ll @@ -8,29 +8,29 @@ ; GFX90A_GFX940: bb.1 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_ADD_F32_OFFSET_RTN:%[0-9]+]]:vgpr_32 = 
BUFFER_ATOMIC_ADD_F32_OFFSET_RTN [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) - ; GFX90A_GFX940-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_OFFSET_RTN]] + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; GFX90A_GFX940-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_ADD_F32_OFFSET_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFSET_RTN [[PRED_COPY]], [[REG_SEQUENCE]], [[PRED_COPY5]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) + ; GFX90A_GFX940-NEXT: $vgpr0 = PRED_COPY [[BUFFER_ATOMIC_ADD_F32_OFFSET_RTN]] ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX11-LABEL: name: buffer_atomic_fadd_f32_offset_rtn ; GFX11: bb.1 (%ir-block.0): ; GFX11-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX11-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX11-NEXT: [[BUFFER_ATOMIC_ADD_F32_OFFSET_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFSET_RTN [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) - ; GFX11-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_OFFSET_RTN]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX11-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX11-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; GFX11-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX11-NEXT: [[BUFFER_ATOMIC_ADD_F32_OFFSET_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFSET_RTN [[PRED_COPY]], [[REG_SEQUENCE]], [[PRED_COPY5]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[BUFFER_ATOMIC_ADD_F32_OFFSET_RTN]] ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %ret = call float @llvm.amdgcn.raw.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 0, i32 %soffset, i32 0) ret float %ret @@ -41,31 +41,31 @@ ; GFX90A_GFX940: bb.1 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; 
GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_ADD_F32_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) - ; GFX90A_GFX940-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_OFFEN_RTN]] + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; GFX90A_GFX940-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_ADD_F32_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFEN_RTN [[PRED_COPY]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) + ; GFX90A_GFX940-NEXT: $vgpr0 = PRED_COPY [[BUFFER_ATOMIC_ADD_F32_OFFEN_RTN]] ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX11-LABEL: name: buffer_atomic_fadd_f32_offen_rtn ; GFX11: bb.1 (%ir-block.0): ; GFX11-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX11-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX11-NEXT: [[BUFFER_ATOMIC_ADD_F32_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) - ; GFX11-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_OFFEN_RTN]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX11-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX11-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; 
GFX11-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX11-NEXT: [[BUFFER_ATOMIC_ADD_F32_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFEN_RTN [[PRED_COPY]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[BUFFER_ATOMIC_ADD_F32_OFFEN_RTN]] ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %ret = call float @llvm.amdgcn.raw.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) ret float %ret @@ -76,31 +76,31 @@ ; GFX90A_GFX940: bb.1 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_ADD_F32_IDXEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_IDXEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) - ; GFX90A_GFX940-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_IDXEN_RTN]] + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; GFX90A_GFX940-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_ADD_F32_IDXEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_IDXEN_RTN [[PRED_COPY]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) + ; GFX90A_GFX940-NEXT: $vgpr0 = PRED_COPY [[BUFFER_ATOMIC_ADD_F32_IDXEN_RTN]] ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX11-LABEL: name: buffer_atomic_fadd_f32_idxen_rtn ; GFX11: bb.1 (%ir-block.0): ; GFX11-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX11-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; 
GFX11-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX11-NEXT: [[BUFFER_ATOMIC_ADD_F32_IDXEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_IDXEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) - ; GFX11-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_IDXEN_RTN]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX11-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX11-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; GFX11-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX11-NEXT: [[BUFFER_ATOMIC_ADD_F32_IDXEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_IDXEN_RTN [[PRED_COPY]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[BUFFER_ATOMIC_ADD_F32_IDXEN_RTN]] ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %ret = call float @llvm.amdgcn.struct.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 %soffset, i32 0) ret float %ret @@ -111,35 +111,35 @@ ; GFX90A_GFX940: bb.1 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1, $vgpr2 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) - ; GFX90A_GFX940-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN]] + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; GFX90A_GFX940-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = 
PRED_COPY $vgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[PRED_COPY5]], %subreg.sub0, [[PRED_COPY6]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN [[PRED_COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[PRED_COPY7]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) + ; GFX90A_GFX940-NEXT: $vgpr0 = PRED_COPY [[BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN]] ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX11-LABEL: name: buffer_atomic_fadd_f32_bothen_rtn ; GFX11: bb.1 (%ir-block.0): ; GFX11-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX11-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX11-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; GFX11-NEXT: [[BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) - ; GFX11-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX11-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX11-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; GFX11-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX11-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY5]], %subreg.sub0, [[PRED_COPY6]], %subreg.sub1 + ; GFX11-NEXT: [[BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN [[PRED_COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[PRED_COPY7]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN]] ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %ret = call float @llvm.amdgcn.struct.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret float %ret diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.f64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.f64.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.f64.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.f64.ll @@ -7,16 +7,16 @@ ; GFX90A_GFX940: 
bb.1 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_ADD_F64_OFFSET [[REG_SEQUENCE]], [[REG_SEQUENCE1]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 7) + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY3]], %subreg.sub1, [[PRED_COPY4]], %subreg.sub2, [[PRED_COPY5]], %subreg.sub3 + ; GFX90A_GFX940-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_ADD_F64_OFFSET [[REG_SEQUENCE]], [[REG_SEQUENCE1]], [[PRED_COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 7) ; GFX90A_GFX940-NEXT: S_ENDPGM 0 %ret = call double @llvm.amdgcn.raw.buffer.atomic.fadd.f64(double %val, <4 x i32> %rsrc, i32 0, i32 %soffset, i32 0) ret void @@ -27,17 +27,17 @@ ; GFX90A_GFX940: bb.1 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1, $vgpr2 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_ADD_F64_OFFEN [[REG_SEQUENCE]], [[COPY6]], [[REG_SEQUENCE1]], [[COPY7]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 7) + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; 
GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY3]], %subreg.sub1, [[PRED_COPY4]], %subreg.sub2, [[PRED_COPY5]], %subreg.sub3 + ; GFX90A_GFX940-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_ADD_F64_OFFEN [[REG_SEQUENCE]], [[PRED_COPY6]], [[REG_SEQUENCE1]], [[PRED_COPY7]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 7) ; GFX90A_GFX940-NEXT: S_ENDPGM 0 %ret = call double @llvm.amdgcn.raw.buffer.atomic.fadd.f64(double %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) ret void @@ -48,17 +48,17 @@ ; GFX90A_GFX940: bb.1 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1, $vgpr2 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_ADD_F64_IDXEN [[REG_SEQUENCE]], [[COPY6]], [[REG_SEQUENCE1]], [[COPY7]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 7) + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY3]], %subreg.sub1, [[PRED_COPY4]], %subreg.sub2, [[PRED_COPY5]], %subreg.sub3 + ; GFX90A_GFX940-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_ADD_F64_IDXEN [[REG_SEQUENCE]], [[PRED_COPY6]], [[REG_SEQUENCE1]], [[PRED_COPY7]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 7) ; 
GFX90A_GFX940-NEXT: S_ENDPGM 0 %ret = call double @llvm.amdgcn.struct.buffer.atomic.fadd.f64(double %val, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 %soffset, i32 0) ret void @@ -69,19 +69,19 @@ ; GFX90A_GFX940: bb.1 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX90A_GFX940-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_ADD_F64_BOTHEN [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY8]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 7) + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY3]], %subreg.sub1, [[PRED_COPY4]], %subreg.sub2, [[PRED_COPY5]], %subreg.sub3 + ; GFX90A_GFX940-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX90A_GFX940-NEXT: [[PRED_COPY8:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[PRED_COPY6]], %subreg.sub0, [[PRED_COPY7]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_ADD_F64_BOTHEN [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[PRED_COPY8]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 7) ; GFX90A_GFX940-NEXT: S_ENDPGM 0 %ret = call double @llvm.amdgcn.struct.buffer.atomic.fadd.f64(double %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 2) ret void @@ -92,22 +92,22 @@ ; GFX90A_GFX940: bb.1 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; 
GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_ADD_F64_OFFSET_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_OFFSET_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE1]], [[COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 7) - ; GFX90A_GFX940-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_OFFSET_RTN]].sub0 - ; GFX90A_GFX940-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_OFFSET_RTN]].sub1 - ; GFX90A_GFX940-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; GFX90A_GFX940-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX90A_GFX940-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec - ; GFX90A_GFX940-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY3]], %subreg.sub1, [[PRED_COPY4]], %subreg.sub2, [[PRED_COPY5]], %subreg.sub3 + ; GFX90A_GFX940-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_ADD_F64_OFFSET_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_OFFSET_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE1]], [[PRED_COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 7) + ; GFX90A_GFX940-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_ATOMIC_ADD_F64_OFFSET_RTN]].sub0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_ATOMIC_ADD_F64_OFFSET_RTN]].sub1 + ; GFX90A_GFX940-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY7]], implicit $exec + ; GFX90A_GFX940-NEXT: $sgpr0 = PRED_COPY [[V_READFIRSTLANE_B32_]] + ; GFX90A_GFX940-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY8]], implicit $exec + ; GFX90A_GFX940-NEXT: $sgpr1 = PRED_COPY [[V_READFIRSTLANE_B32_1]] ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 %ret = call double @llvm.amdgcn.raw.buffer.atomic.fadd.f64(double %val, <4 x i32> %rsrc, i32 0, i32 %soffset, i32 0) ret double %ret @@ -118,23 +118,23 @@ ; GFX90A_GFX940: bb.1 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1, $vgpr2 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: 
[[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_ADD_F64_OFFEN_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_OFFEN_RTN [[REG_SEQUENCE]], [[COPY6]], [[REG_SEQUENCE1]], [[COPY7]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 7) - ; GFX90A_GFX940-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_OFFEN_RTN]].sub0 - ; GFX90A_GFX940-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_OFFEN_RTN]].sub1 - ; GFX90A_GFX940-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec - ; GFX90A_GFX940-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX90A_GFX940-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec - ; GFX90A_GFX940-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY3]], %subreg.sub1, [[PRED_COPY4]], %subreg.sub2, [[PRED_COPY5]], %subreg.sub3 + ; GFX90A_GFX940-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_ADD_F64_OFFEN_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_OFFEN_RTN [[REG_SEQUENCE]], [[PRED_COPY6]], [[REG_SEQUENCE1]], [[PRED_COPY7]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 7) + ; GFX90A_GFX940-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_ATOMIC_ADD_F64_OFFEN_RTN]].sub0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_ATOMIC_ADD_F64_OFFEN_RTN]].sub1 + ; GFX90A_GFX940-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY8]], implicit $exec + ; GFX90A_GFX940-NEXT: $sgpr0 = PRED_COPY [[V_READFIRSTLANE_B32_]] + ; GFX90A_GFX940-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY9]], implicit $exec + ; GFX90A_GFX940-NEXT: $sgpr1 = PRED_COPY [[V_READFIRSTLANE_B32_1]] ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 %ret = call double @llvm.amdgcn.raw.buffer.atomic.fadd.f64(double %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) ret double %ret @@ -145,23 +145,23 @@ ; GFX90A_GFX940: bb.1 (%ir-block.0): ; 
GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1, $vgpr2 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_ADD_F64_IDXEN_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_IDXEN_RTN [[REG_SEQUENCE]], [[COPY6]], [[REG_SEQUENCE1]], [[COPY7]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 7) - ; GFX90A_GFX940-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_IDXEN_RTN]].sub0 - ; GFX90A_GFX940-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_IDXEN_RTN]].sub1 - ; GFX90A_GFX940-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec - ; GFX90A_GFX940-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX90A_GFX940-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec - ; GFX90A_GFX940-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY3]], %subreg.sub1, [[PRED_COPY4]], %subreg.sub2, [[PRED_COPY5]], %subreg.sub3 + ; GFX90A_GFX940-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_ADD_F64_IDXEN_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_IDXEN_RTN [[REG_SEQUENCE]], [[PRED_COPY6]], [[REG_SEQUENCE1]], [[PRED_COPY7]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 7) + ; GFX90A_GFX940-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_ATOMIC_ADD_F64_IDXEN_RTN]].sub0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_ATOMIC_ADD_F64_IDXEN_RTN]].sub1 + ; GFX90A_GFX940-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY8]], implicit $exec + ; GFX90A_GFX940-NEXT: $sgpr0 = PRED_COPY [[V_READFIRSTLANE_B32_]] + ; GFX90A_GFX940-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY9]], implicit $exec + ; GFX90A_GFX940-NEXT: $sgpr1 = PRED_COPY [[V_READFIRSTLANE_B32_1]] ; 
GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 %ret = call double @llvm.amdgcn.struct.buffer.atomic.fadd.f64(double %val, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 %soffset, i32 0) ret double %ret @@ -172,25 +172,25 @@ ; GFX90A_GFX940: bb.1 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX90A_GFX940-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY8]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 7) - ; GFX90A_GFX940-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN]].sub0 - ; GFX90A_GFX940-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN]].sub1 - ; GFX90A_GFX940-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec - ; GFX90A_GFX940-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX90A_GFX940-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec - ; GFX90A_GFX940-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY3]], %subreg.sub1, [[PRED_COPY4]], %subreg.sub2, [[PRED_COPY5]], %subreg.sub3 + ; GFX90A_GFX940-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX90A_GFX940-NEXT: [[PRED_COPY8:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[PRED_COPY6]], %subreg.sub0, [[PRED_COPY7]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], 
[[PRED_COPY8]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 7) + ; GFX90A_GFX940-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN]].sub0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN]].sub1 + ; GFX90A_GFX940-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY9]], implicit $exec + ; GFX90A_GFX940-NEXT: $sgpr0 = PRED_COPY [[V_READFIRSTLANE_B32_]] + ; GFX90A_GFX940-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY10]], implicit $exec + ; GFX90A_GFX940-NEXT: $sgpr1 = PRED_COPY [[V_READFIRSTLANE_B32_1]] ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 %ret = call double @llvm.amdgcn.struct.buffer.atomic.fadd.f64(double %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret double %ret diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.v2f16-no-rtn.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.v2f16-no-rtn.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.v2f16-no-rtn.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.v2f16-no-rtn.ll @@ -8,27 +8,27 @@ ; GFX908: bb.1 (%ir-block.0): ; GFX908-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0 ; GFX908-NEXT: {{ $}} - ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX908-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX908-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX908-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX908-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX908-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX908-NEXT: BUFFER_ATOMIC_PK_ADD_F16_OFFSET [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 7) + ; GFX908-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX908-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX908-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX908-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX908-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; GFX908-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX908-NEXT: BUFFER_ATOMIC_PK_ADD_F16_OFFSET [[PRED_COPY]], [[REG_SEQUENCE]], [[PRED_COPY5]], 4095, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 7) ; GFX908-NEXT: S_ENDPGM 0 ; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_v2f16_offset_no_rtn ; GFX90A_GFX940: bb.1 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], 
%subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_PK_ADD_F16_OFFSET [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 7) + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; GFX90A_GFX940-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_PK_ADD_F16_OFFSET [[PRED_COPY]], [[REG_SEQUENCE]], [[PRED_COPY5]], 4095, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 7) ; GFX90A_GFX940-NEXT: S_ENDPGM 0 %ret = call <2 x half> @llvm.amdgcn.raw.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 4095, i32 %soffset, i32 0) ret void @@ -39,29 +39,29 @@ ; GFX908: bb.1 (%ir-block.0): ; GFX908-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1 ; GFX908-NEXT: {{ $}} - ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX908-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX908-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX908-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX908-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX908-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX908-NEXT: BUFFER_ATOMIC_PK_ADD_F16_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 7) + ; GFX908-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX908-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX908-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX908-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX908-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; GFX908-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX908-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX908-NEXT: BUFFER_ATOMIC_PK_ADD_F16_OFFEN [[PRED_COPY]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 7) ; GFX908-NEXT: S_ENDPGM 0 ; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_v2f16_offen_no_rtn ; GFX90A_GFX940: bb.1 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: 
[[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_PK_ADD_F16_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 7) + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; GFX90A_GFX940-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_PK_ADD_F16_OFFEN [[PRED_COPY]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 7) ; GFX90A_GFX940-NEXT: S_ENDPGM 0 %ret = call <2 x half> @llvm.amdgcn.raw.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) ret void @@ -72,29 +72,29 @@ ; GFX908: bb.1 (%ir-block.0): ; GFX908-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1 ; GFX908-NEXT: {{ $}} - ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX908-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX908-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX908-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX908-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX908-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX908-NEXT: BUFFER_ATOMIC_PK_ADD_F16_IDXEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 7) + ; GFX908-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX908-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX908-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX908-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX908-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; GFX908-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX908-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX908-NEXT: BUFFER_ATOMIC_PK_ADD_F16_IDXEN [[PRED_COPY]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 7) ; GFX908-NEXT: S_ENDPGM 0 ; GFX90A_GFX940-LABEL: name: 
buffer_atomic_fadd_v2f16_idxen_no_rtn ; GFX90A_GFX940: bb.1 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_PK_ADD_F16_IDXEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 7) + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; GFX90A_GFX940-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_PK_ADD_F16_IDXEN [[PRED_COPY]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 7) ; GFX90A_GFX940-NEXT: S_ENDPGM 0 %ret = call <2 x half> @llvm.amdgcn.struct.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 %soffset, i32 0) ret void @@ -105,33 +105,33 @@ ; GFX908: bb.1 (%ir-block.0): ; GFX908-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1, $vgpr2 ; GFX908-NEXT: {{ $}} - ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX908-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX908-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX908-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX908-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX908-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX908-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; GFX908-NEXT: BUFFER_ATOMIC_PK_ADD_F16_BOTHEN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 2, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 7) + ; GFX908-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX908-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX908-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX908-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX908-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + 
; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; GFX908-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX908-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX908-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY5]], %subreg.sub0, [[PRED_COPY6]], %subreg.sub1 + ; GFX908-NEXT: BUFFER_ATOMIC_PK_ADD_F16_BOTHEN [[PRED_COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[PRED_COPY7]], 0, 2, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 7) ; GFX908-NEXT: S_ENDPGM 0 ; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_v2f16_bothen_no_rtn ; GFX90A_GFX940: bb.1 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1, $vgpr2 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_PK_ADD_F16_BOTHEN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 2, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 7) + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; GFX90A_GFX940-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[PRED_COPY5]], %subreg.sub0, [[PRED_COPY6]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_PK_ADD_F16_BOTHEN [[PRED_COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[PRED_COPY7]], 0, 2, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 7) ; GFX90A_GFX940-NEXT: S_ENDPGM 0 %ret = call <2 x half> @llvm.amdgcn.struct.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 2) ret void diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.v2f16-rtn.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.v2f16-rtn.ll --- 
a/llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.v2f16-rtn.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.v2f16-rtn.ll @@ -7,15 +7,15 @@ ; GFX90A_GFX940: bb.1 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_PK_ADD_F16_OFFSET_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_OFFSET_RTN [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 0, 1, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 7) - ; GFX90A_GFX940-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_PK_ADD_F16_OFFSET_RTN]] + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; GFX90A_GFX940-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_PK_ADD_F16_OFFSET_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_OFFSET_RTN [[PRED_COPY]], [[REG_SEQUENCE]], [[PRED_COPY5]], 0, 1, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 7) + ; GFX90A_GFX940-NEXT: $vgpr0 = PRED_COPY [[BUFFER_ATOMIC_PK_ADD_F16_OFFSET_RTN]] ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %ret = call <2 x half> @llvm.amdgcn.raw.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 0, i32 %soffset, i32 0) ret <2 x half> %ret @@ -26,16 +26,16 @@ ; GFX90A_GFX940: bb.1 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_PK_ADD_F16_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_OFFEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 7) - ; GFX90A_GFX940-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_PK_ADD_F16_OFFEN_RTN]] + ; GFX90A_GFX940-NEXT: 
[[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; GFX90A_GFX940-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_PK_ADD_F16_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_OFFEN_RTN [[PRED_COPY]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 7) + ; GFX90A_GFX940-NEXT: $vgpr0 = PRED_COPY [[BUFFER_ATOMIC_PK_ADD_F16_OFFEN_RTN]] ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %ret = call <2 x half> @llvm.amdgcn.raw.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) ret <2 x half> %ret @@ -46,16 +46,16 @@ ; GFX90A_GFX940: bb.1 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_PK_ADD_F16_IDXEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_IDXEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 7) - ; GFX90A_GFX940-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_PK_ADD_F16_IDXEN_RTN]] + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; GFX90A_GFX940-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_PK_ADD_F16_IDXEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_IDXEN_RTN [[PRED_COPY]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 7) + ; GFX90A_GFX940-NEXT: $vgpr0 = PRED_COPY [[BUFFER_ATOMIC_PK_ADD_F16_IDXEN_RTN]] ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %ret = call <2 x half> 
@llvm.amdgcn.struct.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 %soffset, i32 0) ret <2 x half> %ret @@ -66,18 +66,18 @@ ; GFX90A_GFX940: bb.1 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1, $vgpr2 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_PK_ADD_F16_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_BOTHEN_RTN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 3, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 7) - ; GFX90A_GFX940-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_PK_ADD_F16_BOTHEN_RTN]] + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; GFX90A_GFX940-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[PRED_COPY5]], %subreg.sub0, [[PRED_COPY6]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_PK_ADD_F16_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_BOTHEN_RTN [[PRED_COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[PRED_COPY7]], 0, 3, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 7) + ; GFX90A_GFX940-NEXT: $vgpr0 = PRED_COPY [[BUFFER_ATOMIC_PK_ADD_F16_BOTHEN_RTN]] ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %ret = call <2 x half> @llvm.amdgcn.struct.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 2) ret <2 x half> %ret diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/bug-legalization-artifact-combiner-dead-def.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/bug-legalization-artifact-combiner-dead-def.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/bug-legalization-artifact-combiner-dead-def.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/bug-legalization-artifact-combiner-dead-def.mir @@ -33,13 +33,13 @@ ; GFX10-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[MV]](p4) :: (load (<4 x s32>), align 
4, addrspace 4) ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV3]](s32) ; GFX10-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV6]](s32) - ; GFX10-NEXT: G_STORE [[COPY4]](s32), [[COPY]](p5) :: (store (s32), align 8, addrspace 5) + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[UV6]](s32) + ; GFX10-NEXT: G_STORE [[PRED_COPY1]](s32), [[COPY]](p5) :: (store (s32), align 8, addrspace 5) ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-NEXT: G_STORE [[COPY3]](s32), [[PTR_ADD]](p5) :: (store (s32) into unknown-address + 4, addrspace 5) + ; GFX10-NEXT: G_STORE [[PRED_COPY]](s32), [[PTR_ADD]](p5) :: (store (s32) into unknown-address + 4, addrspace 5) %0:_(p5) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32) = COPY $vgpr2 @@ -69,13 +69,13 @@ ; GFX10-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[MV]](p4) :: (load (<4 x s32>), align 4, addrspace 4) ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV3]](s32) ; GFX10-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV6]](s32) - ; GFX10-NEXT: G_STORE [[COPY4]](s32), [[COPY]](p5) :: (store (s32), align 8, addrspace 5) + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[UV6]](s32) + ; GFX10-NEXT: G_STORE [[PRED_COPY1]](s32), [[COPY]](p5) :: (store (s32), align 8, addrspace 5) ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-NEXT: G_STORE [[COPY3]](s32), [[PTR_ADD]](p5) :: (store (s32) into unknown-address + 4, addrspace 5) + ; GFX10-NEXT: G_STORE [[PRED_COPY]](s32), [[PTR_ADD]](p5) :: (store (s32) into unknown-address + 4, addrspace 5) %0:_(p5) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32) = COPY $vgpr2 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/dereferenceable-declaration.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/dereferenceable-declaration.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/dereferenceable-declaration.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/dereferenceable-declaration.ll @@ -14,18 +14,18 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @declared_with_ret_deref - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY]](<4 x s32>) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY]](<4 x s32>) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @declared_with_ret_deref, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0, implicit-def $vgpr1 - ; CHECK-NEXT: 
[[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[PRED_COPY1]](s32), [[PRED_COPY2]](s32) ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: [[ASSERT_ALIGN:%[0-9]+]]:_(p0) = G_ASSERT_ALIGN [[MV]], 8 ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[ASSERT_ALIGN]](p0) :: (dereferenceable load (s64) from %ir.call) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](s64) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %call = call ptr @declared_with_ret_deref() %load = load i64, ptr %call, align 8 @@ -38,18 +38,18 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @unknown_decl - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY]](<4 x s32>) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY]](<4 x s32>) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @unknown_decl, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0, implicit-def $vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[PRED_COPY1]](s32), [[PRED_COPY2]](s32) ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: [[ASSERT_ALIGN:%[0-9]+]]:_(p0) = G_ASSERT_ALIGN [[MV]], 8 ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[ASSERT_ALIGN]](p0) :: (load (s64) from %ir.call) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](s64) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %call = call ptr @unknown_decl() %load = load i64, ptr %call, align 8 @@ -62,18 +62,18 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @unknown_decl - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY]](<4 x s32>) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY]](<4 x s32>) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @unknown_decl, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0, implicit-def $vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY1]](s32), 
[[COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[PRED_COPY1]](s32), [[PRED_COPY2]](s32) ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: [[ASSERT_ALIGN:%[0-9]+]]:_(p0) = G_ASSERT_ALIGN [[MV]], 8 ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[ASSERT_ALIGN]](p0) :: (dereferenceable load (s64) from %ir.call) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](s64) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %call = call dereferenceable(8) ptr @unknown_decl() %load = load i64, ptr %call, align 8 @@ -87,30 +87,30 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @declared_with_ret_deref - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY]](<4 x s32>) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY]](<4 x s32>) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @declared_with_ret_deref, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0, implicit-def $vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[PRED_COPY1]](s32), [[PRED_COPY2]](s32) ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: [[ASSERT_ALIGN:%[0-9]+]]:_(p0) = G_ASSERT_ALIGN [[MV]], 8 ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[ASSERT_ALIGN]](p0) :: (dereferenceable load (s64) from %ir.call0) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV1:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @declared_with_ret_deref4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY3]](<4 x s32>) + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY3]](<4 x s32>) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV1]](p0), @declared_with_ret_deref4, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0, implicit-def $vgpr1 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[PRED_COPY4]](s32), [[PRED_COPY5]](s32) ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: [[ASSERT_ALIGN1:%[0-9]+]]:_(p0) = G_ASSERT_ALIGN [[MV1]], 8 ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[ASSERT_ALIGN1]](p0) :: (dereferenceable load (s64) from %ir.call1) ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s64) = G_ADD 
[[LOAD]], [[LOAD1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ADD]](s64) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %call0 = call dereferenceable(4) ptr @declared_with_ret_deref() %load0 = load i64, ptr %call0, align 8 @@ -126,18 +126,18 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @declared_with_ret_deref_or_null - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY]](<4 x s32>) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY]](<4 x s32>) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @declared_with_ret_deref_or_null, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0, implicit-def $vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[PRED_COPY1]](s32), [[PRED_COPY2]](s32) ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: [[ASSERT_ALIGN:%[0-9]+]]:_(p0) = G_ASSERT_ALIGN [[MV]], 8 ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[ASSERT_ALIGN]](p0) :: (dereferenceable load (s64) from %ir.call) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](s64) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %call = call nonnull ptr @declared_with_ret_deref_or_null() %load = load i64, ptr %call, align 8 @@ -150,18 +150,18 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @nonnull_decl - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY]](<4 x s32>) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY]](<4 x s32>) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @nonnull_decl, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0, implicit-def $vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[PRED_COPY1]](s32), [[PRED_COPY2]](s32) ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: [[ASSERT_ALIGN:%[0-9]+]]:_(p0) = G_ASSERT_ALIGN [[MV]], 8 ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[ASSERT_ALIGN]](p0) :: (load (s64) from %ir.call) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), 
[[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](s64) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %call = call ptr @nonnull_decl() %load = load i64, ptr %call, align 8 @@ -174,18 +174,18 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @nonnull_decl - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY]](<4 x s32>) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY]](<4 x s32>) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @nonnull_decl, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0, implicit-def $vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[PRED_COPY1]](s32), [[PRED_COPY2]](s32) ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: [[ASSERT_ALIGN:%[0-9]+]]:_(p0) = G_ASSERT_ALIGN [[MV]], 8 ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[ASSERT_ALIGN]](p0) :: (dereferenceable load (s64) from %ir.call) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](s64) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %call = call dereferenceable_or_null(8) ptr @nonnull_decl() %load = load i64, ptr %call, align 8 @@ -199,30 +199,30 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @declared_with_ret_deref_or_null - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY]](<4 x s32>) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY]](<4 x s32>) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @declared_with_ret_deref_or_null, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0, implicit-def $vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[PRED_COPY1]](s32), [[PRED_COPY2]](s32) ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: [[ASSERT_ALIGN:%[0-9]+]]:_(p0) = G_ASSERT_ALIGN [[MV]], 8 ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[ASSERT_ALIGN]](p0) :: (dereferenceable load (s64) from %ir.call0) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV1:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE 
@declared_with_ret_deref_or_null4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY3]](<4 x s32>) + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY3]](<4 x s32>) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV1]](p0), @declared_with_ret_deref_or_null4, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0, implicit-def $vgpr1 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[PRED_COPY4]](s32), [[PRED_COPY5]](s32) ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: [[ASSERT_ALIGN1:%[0-9]+]]:_(p0) = G_ASSERT_ALIGN [[MV1]], 8 ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[ASSERT_ALIGN1]](p0) :: (dereferenceable load (s64) from %ir.call1) ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[LOAD]], [[LOAD1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ADD]](s64) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %call0 = call dereferenceable_or_null(4) nonnull ptr @declared_with_ret_deref_or_null() %load0 = load i64, ptr %call0, align 8 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/dropped_debug_info_assert.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/dropped_debug_info_assert.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/dropped_debug_info_assert.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/dropped_debug_info_assert.ll @@ -9,42 +9,42 @@ ; CHECK: bb.1.entry: ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2, debug-location !6 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1, debug-location !6 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0, debug-location !6 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16, debug-location !6 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15, debug-location !6 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14, debug-location !6 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11, debug-location !6 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7, debug-location !6 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5, debug-location !6 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:sreg_64 = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2, debug-location !6 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1, debug-location !6 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0, debug-location !6 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr16, debug-location !6 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr15, debug-location !6 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14, debug-location !6 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11, debug-location 
!6 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr6_sgpr7, debug-location !6 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5, debug-location !6 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr8_sgpr9 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc, debug-location !6 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:sreg_64 = COPY [[COPY8]], debug-location !6 - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[COPY7]], debug-location !6 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[COPY6]], debug-location !6 - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:sreg_32 = COPY [[COPY5]], debug-location !6 - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:sreg_32 = COPY [[COPY4]], debug-location !6 - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:sreg_32 = COPY [[COPY3]], debug-location !6 + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:sreg_64 = PRED_COPY [[PRED_COPY8]], debug-location !6 + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:sreg_64 = PRED_COPY [[PRED_COPY7]], debug-location !6 + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:sreg_64 = PRED_COPY [[PRED_COPY6]], debug-location !6 + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY5]], debug-location !6 + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY4]], debug-location !6 + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY3]], debug-location !6 ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF debug-location !6 - ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 10 - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; CHECK-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY16]], [[COPY1]], implicit $exec, debug-location !6 - ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 20 - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]] - ; CHECK-NEXT: [[V_LSHLREV_B32_e64_1:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY17]], [[COPY]], implicit $exec, debug-location !6 - ; CHECK-NEXT: [[V_OR3_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR3_B32_e64 [[COPY2]], [[V_LSHLREV_B32_e64_]], [[V_LSHLREV_B32_e64_1]], implicit $exec, debug-location !6 - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3, debug-location !6 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]], debug-location !6 - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]], debug-location !6 - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]], debug-location !6 - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY9]], debug-location !6 - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY12]], debug-location !6 - ; CHECK-NEXT: $sgpr12 = COPY [[COPY13]], debug-location !6 - ; CHECK-NEXT: $sgpr13 = COPY [[COPY14]], debug-location !6 - ; CHECK-NEXT: $sgpr14 = COPY [[COPY15]], debug-location !6 - ; CHECK-NEXT: $sgpr15 = COPY [[DEF]], debug-location !6 - ; CHECK-NEXT: $vgpr31 = COPY [[V_OR3_B32_e64_]], debug-location !6 + ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 10, debug-location !6 + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; CHECK-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[PRED_COPY16]], [[PRED_COPY1]], implicit $exec, debug-location !6 + ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 20, debug-location !6 + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_1]] + ; CHECK-NEXT: [[V_LSHLREV_B32_e64_1:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[PRED_COPY17]], [[PRED_COPY]], implicit $exec, debug-location !6 + ; CHECK-NEXT: [[V_OR3_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR3_B32_e64 [[PRED_COPY2]], 
[[V_LSHLREV_B32_e64_]], [[V_LSHLREV_B32_e64_1]], implicit $exec, debug-location !6 + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:sgpr_128 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3, debug-location !6 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY18]], debug-location !6 + ; CHECK-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY10]], debug-location !6 + ; CHECK-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY11]], debug-location !6 + ; CHECK-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PRED_COPY9]], debug-location !6 + ; CHECK-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY12]], debug-location !6 + ; CHECK-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY13]], debug-location !6 + ; CHECK-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY14]], debug-location !6 + ; CHECK-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY15]], debug-location !6 + ; CHECK-NEXT: $sgpr15 = PRED_COPY [[DEF]], debug-location !6 + ; CHECK-NEXT: $vgpr31 = PRED_COPY [[V_OR3_B32_e64_]], debug-location !6 ; CHECK-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @callee + 4, target-flags(amdgpu-gotprel32-hi) @callee + 12, implicit-def $scc, debug-location !6 ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[SI_PC_ADD_REL_OFFSET]], 0, 0, debug-location !6 :: (dereferenceable invariant load (p0) from got, addrspace 4) ; CHECK-NEXT: $sgpr30_sgpr31 = SI_CALL [[S_LOAD_DWORDX2_IMM]], @callee, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, debug-location !6 @@ -60,12 +60,12 @@ ; CHECK: bb.1.entry: ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr30_sgpr31, debug-location !6 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[COPY]] - ; CHECK-NEXT: GLOBAL_STORE_DWORDX2 [[REG_SEQUENCE]], [[COPY3]], 0, 0, implicit $exec :: (store (p0) into %ir.ptr, addrspace 1) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr30_sgpr31, debug-location !6 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:vreg_64 = PRED_COPY [[PRED_COPY]] + ; CHECK-NEXT: GLOBAL_STORE_DWORDX2 [[REG_SEQUENCE]], [[PRED_COPY3]], 0, 0, implicit $exec :: (store (p0) into %ir.ptr, addrspace 1) ; CHECK-NEXT: SI_RETURN entry: %returnaddr = call ptr @llvm.returnaddress(i32 0), !dbg !6 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/dummy-target.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/dummy-target.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/dummy-target.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/dummy-target.ll @@ -8,11 +8,11 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY]](s32) ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], 
[[TRUNC]] ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD]](s16) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[ANYEXT]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %add = add i16 %arg0, %arg0 ret i16 %add @@ -23,13 +23,13 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY]](<2 x s16>) ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY]](<2 x s16>) ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) @@ -40,7 +40,7 @@ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[BITCAST2]](<2 x s16>) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %add = add <2 x i16> %arg0, %arg0 ret <2 x i16> %add @@ -51,9 +51,9 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[COPY]] - ; CHECK-NEXT: $vgpr0 = COPY [[ADD]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[PRED_COPY]], [[PRED_COPY]] + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[ADD]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %add = add i16 %arg0, %arg0 ret i16 %add @@ -64,12 +64,12 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[COPY]] - ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[COPY1]], [[COPY1]] - ; CHECK-NEXT: $vgpr0 = COPY [[ADD]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[ADD1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[PRED_COPY]], [[PRED_COPY]] + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[PRED_COPY1]], [[PRED_COPY1]] + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[ADD]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[ADD1]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %add = add <2 x i16> %arg0, %arg0 ret <2 x i16> %add diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.f32.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.f32.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.f32.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.f32.ll @@ -7,21 +7,21 @@ ; GFX940: bb.1 (%ir-block.0): ; GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX940-NEXT: {{ $}} - ; GFX940-NEXT: 
[[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX940-NEXT: FLAT_ATOMIC_ADD_F32 [[REG_SEQUENCE]], [[COPY2]], 0, 0, implicit $exec, implicit $flat_scr :: (volatile dereferenceable load store (s32) on %ir.ptr) + ; GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX940-NEXT: FLAT_ATOMIC_ADD_F32 [[REG_SEQUENCE]], [[PRED_COPY2]], 0, 0, implicit $exec, implicit $flat_scr :: (volatile dereferenceable load store (s32) on %ir.ptr) ; GFX940-NEXT: S_ENDPGM 0 ; GFX11-LABEL: name: flat_atomic_fadd_f32_no_rtn_intrinsic ; GFX11: bb.1 (%ir-block.0): ; GFX11-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX11-NEXT: FLAT_ATOMIC_ADD_F32 [[REG_SEQUENCE]], [[COPY2]], 0, 0, implicit $exec, implicit $flat_scr :: (volatile dereferenceable load store (s32) on %ir.ptr) + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX11-NEXT: FLAT_ATOMIC_ADD_F32 [[REG_SEQUENCE]], [[PRED_COPY2]], 0, 0, implicit $exec, implicit $flat_scr :: (volatile dereferenceable load store (s32) on %ir.ptr) ; GFX11-NEXT: S_ENDPGM 0 %ret = call float @llvm.amdgcn.flat.atomic.fadd.f32.p1.f32(ptr %ptr, float %data) ret void @@ -32,23 +32,23 @@ ; GFX940: bb.1 (%ir-block.0): ; GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX940-NEXT: {{ $}} - ; GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX940-NEXT: [[FLAT_ATOMIC_ADD_F32_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_F32_RTN [[REG_SEQUENCE]], [[COPY2]], 0, 1, implicit $exec, implicit $flat_scr :: (volatile dereferenceable load store (s32) on %ir.ptr) - ; GFX940-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_F32_RTN]] + ; GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX940-NEXT: [[FLAT_ATOMIC_ADD_F32_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_F32_RTN [[REG_SEQUENCE]], [[PRED_COPY2]], 0, 1, implicit $exec, implicit $flat_scr :: (volatile dereferenceable load store (s32) on %ir.ptr) + ; GFX940-NEXT: $vgpr0 = PRED_COPY [[FLAT_ATOMIC_ADD_F32_RTN]] ; GFX940-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX11-LABEL: name: 
flat_atomic_fadd_f32_rtn_intrinsic ; GFX11: bb.1 (%ir-block.0): ; GFX11-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX11-NEXT: [[FLAT_ATOMIC_ADD_F32_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_F32_RTN [[REG_SEQUENCE]], [[COPY2]], 0, 1, implicit $exec, implicit $flat_scr :: (volatile dereferenceable load store (s32) on %ir.ptr) - ; GFX11-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_F32_RTN]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX11-NEXT: [[FLAT_ATOMIC_ADD_F32_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_F32_RTN [[REG_SEQUENCE]], [[PRED_COPY2]], 0, 1, implicit $exec, implicit $flat_scr :: (volatile dereferenceable load store (s32) on %ir.ptr) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[FLAT_ATOMIC_ADD_F32_RTN]] ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %ret = call float @llvm.amdgcn.flat.atomic.fadd.f32.p1.f32(ptr %ptr, float %data) ret float %ret @@ -59,21 +59,21 @@ ; GFX940: bb.1 (%ir-block.0): ; GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX940-NEXT: {{ $}} - ; GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX940-NEXT: FLAT_ATOMIC_ADD_F32 [[REG_SEQUENCE]], [[COPY2]], 0, 0, implicit $exec, implicit $flat_scr :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr) + ; GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX940-NEXT: FLAT_ATOMIC_ADD_F32 [[REG_SEQUENCE]], [[PRED_COPY2]], 0, 0, implicit $exec, implicit $flat_scr :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr) ; GFX940-NEXT: S_ENDPGM 0 ; GFX11-LABEL: name: flat_atomic_fadd_f32_no_rtn_atomicrmw ; GFX11: bb.1 (%ir-block.0): ; GFX11-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX11-NEXT: FLAT_ATOMIC_ADD_F32 [[REG_SEQUENCE]], [[COPY2]], 0, 0, implicit $exec, implicit $flat_scr :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr) + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX11-NEXT: FLAT_ATOMIC_ADD_F32 [[REG_SEQUENCE]], [[PRED_COPY2]], 0, 0, 
implicit $exec, implicit $flat_scr :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr) ; GFX11-NEXT: S_ENDPGM 0 %ret = atomicrmw fadd ptr %ptr, float %data syncscope("wavefront") monotonic ret void @@ -84,23 +84,23 @@ ; GFX940: bb.1 (%ir-block.0): ; GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX940-NEXT: {{ $}} - ; GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX940-NEXT: [[FLAT_ATOMIC_ADD_F32_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_F32_RTN [[REG_SEQUENCE]], [[COPY2]], 0, 1, implicit $exec, implicit $flat_scr :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr) - ; GFX940-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_F32_RTN]] + ; GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX940-NEXT: [[FLAT_ATOMIC_ADD_F32_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_F32_RTN [[REG_SEQUENCE]], [[PRED_COPY2]], 0, 1, implicit $exec, implicit $flat_scr :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr) + ; GFX940-NEXT: $vgpr0 = PRED_COPY [[FLAT_ATOMIC_ADD_F32_RTN]] ; GFX940-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX11-LABEL: name: flat_atomic_fadd_f32_rtn_atomicrmw ; GFX11: bb.1 (%ir-block.0): ; GFX11-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX11-NEXT: [[FLAT_ATOMIC_ADD_F32_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_F32_RTN [[REG_SEQUENCE]], [[COPY2]], 0, 1, implicit $exec, implicit $flat_scr :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr) - ; GFX11-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_F32_RTN]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX11-NEXT: [[FLAT_ATOMIC_ADD_F32_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_F32_RTN [[REG_SEQUENCE]], [[PRED_COPY2]], 0, 1, implicit $exec, implicit $flat_scr :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[FLAT_ATOMIC_ADD_F32_RTN]] ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %ret = atomicrmw fadd ptr %ptr, float %data syncscope("wavefront") monotonic ret float %ret diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.f64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.f64.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.f64.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.f64.ll @@ -7,12 +7,12 @@ ; GFX90A_GFX940: bb.1 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: 
[[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY3]], %subreg.sub1 ; GFX90A_GFX940-NEXT: FLAT_ATOMIC_ADD_F64 [[REG_SEQUENCE]], [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (volatile dereferenceable load store (s64) on %ir.ptr) ; GFX90A_GFX940-NEXT: S_ENDPGM 0 %ret = call double @llvm.amdgcn.flat.atomic.fadd.f64.p1.f64(ptr %ptr, double %data) @@ -24,19 +24,19 @@ ; GFX90A_GFX940: bb.1 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY3]], %subreg.sub1 ; GFX90A_GFX940-NEXT: [[FLAT_ATOMIC_ADD_F64_RTN:%[0-9]+]]:vreg_64_align2 = FLAT_ATOMIC_ADD_F64_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE1]], 0, 1, implicit $exec, implicit $flat_scr :: (volatile dereferenceable load store (s64) on %ir.ptr) - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[FLAT_ATOMIC_ADD_F64_RTN]].sub0 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[FLAT_ATOMIC_ADD_F64_RTN]].sub1 - ; GFX90A_GFX940-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX90A_GFX940-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX90A_GFX940-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec - ; GFX90A_GFX940-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; GFX90A_GFX940-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[FLAT_ATOMIC_ADD_F64_RTN]].sub0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[FLAT_ATOMIC_ADD_F64_RTN]].sub1 + ; GFX90A_GFX940-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 
= V_READFIRSTLANE_B32 [[PRED_COPY4]], implicit $exec + ; GFX90A_GFX940-NEXT: $sgpr0 = PRED_COPY [[V_READFIRSTLANE_B32_]] + ; GFX90A_GFX940-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY5]], implicit $exec + ; GFX90A_GFX940-NEXT: $sgpr1 = PRED_COPY [[V_READFIRSTLANE_B32_1]] ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 %ret = call double @llvm.amdgcn.flat.atomic.fadd.f64.p1.f64(ptr %ptr, double %data) ret double %ret @@ -47,12 +47,12 @@ ; GFX90A_GFX940: bb.1 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY3]], %subreg.sub1 ; GFX90A_GFX940-NEXT: FLAT_ATOMIC_ADD_F64 [[REG_SEQUENCE]], [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store syncscope("wavefront") monotonic (s64) on %ir.ptr) ; GFX90A_GFX940-NEXT: S_ENDPGM 0 %ret = atomicrmw fadd ptr %ptr, double %data syncscope("wavefront") monotonic @@ -64,19 +64,19 @@ ; GFX90A_GFX940: bb.1 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY3]], %subreg.sub1 ; GFX90A_GFX940-NEXT: [[FLAT_ATOMIC_ADD_F64_RTN:%[0-9]+]]:vreg_64_align2 = FLAT_ATOMIC_ADD_F64_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store syncscope("wavefront") monotonic (s64) on %ir.ptr) - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY 
[[FLAT_ATOMIC_ADD_F64_RTN]].sub0 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[FLAT_ATOMIC_ADD_F64_RTN]].sub1 - ; GFX90A_GFX940-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX90A_GFX940-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX90A_GFX940-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec - ; GFX90A_GFX940-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; GFX90A_GFX940-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[FLAT_ATOMIC_ADD_F64_RTN]].sub0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[FLAT_ATOMIC_ADD_F64_RTN]].sub1 + ; GFX90A_GFX940-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY4]], implicit $exec + ; GFX90A_GFX940-NEXT: $sgpr0 = PRED_COPY [[V_READFIRSTLANE_B32_]] + ; GFX90A_GFX940-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY5]], implicit $exec + ; GFX90A_GFX940-NEXT: $sgpr1 = PRED_COPY [[V_READFIRSTLANE_B32_1]] ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 %ret = atomicrmw fadd ptr %ptr, double %data syncscope("wavefront") monotonic ret double %ret diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.v2f16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.v2f16.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.v2f16.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.v2f16.ll @@ -6,11 +6,11 @@ ; GFX940: bb.1 (%ir-block.0): ; GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX940-NEXT: {{ $}} - ; GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX940-NEXT: FLAT_ATOMIC_PK_ADD_F16 [[REG_SEQUENCE]], [[COPY2]], 0, 0, implicit $exec, implicit $flat_scr :: (volatile dereferenceable load store (<2 x s16>) on %ir.ptr) + ; GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX940-NEXT: FLAT_ATOMIC_PK_ADD_F16 [[REG_SEQUENCE]], [[PRED_COPY2]], 0, 0, implicit $exec, implicit $flat_scr :: (volatile dereferenceable load store (<2 x s16>) on %ir.ptr) ; GFX940-NEXT: S_ENDPGM 0 %ret = call <2 x half> @llvm.amdgcn.flat.atomic.fadd.v2f16.p1.v2f16(ptr %ptr, <2 x half> %data) ret void @@ -21,12 +21,12 @@ ; GFX940: bb.1 (%ir-block.0): ; GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX940-NEXT: {{ $}} - ; GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX940-NEXT: [[FLAT_ATOMIC_PK_ADD_F16_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_PK_ADD_F16_RTN [[REG_SEQUENCE]], [[COPY2]], 0, 1, implicit $exec, implicit $flat_scr :: (volatile dereferenceable load store (<2 x s16>) on %ir.ptr) - ; GFX940-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_PK_ADD_F16_RTN]] + ; GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; 
GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX940-NEXT: [[FLAT_ATOMIC_PK_ADD_F16_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_PK_ADD_F16_RTN [[REG_SEQUENCE]], [[PRED_COPY2]], 0, 1, implicit $exec, implicit $flat_scr :: (volatile dereferenceable load store (<2 x s16>) on %ir.ptr) + ; GFX940-NEXT: $vgpr0 = PRED_COPY [[FLAT_ATOMIC_PK_ADD_F16_RTN]] ; GFX940-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %ret = call <2 x half> @llvm.amdgcn.flat.atomic.fadd.v2f16.p1.v2f16(ptr %ptr, <2 x half> %data) ret <2 x half> %ret diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/function-returns.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/function-returns.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/function-returns.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/function-returns.ll @@ -9,7 +9,7 @@ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s1) = G_LOAD [[DEF]](p1) :: (load (s1) from `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD]](s1) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[ANYEXT]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = load i1, ptr addrspace(1) undef ret i1 %val @@ -21,7 +21,7 @@ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s1) = G_LOAD [[DEF]](p1) :: (load (s1) from `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD]](s1) - ; CHECK-NEXT: $vgpr0 = COPY [[ZEXT]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[ZEXT]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = load i1, ptr addrspace(1) undef ret i1 %val @@ -33,7 +33,7 @@ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s1) = G_LOAD [[DEF]](p1) :: (load (s1) from `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD]](s1) - ; CHECK-NEXT: $vgpr0 = COPY [[SEXT]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[SEXT]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = load i1, ptr addrspace(1) undef ret i1 %val @@ -45,7 +45,7 @@ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s7) = G_LOAD [[DEF]](p1) :: (load (s7) from `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD]](s7) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[ANYEXT]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = load i7, ptr addrspace(1) undef ret i7 %val @@ -57,7 +57,7 @@ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s7) = G_LOAD [[DEF]](p1) :: (load (s7) from `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD]](s7) - ; CHECK-NEXT: $vgpr0 = COPY [[ZEXT]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[ZEXT]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = load i7, ptr addrspace(1) undef ret i7 %val @@ -69,7 +69,7 @@ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s7) = G_LOAD [[DEF]](p1) :: (load (s7) from `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD]](s7) - ; CHECK-NEXT: $vgpr0 = COPY [[SEXT]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[SEXT]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = load i7, ptr addrspace(1) undef ret i7 %val @@ -81,7 +81,7 @@ ; CHECK-NEXT: 
[[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p1) :: (load (s8) from `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD]](s8) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[ANYEXT]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = load i8, ptr addrspace(1) undef ret i8 %val @@ -93,7 +93,7 @@ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p1) :: (load (s8) from `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD]](s8) - ; CHECK-NEXT: $vgpr0 = COPY [[ZEXT]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[ZEXT]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = load i8, ptr addrspace(1) undef ret i8 %val @@ -105,7 +105,7 @@ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p1) :: (load (s8) from `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD]](s8) - ; CHECK-NEXT: $vgpr0 = COPY [[SEXT]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[SEXT]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = load i8, ptr addrspace(1) undef ret i8 %val @@ -117,7 +117,7 @@ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[DEF]](p1) :: (load (s16) from `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD]](s16) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[ANYEXT]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = load i16, ptr addrspace(1) undef ret i16 %val @@ -129,7 +129,7 @@ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[DEF]](p1) :: (load (s16) from `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD]](s16) - ; CHECK-NEXT: $vgpr0 = COPY [[ZEXT]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[ZEXT]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = load i16, ptr addrspace(1) undef ret i16 %val @@ -141,7 +141,7 @@ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[DEF]](p1) :: (load (s16) from `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD]](s16) - ; CHECK-NEXT: $vgpr0 = COPY [[SEXT]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[SEXT]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = load i16, ptr addrspace(1) undef ret i16 %val @@ -153,7 +153,7 @@ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[DEF]](p1) :: (load (s16) from `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD]](s16) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[ANYEXT]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = load half, ptr addrspace(1) undef ret half %val @@ -165,7 +165,7 @@ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s24) = G_LOAD [[DEF]](p1) :: (load (s24) from `ptr addrspace(1) undef`, align 4, addrspace 1) ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD]](s24) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[ANYEXT]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = load i24, ptr addrspace(1) undef ret i24 %val @@ -177,7 +177,7 @@ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = 
G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s24) = G_LOAD [[DEF]](p1) :: (load (s24) from `ptr addrspace(1) undef`, align 4, addrspace 1) ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD]](s24) - ; CHECK-NEXT: $vgpr0 = COPY [[ZEXT]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[ZEXT]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = load i24, ptr addrspace(1) undef ret i24 %val @@ -189,7 +189,7 @@ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s24) = G_LOAD [[DEF]](p1) :: (load (s24) from `ptr addrspace(1) undef`, align 4, addrspace 1) ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD]](s24) - ; CHECK-NEXT: $vgpr0 = COPY [[SEXT]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[SEXT]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = load i24, ptr addrspace(1) undef ret i24 %val @@ -203,8 +203,8 @@ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s24), [[UV1:%[0-9]+]]:_(s24) = G_UNMERGE_VALUES [[LOAD]](<2 x s24>) ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s24) ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s24) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[ANYEXT1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[ANYEXT]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[ANYEXT1]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %val = load <2 x i24>, ptr addrspace(1) undef ret <2 x i24> %val @@ -219,9 +219,9 @@ ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s24) ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s24) ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV2]](s24) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[ANYEXT1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[ANYEXT2]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[ANYEXT]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[ANYEXT1]](s32) + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[ANYEXT2]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 %val = load <3 x i24>, ptr addrspace(1) undef ret <3 x i24> %val @@ -232,7 +232,7 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p1) :: (load (s32) from `ptr addrspace(1) undef`, addrspace 1) - ; CHECK-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[LOAD]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = load i32, ptr addrspace(1) undef ret i32 %val @@ -245,8 +245,8 @@ ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s48) = G_LOAD [[DEF]](p1) :: (load (s48) from `ptr addrspace(1) undef`, align 8, addrspace 1) ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s48) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ANYEXT]](s64) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %val = load i48, ptr addrspace(1) undef, align 8 ret i48 %val @@ -259,8 +259,8 @@ ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s48) = G_LOAD [[DEF]](p1) :: (load (s48) from `ptr addrspace(1) undef`, align 8, addrspace 1) ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[LOAD]](s48) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SEXT]](s64) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) ; CHECK-NEXT: 
SI_RETURN implicit $vgpr0, implicit $vgpr1 %val = load i48, ptr addrspace(1) undef, align 8 ret i48 %val @@ -273,8 +273,8 @@ ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s48) = G_LOAD [[DEF]](p1) :: (load (s48) from `ptr addrspace(1) undef`, align 8, addrspace 1) ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[LOAD]](s48) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ZEXT]](s64) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %val = load i48, ptr addrspace(1) undef, align 8 ret i48 %val @@ -286,8 +286,8 @@ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[DEF]](p1) :: (load (s64) from `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](s64) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %val = load i64, ptr addrspace(1) undef ret i64 %val @@ -300,9 +300,9 @@ ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s65) = G_LOAD [[DEF]](p1) :: (load (s65) from `ptr addrspace(1) undef`, align 8, addrspace 1) ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s96) = G_ANYEXT [[LOAD]](s65) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ANYEXT]](s96) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 %val = load i65, ptr addrspace(1) undef ret i65 %val @@ -315,9 +315,9 @@ ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s65) = G_LOAD [[DEF]](p1) :: (load (s65) from `ptr addrspace(1) undef`, align 8, addrspace 1) ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s96) = G_SEXT [[LOAD]](s65) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SEXT]](s96) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 %val = load i65, ptr addrspace(1) undef ret i65 %val @@ -330,9 +330,9 @@ ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s65) = G_LOAD [[DEF]](p1) :: (load (s65) from `ptr addrspace(1) undef`, align 8, addrspace 1) ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s96) = G_ZEXT [[LOAD]](s65) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ZEXT]](s96) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 %val = load i65, ptr addrspace(1) undef ret i65 %val @@ -343,7 +343,7 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = 
G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p1) :: (load (s32) from `ptr addrspace(1) undef`, addrspace 1) - ; CHECK-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[LOAD]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = load float, ptr addrspace(1) undef ret float %val @@ -355,8 +355,8 @@ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[DEF]](p1) :: (load (s64) from `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](s64) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %val = load double, ptr addrspace(1) undef ret double %val @@ -368,10 +368,10 @@ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[DEF]](p1) :: (load (<2 x s64>) from `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 %val = load <2 x double>, ptr addrspace(1) undef ret <2 x double> %val @@ -383,8 +383,8 @@ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[DEF]](p1) :: (load (<2 x s32>) from `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %val = load <2 x i32>, ptr addrspace(1) undef ret <2 x i32> %val @@ -396,9 +396,9 @@ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[DEF]](p1) :: (load (<3 x s32>) from `ptr addrspace(1) undef`, align 16, addrspace 1) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<3 x s32>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 %val = load <3 x i32>, ptr addrspace(1) undef ret <3 x i32> %val @@ -410,10 +410,10 @@ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[DEF]](p1) :: (load (<4 x s32>) from `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY 
[[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 %val = load <4 x i32>, ptr addrspace(1) undef ret <4 x i32> %val @@ -425,11 +425,11 @@ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<5 x s32>) = G_LOAD [[DEF]](p1) :: (volatile load (<5 x s32>) from `ptr addrspace(1) undef`, align 32, addrspace 1) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<5 x s32>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) + ; CHECK-NEXT: $vgpr4 = PRED_COPY [[UV4]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4 %val = load volatile <5 x i32>, ptr addrspace(1) undef ret <5 x i32> %val @@ -442,14 +442,14 @@ ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile invariant load (p1) from `ptr addrspace(4) undef`, addrspace 4) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[LOAD]](p1) :: (load (<8 x s32>) from %ir.ptr, addrspace 1) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<8 x s32>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](s32) - ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) + ; CHECK-NEXT: $vgpr4 = PRED_COPY [[UV4]](s32) + ; CHECK-NEXT: $vgpr5 = PRED_COPY [[UV5]](s32) + ; CHECK-NEXT: $vgpr6 = PRED_COPY [[UV6]](s32) + ; CHECK-NEXT: $vgpr7 = PRED_COPY [[UV7]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 %ptr = load volatile ptr addrspace(1), ptr addrspace(4) undef %val = load <8 x i32>, ptr addrspace(1) %ptr @@ -463,22 +463,22 @@ ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile invariant load (p1) from `ptr addrspace(4) undef`, addrspace 4) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[LOAD]](p1) :: (load (<16 x s32>) from %ir.ptr, addrspace 1) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), 
[[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<16 x s32>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](s32) - ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](s32) - ; CHECK-NEXT: $vgpr8 = COPY [[UV8]](s32) - ; CHECK-NEXT: $vgpr9 = COPY [[UV9]](s32) - ; CHECK-NEXT: $vgpr10 = COPY [[UV10]](s32) - ; CHECK-NEXT: $vgpr11 = COPY [[UV11]](s32) - ; CHECK-NEXT: $vgpr12 = COPY [[UV12]](s32) - ; CHECK-NEXT: $vgpr13 = COPY [[UV13]](s32) - ; CHECK-NEXT: $vgpr14 = COPY [[UV14]](s32) - ; CHECK-NEXT: $vgpr15 = COPY [[UV15]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) + ; CHECK-NEXT: $vgpr4 = PRED_COPY [[UV4]](s32) + ; CHECK-NEXT: $vgpr5 = PRED_COPY [[UV5]](s32) + ; CHECK-NEXT: $vgpr6 = PRED_COPY [[UV6]](s32) + ; CHECK-NEXT: $vgpr7 = PRED_COPY [[UV7]](s32) + ; CHECK-NEXT: $vgpr8 = PRED_COPY [[UV8]](s32) + ; CHECK-NEXT: $vgpr9 = PRED_COPY [[UV9]](s32) + ; CHECK-NEXT: $vgpr10 = PRED_COPY [[UV10]](s32) + ; CHECK-NEXT: $vgpr11 = PRED_COPY [[UV11]](s32) + ; CHECK-NEXT: $vgpr12 = PRED_COPY [[UV12]](s32) + ; CHECK-NEXT: $vgpr13 = PRED_COPY [[UV13]](s32) + ; CHECK-NEXT: $vgpr14 = PRED_COPY [[UV14]](s32) + ; CHECK-NEXT: $vgpr15 = PRED_COPY [[UV15]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 %ptr = load volatile ptr addrspace(1), ptr addrspace(4) undef %val = load <16 x i32>, ptr addrspace(1) %ptr @@ -492,38 +492,38 @@ ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile invariant load (p1) from `ptr addrspace(4) undef`, addrspace 4) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[LOAD]](p1) :: (load (<32 x s32>) from %ir.ptr, addrspace 1) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<32 x s32>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](s32) - ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](s32) - ; CHECK-NEXT: $vgpr8 = COPY [[UV8]](s32) 
- ; CHECK-NEXT: $vgpr9 = COPY [[UV9]](s32) - ; CHECK-NEXT: $vgpr10 = COPY [[UV10]](s32) - ; CHECK-NEXT: $vgpr11 = COPY [[UV11]](s32) - ; CHECK-NEXT: $vgpr12 = COPY [[UV12]](s32) - ; CHECK-NEXT: $vgpr13 = COPY [[UV13]](s32) - ; CHECK-NEXT: $vgpr14 = COPY [[UV14]](s32) - ; CHECK-NEXT: $vgpr15 = COPY [[UV15]](s32) - ; CHECK-NEXT: $vgpr16 = COPY [[UV16]](s32) - ; CHECK-NEXT: $vgpr17 = COPY [[UV17]](s32) - ; CHECK-NEXT: $vgpr18 = COPY [[UV18]](s32) - ; CHECK-NEXT: $vgpr19 = COPY [[UV19]](s32) - ; CHECK-NEXT: $vgpr20 = COPY [[UV20]](s32) - ; CHECK-NEXT: $vgpr21 = COPY [[UV21]](s32) - ; CHECK-NEXT: $vgpr22 = COPY [[UV22]](s32) - ; CHECK-NEXT: $vgpr23 = COPY [[UV23]](s32) - ; CHECK-NEXT: $vgpr24 = COPY [[UV24]](s32) - ; CHECK-NEXT: $vgpr25 = COPY [[UV25]](s32) - ; CHECK-NEXT: $vgpr26 = COPY [[UV26]](s32) - ; CHECK-NEXT: $vgpr27 = COPY [[UV27]](s32) - ; CHECK-NEXT: $vgpr28 = COPY [[UV28]](s32) - ; CHECK-NEXT: $vgpr29 = COPY [[UV29]](s32) - ; CHECK-NEXT: $vgpr30 = COPY [[UV30]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[UV31]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) + ; CHECK-NEXT: $vgpr4 = PRED_COPY [[UV4]](s32) + ; CHECK-NEXT: $vgpr5 = PRED_COPY [[UV5]](s32) + ; CHECK-NEXT: $vgpr6 = PRED_COPY [[UV6]](s32) + ; CHECK-NEXT: $vgpr7 = PRED_COPY [[UV7]](s32) + ; CHECK-NEXT: $vgpr8 = PRED_COPY [[UV8]](s32) + ; CHECK-NEXT: $vgpr9 = PRED_COPY [[UV9]](s32) + ; CHECK-NEXT: $vgpr10 = PRED_COPY [[UV10]](s32) + ; CHECK-NEXT: $vgpr11 = PRED_COPY [[UV11]](s32) + ; CHECK-NEXT: $vgpr12 = PRED_COPY [[UV12]](s32) + ; CHECK-NEXT: $vgpr13 = PRED_COPY [[UV13]](s32) + ; CHECK-NEXT: $vgpr14 = PRED_COPY [[UV14]](s32) + ; CHECK-NEXT: $vgpr15 = PRED_COPY [[UV15]](s32) + ; CHECK-NEXT: $vgpr16 = PRED_COPY [[UV16]](s32) + ; CHECK-NEXT: $vgpr17 = PRED_COPY [[UV17]](s32) + ; CHECK-NEXT: $vgpr18 = PRED_COPY [[UV18]](s32) + ; CHECK-NEXT: $vgpr19 = PRED_COPY [[UV19]](s32) + ; CHECK-NEXT: $vgpr20 = PRED_COPY [[UV20]](s32) + ; CHECK-NEXT: $vgpr21 = PRED_COPY [[UV21]](s32) + ; CHECK-NEXT: $vgpr22 = PRED_COPY [[UV22]](s32) + ; CHECK-NEXT: $vgpr23 = PRED_COPY [[UV23]](s32) + ; CHECK-NEXT: $vgpr24 = PRED_COPY [[UV24]](s32) + ; CHECK-NEXT: $vgpr25 = PRED_COPY [[UV25]](s32) + ; CHECK-NEXT: $vgpr26 = PRED_COPY [[UV26]](s32) + ; CHECK-NEXT: $vgpr27 = PRED_COPY [[UV27]](s32) + ; CHECK-NEXT: $vgpr28 = PRED_COPY [[UV28]](s32) + ; CHECK-NEXT: $vgpr29 = PRED_COPY [[UV29]](s32) + ; CHECK-NEXT: $vgpr30 = PRED_COPY [[UV30]](s32) + ; CHECK-NEXT: $vgpr31 = PRED_COPY [[UV31]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $vgpr31 %ptr = load volatile ptr addrspace(1), ptr addrspace(4) undef %val = load <32 x i32>, ptr addrspace(1) %ptr @@ -536,10 +536,10 @@ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[DEF]](p1) :: (load (<2 x s64>) from `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), 
[[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 %val = load <2 x i64>, ptr addrspace(1) undef ret <2 x i64> %val @@ -552,12 +552,12 @@ ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile invariant load (p1) from `ptr addrspace(4) undef`, addrspace 4) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s64>) = G_LOAD [[LOAD]](p1) :: (load (<3 x s64>) from %ir.ptr, align 32, addrspace 1) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<3 x s64>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) + ; CHECK-NEXT: $vgpr4 = PRED_COPY [[UV4]](s32) + ; CHECK-NEXT: $vgpr5 = PRED_COPY [[UV5]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 %ptr = load volatile ptr addrspace(1), ptr addrspace(4) undef %val = load <3 x i64>, ptr addrspace(1) %ptr @@ -571,14 +571,14 @@ ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile invariant load (p1) from `ptr addrspace(4) undef`, addrspace 4) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[LOAD]](p1) :: (load (<4 x s64>) from %ir.ptr, addrspace 1) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<4 x s64>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](s32) - ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) + ; CHECK-NEXT: $vgpr4 = PRED_COPY [[UV4]](s32) + ; CHECK-NEXT: $vgpr5 = PRED_COPY [[UV5]](s32) + ; CHECK-NEXT: $vgpr6 = PRED_COPY [[UV6]](s32) + ; CHECK-NEXT: $vgpr7 = PRED_COPY [[UV7]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 %ptr = load volatile ptr addrspace(1), ptr addrspace(4) undef %val = load <4 x i64>, ptr addrspace(1) %ptr @@ -592,16 +592,16 @@ ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile invariant load (p1) from `ptr addrspace(4) undef`, addrspace 4) ; 
CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<5 x s64>) = G_LOAD [[LOAD]](p1) :: (load (<5 x s64>) from %ir.ptr, align 64, addrspace 1) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<5 x s64>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](s32) - ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](s32) - ; CHECK-NEXT: $vgpr8 = COPY [[UV8]](s32) - ; CHECK-NEXT: $vgpr9 = COPY [[UV9]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) + ; CHECK-NEXT: $vgpr4 = PRED_COPY [[UV4]](s32) + ; CHECK-NEXT: $vgpr5 = PRED_COPY [[UV5]](s32) + ; CHECK-NEXT: $vgpr6 = PRED_COPY [[UV6]](s32) + ; CHECK-NEXT: $vgpr7 = PRED_COPY [[UV7]](s32) + ; CHECK-NEXT: $vgpr8 = PRED_COPY [[UV8]](s32) + ; CHECK-NEXT: $vgpr9 = PRED_COPY [[UV9]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9 %ptr = load volatile ptr addrspace(1), ptr addrspace(4) undef %val = load <5 x i64>, ptr addrspace(1) %ptr @@ -615,22 +615,22 @@ ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile invariant load (p1) from `ptr addrspace(4) undef`, addrspace 4) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<8 x s64>) = G_LOAD [[LOAD]](p1) :: (load (<8 x s64>) from %ir.ptr, addrspace 1) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<8 x s64>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](s32) - ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](s32) - ; CHECK-NEXT: $vgpr8 = COPY [[UV8]](s32) - ; CHECK-NEXT: $vgpr9 = COPY [[UV9]](s32) - ; CHECK-NEXT: $vgpr10 = COPY [[UV10]](s32) - ; CHECK-NEXT: $vgpr11 = COPY [[UV11]](s32) - ; CHECK-NEXT: $vgpr12 = COPY [[UV12]](s32) - ; CHECK-NEXT: $vgpr13 = COPY [[UV13]](s32) - ; CHECK-NEXT: $vgpr14 = COPY [[UV14]](s32) - ; CHECK-NEXT: $vgpr15 = COPY [[UV15]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) + ; CHECK-NEXT: $vgpr4 = PRED_COPY [[UV4]](s32) + ; CHECK-NEXT: $vgpr5 = PRED_COPY [[UV5]](s32) + ; CHECK-NEXT: $vgpr6 = PRED_COPY [[UV6]](s32) + ; CHECK-NEXT: $vgpr7 = PRED_COPY [[UV7]](s32) + ; CHECK-NEXT: $vgpr8 = PRED_COPY [[UV8]](s32) + ; CHECK-NEXT: $vgpr9 = PRED_COPY [[UV9]](s32) + ; CHECK-NEXT: $vgpr10 = PRED_COPY 
[[UV10]](s32) + ; CHECK-NEXT: $vgpr11 = PRED_COPY [[UV11]](s32) + ; CHECK-NEXT: $vgpr12 = PRED_COPY [[UV12]](s32) + ; CHECK-NEXT: $vgpr13 = PRED_COPY [[UV13]](s32) + ; CHECK-NEXT: $vgpr14 = PRED_COPY [[UV14]](s32) + ; CHECK-NEXT: $vgpr15 = PRED_COPY [[UV15]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 %ptr = load volatile ptr addrspace(1), ptr addrspace(4) undef %val = load <8 x i64>, ptr addrspace(1) %ptr @@ -644,38 +644,38 @@ ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile invariant load (p1) from `ptr addrspace(4) undef`, addrspace 4) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<16 x s64>) = G_LOAD [[LOAD]](p1) :: (load (<16 x s64>) from %ir.ptr, addrspace 1) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<16 x s64>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](s32) - ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](s32) - ; CHECK-NEXT: $vgpr8 = COPY [[UV8]](s32) - ; CHECK-NEXT: $vgpr9 = COPY [[UV9]](s32) - ; CHECK-NEXT: $vgpr10 = COPY [[UV10]](s32) - ; CHECK-NEXT: $vgpr11 = COPY [[UV11]](s32) - ; CHECK-NEXT: $vgpr12 = COPY [[UV12]](s32) - ; CHECK-NEXT: $vgpr13 = COPY [[UV13]](s32) - ; CHECK-NEXT: $vgpr14 = COPY [[UV14]](s32) - ; CHECK-NEXT: $vgpr15 = COPY [[UV15]](s32) - ; CHECK-NEXT: $vgpr16 = COPY [[UV16]](s32) - ; CHECK-NEXT: $vgpr17 = COPY [[UV17]](s32) - ; CHECK-NEXT: $vgpr18 = COPY [[UV18]](s32) - ; CHECK-NEXT: $vgpr19 = COPY [[UV19]](s32) - ; CHECK-NEXT: $vgpr20 = COPY [[UV20]](s32) - ; CHECK-NEXT: $vgpr21 = COPY [[UV21]](s32) - ; CHECK-NEXT: $vgpr22 = COPY [[UV22]](s32) - ; CHECK-NEXT: $vgpr23 = COPY [[UV23]](s32) - ; CHECK-NEXT: $vgpr24 = COPY [[UV24]](s32) - ; CHECK-NEXT: $vgpr25 = COPY [[UV25]](s32) - ; CHECK-NEXT: $vgpr26 = COPY [[UV26]](s32) - ; CHECK-NEXT: $vgpr27 = COPY [[UV27]](s32) - ; CHECK-NEXT: $vgpr28 = COPY [[UV28]](s32) - ; CHECK-NEXT: $vgpr29 = COPY [[UV29]](s32) - ; CHECK-NEXT: $vgpr30 = COPY [[UV30]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[UV31]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) + ; CHECK-NEXT: $vgpr4 = PRED_COPY [[UV4]](s32) + ; CHECK-NEXT: $vgpr5 = PRED_COPY [[UV5]](s32) + ; CHECK-NEXT: $vgpr6 = PRED_COPY [[UV6]](s32) + ; 
CHECK-NEXT: $vgpr7 = PRED_COPY [[UV7]](s32) + ; CHECK-NEXT: $vgpr8 = PRED_COPY [[UV8]](s32) + ; CHECK-NEXT: $vgpr9 = PRED_COPY [[UV9]](s32) + ; CHECK-NEXT: $vgpr10 = PRED_COPY [[UV10]](s32) + ; CHECK-NEXT: $vgpr11 = PRED_COPY [[UV11]](s32) + ; CHECK-NEXT: $vgpr12 = PRED_COPY [[UV12]](s32) + ; CHECK-NEXT: $vgpr13 = PRED_COPY [[UV13]](s32) + ; CHECK-NEXT: $vgpr14 = PRED_COPY [[UV14]](s32) + ; CHECK-NEXT: $vgpr15 = PRED_COPY [[UV15]](s32) + ; CHECK-NEXT: $vgpr16 = PRED_COPY [[UV16]](s32) + ; CHECK-NEXT: $vgpr17 = PRED_COPY [[UV17]](s32) + ; CHECK-NEXT: $vgpr18 = PRED_COPY [[UV18]](s32) + ; CHECK-NEXT: $vgpr19 = PRED_COPY [[UV19]](s32) + ; CHECK-NEXT: $vgpr20 = PRED_COPY [[UV20]](s32) + ; CHECK-NEXT: $vgpr21 = PRED_COPY [[UV21]](s32) + ; CHECK-NEXT: $vgpr22 = PRED_COPY [[UV22]](s32) + ; CHECK-NEXT: $vgpr23 = PRED_COPY [[UV23]](s32) + ; CHECK-NEXT: $vgpr24 = PRED_COPY [[UV24]](s32) + ; CHECK-NEXT: $vgpr25 = PRED_COPY [[UV25]](s32) + ; CHECK-NEXT: $vgpr26 = PRED_COPY [[UV26]](s32) + ; CHECK-NEXT: $vgpr27 = PRED_COPY [[UV27]](s32) + ; CHECK-NEXT: $vgpr28 = PRED_COPY [[UV28]](s32) + ; CHECK-NEXT: $vgpr29 = PRED_COPY [[UV29]](s32) + ; CHECK-NEXT: $vgpr30 = PRED_COPY [[UV30]](s32) + ; CHECK-NEXT: $vgpr31 = PRED_COPY [[UV31]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $vgpr31 %ptr = load volatile ptr addrspace(1), ptr addrspace(4) undef %val = load <16 x i64>, ptr addrspace(1) %ptr @@ -687,7 +687,7 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[DEF]](p1) :: (load (<2 x s16>) from `ptr addrspace(1) undef`, addrspace 1) - ; CHECK-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[LOAD]](<2 x s16>) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = load <2 x i16>, ptr addrspace(1) undef ret <2 x i16> %val @@ -698,7 +698,7 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[DEF]](p1) :: (load (<2 x s16>) from `ptr addrspace(1) undef`, addrspace 1) - ; CHECK-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[LOAD]](<2 x s16>) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = load <2 x half>, ptr addrspace(1) undef ret <2 x half> %val @@ -713,8 +713,8 @@ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16), [[DEF1]](s16) ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s16>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV3]](<2 x s16>) - ; CHECK-NEXT: $vgpr1 = COPY [[UV4]](<2 x s16>) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV3]](<2 x s16>) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV4]](<2 x s16>) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %val = load <3 x i16>, ptr addrspace(1) undef ret <3 x i16> %val @@ -726,8 +726,8 @@ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; 
CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[DEF]](p1) :: (load (<4 x s16>) from `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](<2 x s16>) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](<2 x s16>) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %val = load <4 x i16>, ptr addrspace(1) undef ret <4 x i16> %val @@ -739,8 +739,8 @@ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[DEF]](p1) :: (load (<4 x s16>) from `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](<2 x s16>) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](<2 x s16>) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %val = load <4 x half>, ptr addrspace(1) undef ret <4 x half> %val @@ -756,9 +756,9 @@ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<6 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16), [[UV3]](s16), [[UV4]](s16), [[DEF1]](s16) ; CHECK-NEXT: [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<6 x s16>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV5]](<2 x s16>) - ; CHECK-NEXT: $vgpr1 = COPY [[UV6]](<2 x s16>) - ; CHECK-NEXT: $vgpr2 = COPY [[UV7]](<2 x s16>) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV5]](<2 x s16>) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV6]](<2 x s16>) + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[UV7]](<2 x s16>) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 %ptr = load volatile ptr addrspace(1), ptr addrspace(4) undef %val = load <5 x i16>, ptr addrspace(1) %ptr @@ -772,10 +772,10 @@ ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile invariant load (p1) from `ptr addrspace(4) undef`, addrspace 4) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<8 x s16>) = G_LOAD [[LOAD]](p1) :: (load (<8 x s16>) from %ir.ptr, addrspace 1) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD1]](<8 x s16>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](<2 x s16>) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](<2 x s16>) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](<2 x s16>) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](<2 x s16>) + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[UV2]](<2 x s16>) + ; CHECK-NEXT: $vgpr3 = PRED_COPY [[UV3]](<2 x s16>) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 %ptr = load volatile ptr addrspace(1), ptr addrspace(4) undef %val = load <8 x i16>, ptr addrspace(1) %ptr @@ -789,14 +789,14 @@ ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile invariant load (p1) from `ptr addrspace(4) undef`, addrspace 4) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<16 x s16>) = G_LOAD [[LOAD]](p1) :: (load (<16 x s16>) from %ir.ptr, addrspace 1) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 
x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD1]](<16 x s16>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](<2 x s16>) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](<2 x s16>) - ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](<2 x s16>) - ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](<2 x s16>) - ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](<2 x s16>) - ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](<2 x s16>) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](<2 x s16>) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](<2 x s16>) + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[UV2]](<2 x s16>) + ; CHECK-NEXT: $vgpr3 = PRED_COPY [[UV3]](<2 x s16>) + ; CHECK-NEXT: $vgpr4 = PRED_COPY [[UV4]](<2 x s16>) + ; CHECK-NEXT: $vgpr5 = PRED_COPY [[UV5]](<2 x s16>) + ; CHECK-NEXT: $vgpr6 = PRED_COPY [[UV6]](<2 x s16>) + ; CHECK-NEXT: $vgpr7 = PRED_COPY [[UV7]](<2 x s16>) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 %ptr = load volatile ptr addrspace(1), ptr addrspace(4) undef %val = load <16 x i16>, ptr addrspace(1) %ptr @@ -827,37 +827,37 @@ ; CHECK-NEXT: [[ANYEXT14:%[0-9]+]]:_(s16) = G_ANYEXT [[UV14]](s8) ; CHECK-NEXT: [[ANYEXT15:%[0-9]+]]:_(s16) = G_ANYEXT [[UV15]](s8) ; CHECK-NEXT: [[ANYEXT16:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT]](s16) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT16]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[ANYEXT16]](s32) ; CHECK-NEXT: [[ANYEXT17:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT1]](s16) - ; CHECK-NEXT: $vgpr1 = COPY [[ANYEXT17]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[ANYEXT17]](s32) ; CHECK-NEXT: [[ANYEXT18:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT2]](s16) - ; CHECK-NEXT: $vgpr2 = COPY [[ANYEXT18]](s32) + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[ANYEXT18]](s32) ; CHECK-NEXT: [[ANYEXT19:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT3]](s16) - ; CHECK-NEXT: $vgpr3 = COPY [[ANYEXT19]](s32) + ; CHECK-NEXT: $vgpr3 = PRED_COPY [[ANYEXT19]](s32) ; CHECK-NEXT: [[ANYEXT20:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT4]](s16) - ; CHECK-NEXT: $vgpr4 = COPY [[ANYEXT20]](s32) + ; CHECK-NEXT: $vgpr4 = PRED_COPY [[ANYEXT20]](s32) ; CHECK-NEXT: [[ANYEXT21:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT5]](s16) - ; CHECK-NEXT: $vgpr5 = COPY [[ANYEXT21]](s32) + ; CHECK-NEXT: $vgpr5 = PRED_COPY [[ANYEXT21]](s32) ; CHECK-NEXT: [[ANYEXT22:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT6]](s16) - ; CHECK-NEXT: $vgpr6 = COPY [[ANYEXT22]](s32) + ; CHECK-NEXT: $vgpr6 = PRED_COPY [[ANYEXT22]](s32) ; CHECK-NEXT: [[ANYEXT23:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT7]](s16) - ; CHECK-NEXT: $vgpr7 = COPY [[ANYEXT23]](s32) + ; CHECK-NEXT: $vgpr7 = PRED_COPY [[ANYEXT23]](s32) ; CHECK-NEXT: [[ANYEXT24:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT8]](s16) - ; CHECK-NEXT: $vgpr8 = COPY [[ANYEXT24]](s32) + ; CHECK-NEXT: $vgpr8 = PRED_COPY [[ANYEXT24]](s32) ; CHECK-NEXT: [[ANYEXT25:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT9]](s16) - ; CHECK-NEXT: $vgpr9 = COPY [[ANYEXT25]](s32) + ; CHECK-NEXT: $vgpr9 = PRED_COPY [[ANYEXT25]](s32) ; CHECK-NEXT: [[ANYEXT26:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT10]](s16) - ; CHECK-NEXT: $vgpr10 = COPY [[ANYEXT26]](s32) + ; CHECK-NEXT: $vgpr10 = PRED_COPY [[ANYEXT26]](s32) ; CHECK-NEXT: [[ANYEXT27:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT11]](s16) - ; CHECK-NEXT: $vgpr11 = COPY [[ANYEXT27]](s32) + ; CHECK-NEXT: $vgpr11 = PRED_COPY [[ANYEXT27]](s32) ; CHECK-NEXT: [[ANYEXT28:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT12]](s16) - ; CHECK-NEXT: $vgpr12 = COPY 
[[ANYEXT28]](s32) + ; CHECK-NEXT: $vgpr12 = PRED_COPY [[ANYEXT28]](s32) ; CHECK-NEXT: [[ANYEXT29:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT13]](s16) - ; CHECK-NEXT: $vgpr13 = COPY [[ANYEXT29]](s32) + ; CHECK-NEXT: $vgpr13 = PRED_COPY [[ANYEXT29]](s32) ; CHECK-NEXT: [[ANYEXT30:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT14]](s16) - ; CHECK-NEXT: $vgpr14 = COPY [[ANYEXT30]](s32) + ; CHECK-NEXT: $vgpr14 = PRED_COPY [[ANYEXT30]](s32) ; CHECK-NEXT: [[ANYEXT31:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT15]](s16) - ; CHECK-NEXT: $vgpr15 = COPY [[ANYEXT31]](s32) + ; CHECK-NEXT: $vgpr15 = PRED_COPY [[ANYEXT31]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 %ptr = load volatile ptr addrspace(1), ptr addrspace(4) undef %val = load <16 x i8>, ptr addrspace(1) %ptr @@ -873,9 +873,9 @@ ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV]](s8) ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[UV1]](s8) ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT]](s16) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT2]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[ANYEXT2]](s32) ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT1]](s16) - ; CHECK-NEXT: $vgpr1 = COPY [[ANYEXT3]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[ANYEXT3]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %val = load <2 x i8>, ptr addrspace(1) undef ret <2 x i8> %val @@ -891,11 +891,11 @@ ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[UV1]](s8) ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s16) = G_ANYEXT [[UV2]](s8) ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT]](s16) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT3]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[ANYEXT3]](s32) ; CHECK-NEXT: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT1]](s16) - ; CHECK-NEXT: $vgpr1 = COPY [[ANYEXT4]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[ANYEXT4]](s32) ; CHECK-NEXT: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT2]](s16) - ; CHECK-NEXT: $vgpr2 = COPY [[ANYEXT5]](s32) + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[ANYEXT5]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 %val = load <3 x i8>, ptr addrspace(1) undef ret <3 x i8> %val @@ -913,13 +913,13 @@ ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s16) = G_ANYEXT [[UV2]](s8) ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(s16) = G_ANYEXT [[UV3]](s8) ; CHECK-NEXT: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT]](s16) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT4]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[ANYEXT4]](s32) ; CHECK-NEXT: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT1]](s16) - ; CHECK-NEXT: $vgpr1 = COPY [[ANYEXT5]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[ANYEXT5]](s32) ; CHECK-NEXT: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT2]](s16) - ; CHECK-NEXT: $vgpr2 = COPY [[ANYEXT6]](s32) + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[ANYEXT6]](s32) ; CHECK-NEXT: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT3]](s16) - ; CHECK-NEXT: $vgpr3 = COPY [[ANYEXT7]](s32) + ; CHECK-NEXT: $vgpr3 = PRED_COPY [[ANYEXT7]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 %ptr = load volatile ptr addrspace(1), ptr addrspace(4) undef %val = load <4 x i8>, ptr addrspace(1) %ptr @@ -935,8 +935,8 @@ ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], [[C]](s64) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD 
[[PTR_ADD]](p1) :: (load (s32) from `ptr addrspace(1) undef` + 4, addrspace 1) ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD]](s8) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[LOAD1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[ANYEXT]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[LOAD1]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %val = load { i8, i32 }, ptr addrspace(1) undef ret { i8, i32 } %val @@ -947,13 +947,13 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(p5) = PRED_COPY $vgpr0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p1) :: (volatile load (s8) from `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p1) :: (volatile load (s32) from `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CHECK-NEXT: G_STORE [[LOAD]](s8), [[COPY]](p5) :: (store (s8) into %ir.arg0, addrspace 5) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[PRED_COPY]], [[C]](s32) + ; CHECK-NEXT: G_STORE [[LOAD]](s8), [[PRED_COPY]](p5) :: (store (s8) into %ir.arg0, addrspace 5) ; CHECK-NEXT: G_STORE [[LOAD1]](s32), [[PTR_ADD]](p5) :: (store (s32) into %ir.gep1, addrspace 5) ; CHECK-NEXT: SI_RETURN %val0 = load volatile i8, ptr addrspace(1) undef @@ -974,11 +974,11 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(p5) = PRED_COPY $vgpr0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile invariant load (p1) from `ptr addrspace(4) undef`, addrspace 4) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<33 x s32>) = G_LOAD [[LOAD]](p1) :: (load (<33 x s32>) from %ir.ptr, align 256, addrspace 1) - ; CHECK-NEXT: G_STORE [[LOAD1]](<33 x s32>), [[COPY]](p5) :: (store (<33 x s32>), align 256, addrspace 5) + ; CHECK-NEXT: G_STORE [[LOAD1]](<33 x s32>), [[PRED_COPY]](p5) :: (store (<33 x s32>), align 256, addrspace 5) ; CHECK-NEXT: SI_RETURN %ptr = load volatile ptr addrspace(1), ptr addrspace(4) undef %val = load <33 x i32>, ptr addrspace(1) %ptr @@ -990,18 +990,18 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(p5) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[PRED_COPY1]](s32), [[PRED_COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[PRED_COPY3]](s32) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 256 ; CHECK-NEXT: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[SEXT]], [[C]] ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[MV]], [[MUL]](s64) - ; 
CHECK-NEXT: [[COPY4:%[0-9]+]]:_(p1) = COPY [[PTR_ADD]](p1) - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<33 x s32>) = G_LOAD [[COPY4]](p1) :: (load (<33 x s32>) from %ir.gep, align 256, addrspace 1) - ; CHECK-NEXT: G_STORE [[LOAD]](<33 x s32>), [[COPY]](p5) :: (store (<33 x s32>), align 256, addrspace 5) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:_(p1) = PRED_COPY [[PTR_ADD]](p1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<33 x s32>) = G_LOAD [[PRED_COPY4]](p1) :: (load (<33 x s32>) from %ir.gep, align 256, addrspace 1) + ; CHECK-NEXT: G_STORE [[LOAD]](<33 x s32>), [[PRED_COPY]](p5) :: (store (<33 x s32>), align 256, addrspace 5) ; CHECK-NEXT: SI_RETURN %gep = getelementptr inbounds <33 x i32>, ptr addrspace(1) %p, i32 %idx %val = load <33 x i32>, ptr addrspace(1) %gep @@ -1013,16 +1013,16 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(p5) = PRED_COPY $vgpr0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile invariant load (p1) from `ptr addrspace(4) undef`, addrspace 4) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[LOAD]](p1) :: (load (<32 x s32>) from %ir.ptr, addrspace 1) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 128 ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[LOAD]], [[C]](s64) ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s32) from %ir.ptr + 128, align 128, addrspace 1) - ; CHECK-NEXT: G_STORE [[LOAD1]](<32 x s32>), [[COPY]](p5) :: (store (<32 x s32>), addrspace 5) + ; CHECK-NEXT: G_STORE [[LOAD1]](<32 x s32>), [[PRED_COPY]](p5) :: (store (<32 x s32>), addrspace 5) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 128 - ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) + ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[PRED_COPY]], [[C1]](s32) ; CHECK-NEXT: G_STORE [[LOAD2]](s32), [[PTR_ADD1]](p5) :: (store (s32), align 128, addrspace 5) ; CHECK-NEXT: SI_RETURN %ptr = load volatile ptr addrspace(1), ptr addrspace(4) undef @@ -1035,16 +1035,16 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(p5) = PRED_COPY $vgpr0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile invariant load (p1) from `ptr addrspace(4) undef`, addrspace 4) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[LOAD]](p1) :: (load (s32) from %ir.ptr, align 128, addrspace 1) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 128 ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[LOAD]], [[C]](s64) ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<32 x s32>) from %ir.ptr + 128, addrspace 1) - ; CHECK-NEXT: G_STORE [[LOAD1]](s32), [[COPY]](p5) :: (store (s32), align 128, addrspace 5) + ; CHECK-NEXT: G_STORE [[LOAD1]](s32), [[PRED_COPY]](p5) :: (store (s32), align 128, addrspace 5) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 128 - ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) + ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[PRED_COPY]], [[C1]](s32) ; CHECK-NEXT: G_STORE [[LOAD2]](<32 x s32>), [[PTR_ADD1]](p5) :: (store (<32 x s32>), addrspace 5) ; CHECK-NEXT: SI_RETURN %ptr = load volatile ptr addrspace(1), ptr addrspace(4) undef @@ -1070,10 +1070,10 @@ ; CHECK-NEXT: 
[[IVEC1:%[0-9]+]]:_(<3 x s32>) = G_INSERT_VECTOR_ELT [[IVEC]], [[LOAD1]](s32), [[C1]](s32) ; CHECK-NEXT: [[IVEC2:%[0-9]+]]:_(<3 x s32>) = G_INSERT_VECTOR_ELT [[IVEC1]], [[LOAD2]](s32), [[C2]](s32) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[IVEC2]](<3 x s32>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[LOAD3]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = PRED_COPY [[LOAD3]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 %load0 = load volatile i32, ptr addrspace(3) undef %load1 = load volatile i32, ptr addrspace(3) undef @@ -1105,10 +1105,10 @@ ; CHECK-NEXT: [[IVEC1:%[0-9]+]]:_(<3 x s32>) = G_INSERT_VECTOR_ELT [[IVEC]], [[LOAD1]](s32), [[C1]](s32) ; CHECK-NEXT: [[IVEC2:%[0-9]+]]:_(<3 x s32>) = G_INSERT_VECTOR_ELT [[IVEC1]], [[LOAD2]](s32), [[C2]](s32) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[IVEC2]](<3 x s32>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[LOAD3]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = PRED_COPY [[LOAD3]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 %load0 = load volatile float, ptr addrspace(3) undef %load1 = load volatile float, ptr addrspace(3) undef @@ -1128,12 +1128,12 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(p5) = PRED_COPY $vgpr0 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 17 ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 18 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p3) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY]](p5) + ; CHECK-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[PRED_COPY]](p5) ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PTRTOINT]], [[C]](s32) ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[PTRTOINT]], [[C1]](s32) ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[PTRTOINT]], [[C2]](s32) @@ -1160,38 +1160,38 @@ ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s1022) = G_LOAD [[DEF]](p1) :: (load (s1022) from `ptr addrspace(1) undef`, align 8, addrspace 1) ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s1024) = G_ANYEXT [[LOAD]](s1022) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), 
[[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ANYEXT]](s1024) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](s32) - ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](s32) - ; CHECK-NEXT: $vgpr8 = COPY [[UV8]](s32) - ; CHECK-NEXT: $vgpr9 = COPY [[UV9]](s32) - ; CHECK-NEXT: $vgpr10 = COPY [[UV10]](s32) - ; CHECK-NEXT: $vgpr11 = COPY [[UV11]](s32) - ; CHECK-NEXT: $vgpr12 = COPY [[UV12]](s32) - ; CHECK-NEXT: $vgpr13 = COPY [[UV13]](s32) - ; CHECK-NEXT: $vgpr14 = COPY [[UV14]](s32) - ; CHECK-NEXT: $vgpr15 = COPY [[UV15]](s32) - ; CHECK-NEXT: $vgpr16 = COPY [[UV16]](s32) - ; CHECK-NEXT: $vgpr17 = COPY [[UV17]](s32) - ; CHECK-NEXT: $vgpr18 = COPY [[UV18]](s32) - ; CHECK-NEXT: $vgpr19 = COPY [[UV19]](s32) - ; CHECK-NEXT: $vgpr20 = COPY [[UV20]](s32) - ; CHECK-NEXT: $vgpr21 = COPY [[UV21]](s32) - ; CHECK-NEXT: $vgpr22 = COPY [[UV22]](s32) - ; CHECK-NEXT: $vgpr23 = COPY [[UV23]](s32) - ; CHECK-NEXT: $vgpr24 = COPY [[UV24]](s32) - ; CHECK-NEXT: $vgpr25 = COPY [[UV25]](s32) - ; CHECK-NEXT: $vgpr26 = COPY [[UV26]](s32) - ; CHECK-NEXT: $vgpr27 = COPY [[UV27]](s32) - ; CHECK-NEXT: $vgpr28 = COPY [[UV28]](s32) - ; CHECK-NEXT: $vgpr29 = COPY [[UV29]](s32) - ; CHECK-NEXT: $vgpr30 = COPY [[UV30]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[UV31]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) + ; CHECK-NEXT: $vgpr4 = PRED_COPY [[UV4]](s32) + ; CHECK-NEXT: $vgpr5 = PRED_COPY [[UV5]](s32) + ; CHECK-NEXT: $vgpr6 = PRED_COPY [[UV6]](s32) + ; CHECK-NEXT: $vgpr7 = PRED_COPY [[UV7]](s32) + ; CHECK-NEXT: $vgpr8 = PRED_COPY [[UV8]](s32) + ; CHECK-NEXT: $vgpr9 = PRED_COPY [[UV9]](s32) + ; CHECK-NEXT: $vgpr10 = PRED_COPY [[UV10]](s32) + ; CHECK-NEXT: $vgpr11 = PRED_COPY [[UV11]](s32) + ; CHECK-NEXT: $vgpr12 = PRED_COPY [[UV12]](s32) + ; CHECK-NEXT: $vgpr13 = PRED_COPY [[UV13]](s32) + ; CHECK-NEXT: $vgpr14 = PRED_COPY [[UV14]](s32) + ; CHECK-NEXT: $vgpr15 = PRED_COPY [[UV15]](s32) + ; CHECK-NEXT: $vgpr16 = PRED_COPY [[UV16]](s32) + ; CHECK-NEXT: $vgpr17 = PRED_COPY [[UV17]](s32) + ; CHECK-NEXT: $vgpr18 = PRED_COPY [[UV18]](s32) + ; CHECK-NEXT: $vgpr19 = PRED_COPY [[UV19]](s32) + ; CHECK-NEXT: $vgpr20 = PRED_COPY [[UV20]](s32) + ; CHECK-NEXT: $vgpr21 = PRED_COPY [[UV21]](s32) + ; CHECK-NEXT: $vgpr22 = PRED_COPY [[UV22]](s32) + ; CHECK-NEXT: $vgpr23 = PRED_COPY [[UV23]](s32) + ; CHECK-NEXT: $vgpr24 = PRED_COPY [[UV24]](s32) + ; CHECK-NEXT: $vgpr25 = PRED_COPY [[UV25]](s32) + ; CHECK-NEXT: $vgpr26 = PRED_COPY [[UV26]](s32) + ; CHECK-NEXT: $vgpr27 = PRED_COPY [[UV27]](s32) + ; CHECK-NEXT: $vgpr28 = PRED_COPY [[UV28]](s32) + ; CHECK-NEXT: $vgpr29 = PRED_COPY [[UV29]](s32) + ; CHECK-NEXT: $vgpr30 = PRED_COPY [[UV30]](s32) + ; CHECK-NEXT: $vgpr31 = PRED_COPY [[UV31]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit 
$vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $vgpr31 %val = load i1022, ptr addrspace(1) undef ret i1022 %val @@ -1204,38 +1204,38 @@ ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s1022) = G_LOAD [[DEF]](p1) :: (load (s1022) from `ptr addrspace(1) undef`, align 8, addrspace 1) ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s1024) = G_SEXT [[LOAD]](s1022) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SEXT]](s1024) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](s32) - ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](s32) - ; CHECK-NEXT: $vgpr8 = COPY [[UV8]](s32) - ; CHECK-NEXT: $vgpr9 = COPY [[UV9]](s32) - ; CHECK-NEXT: $vgpr10 = COPY [[UV10]](s32) - ; CHECK-NEXT: $vgpr11 = COPY [[UV11]](s32) - ; CHECK-NEXT: $vgpr12 = COPY [[UV12]](s32) - ; CHECK-NEXT: $vgpr13 = COPY [[UV13]](s32) - ; CHECK-NEXT: $vgpr14 = COPY [[UV14]](s32) - ; CHECK-NEXT: $vgpr15 = COPY [[UV15]](s32) - ; CHECK-NEXT: $vgpr16 = COPY [[UV16]](s32) - ; CHECK-NEXT: $vgpr17 = COPY [[UV17]](s32) - ; CHECK-NEXT: $vgpr18 = COPY [[UV18]](s32) - ; CHECK-NEXT: $vgpr19 = COPY [[UV19]](s32) - ; CHECK-NEXT: $vgpr20 = COPY [[UV20]](s32) - ; CHECK-NEXT: $vgpr21 = COPY [[UV21]](s32) - ; CHECK-NEXT: $vgpr22 = COPY [[UV22]](s32) - ; CHECK-NEXT: $vgpr23 = COPY [[UV23]](s32) - ; CHECK-NEXT: $vgpr24 = COPY [[UV24]](s32) - ; CHECK-NEXT: $vgpr25 = COPY [[UV25]](s32) - ; CHECK-NEXT: $vgpr26 = COPY [[UV26]](s32) - ; CHECK-NEXT: $vgpr27 = COPY [[UV27]](s32) - ; CHECK-NEXT: $vgpr28 = COPY [[UV28]](s32) - ; CHECK-NEXT: $vgpr29 = COPY [[UV29]](s32) - ; CHECK-NEXT: $vgpr30 = COPY [[UV30]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[UV31]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) + ; CHECK-NEXT: $vgpr4 = PRED_COPY [[UV4]](s32) + ; CHECK-NEXT: $vgpr5 = PRED_COPY [[UV5]](s32) + ; CHECK-NEXT: $vgpr6 = PRED_COPY [[UV6]](s32) + ; CHECK-NEXT: $vgpr7 = PRED_COPY [[UV7]](s32) + ; CHECK-NEXT: $vgpr8 = PRED_COPY [[UV8]](s32) + ; CHECK-NEXT: $vgpr9 = PRED_COPY [[UV9]](s32) + ; CHECK-NEXT: $vgpr10 = PRED_COPY [[UV10]](s32) + ; CHECK-NEXT: $vgpr11 = PRED_COPY [[UV11]](s32) + ; CHECK-NEXT: $vgpr12 = PRED_COPY [[UV12]](s32) + ; CHECK-NEXT: $vgpr13 = PRED_COPY [[UV13]](s32) + ; CHECK-NEXT: $vgpr14 = PRED_COPY [[UV14]](s32) + ; CHECK-NEXT: $vgpr15 = PRED_COPY [[UV15]](s32) + ; CHECK-NEXT: $vgpr16 = PRED_COPY [[UV16]](s32) + ; CHECK-NEXT: 
$vgpr17 = PRED_COPY [[UV17]](s32) + ; CHECK-NEXT: $vgpr18 = PRED_COPY [[UV18]](s32) + ; CHECK-NEXT: $vgpr19 = PRED_COPY [[UV19]](s32) + ; CHECK-NEXT: $vgpr20 = PRED_COPY [[UV20]](s32) + ; CHECK-NEXT: $vgpr21 = PRED_COPY [[UV21]](s32) + ; CHECK-NEXT: $vgpr22 = PRED_COPY [[UV22]](s32) + ; CHECK-NEXT: $vgpr23 = PRED_COPY [[UV23]](s32) + ; CHECK-NEXT: $vgpr24 = PRED_COPY [[UV24]](s32) + ; CHECK-NEXT: $vgpr25 = PRED_COPY [[UV25]](s32) + ; CHECK-NEXT: $vgpr26 = PRED_COPY [[UV26]](s32) + ; CHECK-NEXT: $vgpr27 = PRED_COPY [[UV27]](s32) + ; CHECK-NEXT: $vgpr28 = PRED_COPY [[UV28]](s32) + ; CHECK-NEXT: $vgpr29 = PRED_COPY [[UV29]](s32) + ; CHECK-NEXT: $vgpr30 = PRED_COPY [[UV30]](s32) + ; CHECK-NEXT: $vgpr31 = PRED_COPY [[UV31]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $vgpr31 %val = load i1022, ptr addrspace(1) undef ret i1022 %val @@ -1248,38 +1248,38 @@ ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s1022) = G_LOAD [[DEF]](p1) :: (load (s1022) from `ptr addrspace(1) undef`, align 8, addrspace 1) ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s1024) = G_ZEXT [[LOAD]](s1022) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ZEXT]](s1024) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](s32) - ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](s32) - ; CHECK-NEXT: $vgpr8 = COPY [[UV8]](s32) - ; CHECK-NEXT: $vgpr9 = COPY [[UV9]](s32) - ; CHECK-NEXT: $vgpr10 = COPY [[UV10]](s32) - ; CHECK-NEXT: $vgpr11 = COPY [[UV11]](s32) - ; CHECK-NEXT: $vgpr12 = COPY [[UV12]](s32) - ; CHECK-NEXT: $vgpr13 = COPY [[UV13]](s32) - ; CHECK-NEXT: $vgpr14 = COPY [[UV14]](s32) - ; CHECK-NEXT: $vgpr15 = COPY [[UV15]](s32) - ; CHECK-NEXT: $vgpr16 = COPY [[UV16]](s32) - ; CHECK-NEXT: $vgpr17 = COPY [[UV17]](s32) - ; CHECK-NEXT: $vgpr18 = COPY [[UV18]](s32) - ; CHECK-NEXT: $vgpr19 = COPY [[UV19]](s32) - ; CHECK-NEXT: $vgpr20 = COPY [[UV20]](s32) - ; CHECK-NEXT: $vgpr21 = COPY [[UV21]](s32) - ; CHECK-NEXT: $vgpr22 = COPY [[UV22]](s32) - ; CHECK-NEXT: $vgpr23 = COPY [[UV23]](s32) - ; CHECK-NEXT: $vgpr24 = COPY [[UV24]](s32) - ; 
CHECK-NEXT: $vgpr25 = COPY [[UV25]](s32) - ; CHECK-NEXT: $vgpr26 = COPY [[UV26]](s32) - ; CHECK-NEXT: $vgpr27 = COPY [[UV27]](s32) - ; CHECK-NEXT: $vgpr28 = COPY [[UV28]](s32) - ; CHECK-NEXT: $vgpr29 = COPY [[UV29]](s32) - ; CHECK-NEXT: $vgpr30 = COPY [[UV30]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[UV31]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) + ; CHECK-NEXT: $vgpr4 = PRED_COPY [[UV4]](s32) + ; CHECK-NEXT: $vgpr5 = PRED_COPY [[UV5]](s32) + ; CHECK-NEXT: $vgpr6 = PRED_COPY [[UV6]](s32) + ; CHECK-NEXT: $vgpr7 = PRED_COPY [[UV7]](s32) + ; CHECK-NEXT: $vgpr8 = PRED_COPY [[UV8]](s32) + ; CHECK-NEXT: $vgpr9 = PRED_COPY [[UV9]](s32) + ; CHECK-NEXT: $vgpr10 = PRED_COPY [[UV10]](s32) + ; CHECK-NEXT: $vgpr11 = PRED_COPY [[UV11]](s32) + ; CHECK-NEXT: $vgpr12 = PRED_COPY [[UV12]](s32) + ; CHECK-NEXT: $vgpr13 = PRED_COPY [[UV13]](s32) + ; CHECK-NEXT: $vgpr14 = PRED_COPY [[UV14]](s32) + ; CHECK-NEXT: $vgpr15 = PRED_COPY [[UV15]](s32) + ; CHECK-NEXT: $vgpr16 = PRED_COPY [[UV16]](s32) + ; CHECK-NEXT: $vgpr17 = PRED_COPY [[UV17]](s32) + ; CHECK-NEXT: $vgpr18 = PRED_COPY [[UV18]](s32) + ; CHECK-NEXT: $vgpr19 = PRED_COPY [[UV19]](s32) + ; CHECK-NEXT: $vgpr20 = PRED_COPY [[UV20]](s32) + ; CHECK-NEXT: $vgpr21 = PRED_COPY [[UV21]](s32) + ; CHECK-NEXT: $vgpr22 = PRED_COPY [[UV22]](s32) + ; CHECK-NEXT: $vgpr23 = PRED_COPY [[UV23]](s32) + ; CHECK-NEXT: $vgpr24 = PRED_COPY [[UV24]](s32) + ; CHECK-NEXT: $vgpr25 = PRED_COPY [[UV25]](s32) + ; CHECK-NEXT: $vgpr26 = PRED_COPY [[UV26]](s32) + ; CHECK-NEXT: $vgpr27 = PRED_COPY [[UV27]](s32) + ; CHECK-NEXT: $vgpr28 = PRED_COPY [[UV28]](s32) + ; CHECK-NEXT: $vgpr29 = PRED_COPY [[UV29]](s32) + ; CHECK-NEXT: $vgpr30 = PRED_COPY [[UV30]](s32) + ; CHECK-NEXT: $vgpr31 = PRED_COPY [[UV31]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $vgpr31 %val = load i1022, ptr addrspace(1) undef ret i1022 %val @@ -1292,7 +1292,7 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(p5) = PRED_COPY $vgpr0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[DEF]](p1) :: (volatile load (<32 x s32>) from `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 128 @@ -1304,15 +1304,15 @@ ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 144 ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:_(<2 x p1>) = G_LOAD [[PTR_ADD2]](p1) :: (volatile load (<2 x p1>) from `ptr addrspace(1) undef` + 144, addrspace 1) - ; CHECK-NEXT: G_STORE [[LOAD]](<32 x s32>), [[COPY]](p5) :: (store (<32 x s32>), addrspace 5) + ; CHECK-NEXT: G_STORE [[LOAD]](<32 x s32>), [[PRED_COPY]](p5) :: (store (<32 x s32>), addrspace 5) ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT 
i32 128 - ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) + ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[PRED_COPY]], [[C3]](s32) ; CHECK-NEXT: G_STORE [[LOAD1]](p3), [[PTR_ADD3]](p5) :: (store (p3), align 128, addrspace 5) ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 136 - ; CHECK-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) + ; CHECK-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PRED_COPY]], [[C4]](s32) ; CHECK-NEXT: G_STORE [[LOAD2]](p1), [[PTR_ADD4]](p5) :: (store (p1), addrspace 5) ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 144 - ; CHECK-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32) + ; CHECK-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PRED_COPY]], [[C5]](s32) ; CHECK-NEXT: G_STORE [[LOAD3]](<2 x p1>), [[PTR_ADD5]](p5) :: (store (<2 x p1>), addrspace 5) ; CHECK-NEXT: SI_RETURN %val = load volatile %struct.with.ptrs, ptr addrspace(1) undef diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f32-no-rtn.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f32-no-rtn.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f32-no-rtn.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f32-no-rtn.ll @@ -9,21 +9,21 @@ ; GFX908_GFX11: bb.1 (%ir-block.0): ; GFX908_GFX11-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX908_GFX11-NEXT: {{ $}} - ; GFX908_GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX908_GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX908_GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX908_GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX908_GFX11-NEXT: GLOBAL_ATOMIC_ADD_F32 [[REG_SEQUENCE]], [[COPY2]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) + ; GFX908_GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX908_GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX908_GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX908_GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX908_GFX11-NEXT: GLOBAL_ATOMIC_ADD_F32 [[REG_SEQUENCE]], [[PRED_COPY2]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) ; GFX908_GFX11-NEXT: S_ENDPGM 0 ; GFX90A_GFX940-LABEL: name: global_atomic_fadd_f32_no_rtn_intrinsic ; GFX90A_GFX940: bb.1 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: GLOBAL_ATOMIC_ADD_F32 [[REG_SEQUENCE]], [[COPY2]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX90A_GFX940-NEXT: GLOBAL_ATOMIC_ADD_F32 [[REG_SEQUENCE]], [[PRED_COPY2]], 0, 0, implicit $exec :: 
(volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) ; GFX90A_GFX940-NEXT: S_ENDPGM 0 %ret = call float @llvm.amdgcn.global.atomic.fadd.f32.p1.f32(ptr addrspace(1) %ptr, float %data) ret void @@ -34,23 +34,23 @@ ; GFX908_GFX11: bb.1 (%ir-block.0): ; GFX908_GFX11-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0 ; GFX908_GFX11-NEXT: {{ $}} - ; GFX908_GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX908_GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX908_GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX908_GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX908_GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX908_GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX908_GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX908_GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX908_GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX908_GFX11-NEXT: GLOBAL_ATOMIC_ADD_F32_SADDR [[V_MOV_B32_e32_]], [[COPY2]], [[REG_SEQUENCE]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) + ; GFX908_GFX11-NEXT: GLOBAL_ATOMIC_ADD_F32_SADDR [[V_MOV_B32_e32_]], [[PRED_COPY2]], [[REG_SEQUENCE]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) ; GFX908_GFX11-NEXT: S_ENDPGM 0 ; GFX90A_GFX940-LABEL: name: global_atomic_fadd_f32_saddr_no_rtn_intrinsic ; GFX90A_GFX940: bb.1 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX90A_GFX940-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX90A_GFX940-NEXT: GLOBAL_ATOMIC_ADD_F32_SADDR [[V_MOV_B32_e32_]], [[COPY2]], [[REG_SEQUENCE]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) + ; GFX90A_GFX940-NEXT: GLOBAL_ATOMIC_ADD_F32_SADDR [[V_MOV_B32_e32_]], [[PRED_COPY2]], [[REG_SEQUENCE]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) ; GFX90A_GFX940-NEXT: S_ENDPGM 0 %ret = call float @llvm.amdgcn.global.atomic.fadd.f32.p1.f32(ptr addrspace(1) inreg %ptr, float %data) ret void @@ -61,21 +61,21 @@ ; GFX908_GFX11: bb.1 (%ir-block.0): ; GFX908_GFX11-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX908_GFX11-NEXT: {{ $}} - ; GFX908_GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX908_GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX908_GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX908_GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX908_GFX11-NEXT: GLOBAL_ATOMIC_ADD_F32 [[REG_SEQUENCE]], [[COPY2]], 0, 0, implicit $exec :: (volatile 
dereferenceable load store (s32) on %ir.ptr, addrspace 1) + ; GFX908_GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX908_GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX908_GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX908_GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX908_GFX11-NEXT: GLOBAL_ATOMIC_ADD_F32 [[REG_SEQUENCE]], [[PRED_COPY2]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) ; GFX908_GFX11-NEXT: S_ENDPGM 0 ; GFX90A_GFX940-LABEL: name: global_atomic_fadd_f32_no_rtn_flat_intrinsic ; GFX90A_GFX940: bb.1 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: GLOBAL_ATOMIC_ADD_F32 [[REG_SEQUENCE]], [[COPY2]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX90A_GFX940-NEXT: GLOBAL_ATOMIC_ADD_F32 [[REG_SEQUENCE]], [[PRED_COPY2]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) ; GFX90A_GFX940-NEXT: S_ENDPGM 0 %ret = call float @llvm.amdgcn.flat.atomic.fadd.f32.p1.f32(ptr addrspace(1) %ptr, float %data) ret void @@ -86,23 +86,23 @@ ; GFX908_GFX11: bb.1 (%ir-block.0): ; GFX908_GFX11-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0 ; GFX908_GFX11-NEXT: {{ $}} - ; GFX908_GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX908_GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX908_GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX908_GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX908_GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX908_GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX908_GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX908_GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX908_GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX908_GFX11-NEXT: GLOBAL_ATOMIC_ADD_F32_SADDR [[V_MOV_B32_e32_]], [[COPY2]], [[REG_SEQUENCE]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) + ; GFX908_GFX11-NEXT: GLOBAL_ATOMIC_ADD_F32_SADDR [[V_MOV_B32_e32_]], [[PRED_COPY2]], [[REG_SEQUENCE]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) ; GFX908_GFX11-NEXT: S_ENDPGM 0 ; GFX90A_GFX940-LABEL: name: global_atomic_fadd_f32_saddr_no_rtn_flat_intrinsic ; GFX90A_GFX940: bb.1 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: 
[[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX90A_GFX940-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX90A_GFX940-NEXT: GLOBAL_ATOMIC_ADD_F32_SADDR [[V_MOV_B32_e32_]], [[COPY2]], [[REG_SEQUENCE]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) + ; GFX90A_GFX940-NEXT: GLOBAL_ATOMIC_ADD_F32_SADDR [[V_MOV_B32_e32_]], [[PRED_COPY2]], [[REG_SEQUENCE]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) ; GFX90A_GFX940-NEXT: S_ENDPGM 0 %ret = call float @llvm.amdgcn.flat.atomic.fadd.f32.p1.f32(ptr addrspace(1) inreg %ptr, float %data) ret void @@ -113,21 +113,21 @@ ; GFX908_GFX11: bb.1 (%ir-block.0): ; GFX908_GFX11-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX908_GFX11-NEXT: {{ $}} - ; GFX908_GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX908_GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX908_GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX908_GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX908_GFX11-NEXT: GLOBAL_ATOMIC_ADD_F32 [[REG_SEQUENCE]], [[COPY2]], 0, 0, implicit $exec :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr, addrspace 1) + ; GFX908_GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX908_GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX908_GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX908_GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX908_GFX11-NEXT: GLOBAL_ATOMIC_ADD_F32 [[REG_SEQUENCE]], [[PRED_COPY2]], 0, 0, implicit $exec :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr, addrspace 1) ; GFX908_GFX11-NEXT: S_ENDPGM 0 ; GFX90A_GFX940-LABEL: name: global_atomic_fadd_f32_no_rtn_atomicrmw ; GFX90A_GFX940: bb.1 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: GLOBAL_ATOMIC_ADD_F32 [[REG_SEQUENCE]], [[COPY2]], 0, 0, implicit $exec :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr, addrspace 1) + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX90A_GFX940-NEXT: GLOBAL_ATOMIC_ADD_F32 [[REG_SEQUENCE]], [[PRED_COPY2]], 0, 0, implicit $exec :: (load store 
syncscope("wavefront") monotonic (s32) on %ir.ptr, addrspace 1) ; GFX90A_GFX940-NEXT: S_ENDPGM 0 %ret = atomicrmw fadd ptr addrspace(1) %ptr, float %data syncscope("wavefront") monotonic ret void @@ -138,23 +138,23 @@ ; GFX908_GFX11: bb.1 (%ir-block.0): ; GFX908_GFX11-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0 ; GFX908_GFX11-NEXT: {{ $}} - ; GFX908_GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX908_GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX908_GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX908_GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX908_GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX908_GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX908_GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX908_GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX908_GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX908_GFX11-NEXT: GLOBAL_ATOMIC_ADD_F32_SADDR [[V_MOV_B32_e32_]], [[COPY2]], [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr, addrspace 1) + ; GFX908_GFX11-NEXT: GLOBAL_ATOMIC_ADD_F32_SADDR [[V_MOV_B32_e32_]], [[PRED_COPY2]], [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr, addrspace 1) ; GFX908_GFX11-NEXT: S_ENDPGM 0 ; GFX90A_GFX940-LABEL: name: global_atomic_fadd_f32_saddr_no_rtn_atomicrmw ; GFX90A_GFX940: bb.1 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX90A_GFX940-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX90A_GFX940-NEXT: GLOBAL_ATOMIC_ADD_F32_SADDR [[V_MOV_B32_e32_]], [[COPY2]], [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr, addrspace 1) + ; GFX90A_GFX940-NEXT: GLOBAL_ATOMIC_ADD_F32_SADDR [[V_MOV_B32_e32_]], [[PRED_COPY2]], [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr, addrspace 1) ; GFX90A_GFX940-NEXT: S_ENDPGM 0 %ret = atomicrmw fadd ptr addrspace(1) %ptr, float %data syncscope("wavefront") monotonic ret void diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f32-rtn.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f32-rtn.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f32-rtn.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f32-rtn.ll @@ -8,23 +8,23 @@ ; GFX90A_GFX940: bb.1 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY 
$vgpr1 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: [[GLOBAL_ATOMIC_ADD_F32_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_F32_RTN [[REG_SEQUENCE]], [[COPY2]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) - ; GFX90A_GFX940-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_F32_RTN]] + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX90A_GFX940-NEXT: [[GLOBAL_ATOMIC_ADD_F32_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_F32_RTN [[REG_SEQUENCE]], [[PRED_COPY2]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) + ; GFX90A_GFX940-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_ATOMIC_ADD_F32_RTN]] ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX11-LABEL: name: global_atomic_fadd_f32_rtn_intrinsic ; GFX11: bb.1 (%ir-block.0): ; GFX11-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX11-NEXT: [[GLOBAL_ATOMIC_ADD_F32_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_F32_RTN [[REG_SEQUENCE]], [[COPY2]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) - ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_F32_RTN]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX11-NEXT: [[GLOBAL_ATOMIC_ADD_F32_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_F32_RTN [[REG_SEQUENCE]], [[PRED_COPY2]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_ATOMIC_ADD_F32_RTN]] ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %ret = call float @llvm.amdgcn.global.atomic.fadd.f32.p1.f32(ptr addrspace(1) %ptr, float %data) ret float %ret @@ -35,25 +35,25 @@ ; GFX90A_GFX940: bb.1 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX90A_GFX940-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit 
$exec - ; GFX90A_GFX940-NEXT: [[GLOBAL_ATOMIC_ADD_F32_SADDR_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_F32_SADDR_RTN [[V_MOV_B32_e32_]], [[COPY2]], [[REG_SEQUENCE]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) - ; GFX90A_GFX940-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_F32_SADDR_RTN]] + ; GFX90A_GFX940-NEXT: [[GLOBAL_ATOMIC_ADD_F32_SADDR_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_F32_SADDR_RTN [[V_MOV_B32_e32_]], [[PRED_COPY2]], [[REG_SEQUENCE]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) + ; GFX90A_GFX940-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_ATOMIC_ADD_F32_SADDR_RTN]] ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX11-LABEL: name: global_atomic_fadd_f32_saddr_rtn_intrinsic ; GFX11: bb.1 (%ir-block.0): ; GFX11-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX11-NEXT: [[GLOBAL_ATOMIC_ADD_F32_SADDR_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_F32_SADDR_RTN [[V_MOV_B32_e32_]], [[COPY2]], [[REG_SEQUENCE]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) - ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_F32_SADDR_RTN]] + ; GFX11-NEXT: [[GLOBAL_ATOMIC_ADD_F32_SADDR_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_F32_SADDR_RTN [[V_MOV_B32_e32_]], [[PRED_COPY2]], [[REG_SEQUENCE]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_ATOMIC_ADD_F32_SADDR_RTN]] ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %ret = call float @llvm.amdgcn.global.atomic.fadd.f32.p1.f32(ptr addrspace(1) inreg %ptr, float %data) ret float %ret @@ -64,23 +64,23 @@ ; GFX90A_GFX940: bb.1 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: [[GLOBAL_ATOMIC_ADD_F32_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_F32_RTN [[REG_SEQUENCE]], [[COPY2]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) - ; GFX90A_GFX940-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_F32_RTN]] + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX90A_GFX940-NEXT: [[GLOBAL_ATOMIC_ADD_F32_RTN:%[0-9]+]]:vgpr_32 = 
GLOBAL_ATOMIC_ADD_F32_RTN [[REG_SEQUENCE]], [[PRED_COPY2]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) + ; GFX90A_GFX940-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_ATOMIC_ADD_F32_RTN]] ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX11-LABEL: name: global_atomic_fadd_f32_rtn_flat_intrinsic ; GFX11: bb.1 (%ir-block.0): ; GFX11-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX11-NEXT: [[GLOBAL_ATOMIC_ADD_F32_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_F32_RTN [[REG_SEQUENCE]], [[COPY2]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) - ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_F32_RTN]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX11-NEXT: [[GLOBAL_ATOMIC_ADD_F32_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_F32_RTN [[REG_SEQUENCE]], [[PRED_COPY2]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_ATOMIC_ADD_F32_RTN]] ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %ret = call float @llvm.amdgcn.flat.atomic.fadd.f32.p1.f32(ptr addrspace(1) %ptr, float %data) ret float %ret @@ -91,25 +91,25 @@ ; GFX90A_GFX940: bb.1 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX90A_GFX940-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX90A_GFX940-NEXT: [[GLOBAL_ATOMIC_ADD_F32_SADDR_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_F32_SADDR_RTN [[V_MOV_B32_e32_]], [[COPY2]], [[REG_SEQUENCE]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) - ; GFX90A_GFX940-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_F32_SADDR_RTN]] + ; GFX90A_GFX940-NEXT: [[GLOBAL_ATOMIC_ADD_F32_SADDR_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_F32_SADDR_RTN [[V_MOV_B32_e32_]], [[PRED_COPY2]], [[REG_SEQUENCE]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) + ; GFX90A_GFX940-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_ATOMIC_ADD_F32_SADDR_RTN]] ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX11-LABEL: name: global_atomic_fadd_f32_saddr_rtn_flat_intrinsic ; GFX11: bb.1 (%ir-block.0): ; GFX11-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: 
[[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX11-NEXT: [[GLOBAL_ATOMIC_ADD_F32_SADDR_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_F32_SADDR_RTN [[V_MOV_B32_e32_]], [[COPY2]], [[REG_SEQUENCE]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) - ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_F32_SADDR_RTN]] + ; GFX11-NEXT: [[GLOBAL_ATOMIC_ADD_F32_SADDR_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_F32_SADDR_RTN [[V_MOV_B32_e32_]], [[PRED_COPY2]], [[REG_SEQUENCE]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_ATOMIC_ADD_F32_SADDR_RTN]] ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %ret = call float @llvm.amdgcn.flat.atomic.fadd.f32.p1.f32(ptr addrspace(1) inreg %ptr, float %data) ret float %ret @@ -120,23 +120,23 @@ ; GFX90A_GFX940: bb.1 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: [[GLOBAL_ATOMIC_ADD_F32_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_F32_RTN [[REG_SEQUENCE]], [[COPY2]], 0, 1, implicit $exec :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr, addrspace 1) - ; GFX90A_GFX940-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_F32_RTN]] + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX90A_GFX940-NEXT: [[GLOBAL_ATOMIC_ADD_F32_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_F32_RTN [[REG_SEQUENCE]], [[PRED_COPY2]], 0, 1, implicit $exec :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr, addrspace 1) + ; GFX90A_GFX940-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_ATOMIC_ADD_F32_RTN]] ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX11-LABEL: name: global_atomic_fadd_f32_rtn_atomicrmw ; GFX11: bb.1 (%ir-block.0): ; GFX11-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX11-NEXT: [[GLOBAL_ATOMIC_ADD_F32_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_F32_RTN [[REG_SEQUENCE]], [[COPY2]], 0, 1, implicit $exec :: (load store syncscope("wavefront") 
monotonic (s32) on %ir.ptr, addrspace 1) - ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_F32_RTN]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX11-NEXT: [[GLOBAL_ATOMIC_ADD_F32_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_F32_RTN [[REG_SEQUENCE]], [[PRED_COPY2]], 0, 1, implicit $exec :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr, addrspace 1) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_ATOMIC_ADD_F32_RTN]] ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %ret = atomicrmw fadd ptr addrspace(1) %ptr, float %data syncscope("wavefront") monotonic ret float %ret @@ -147,25 +147,25 @@ ; GFX90A_GFX940: bb.1 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX90A_GFX940-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX90A_GFX940-NEXT: [[GLOBAL_ATOMIC_ADD_F32_SADDR_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_F32_SADDR_RTN [[V_MOV_B32_e32_]], [[COPY2]], [[REG_SEQUENCE]], 0, 1, implicit $exec :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr, addrspace 1) - ; GFX90A_GFX940-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_F32_SADDR_RTN]] + ; GFX90A_GFX940-NEXT: [[GLOBAL_ATOMIC_ADD_F32_SADDR_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_F32_SADDR_RTN [[V_MOV_B32_e32_]], [[PRED_COPY2]], [[REG_SEQUENCE]], 0, 1, implicit $exec :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr, addrspace 1) + ; GFX90A_GFX940-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_ATOMIC_ADD_F32_SADDR_RTN]] ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX11-LABEL: name: global_atomic_fadd_f32_saddr_rtn_atomicrmw ; GFX11: bb.1 (%ir-block.0): ; GFX11-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX11-NEXT: [[GLOBAL_ATOMIC_ADD_F32_SADDR_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_F32_SADDR_RTN [[V_MOV_B32_e32_]], [[COPY2]], [[REG_SEQUENCE]], 0, 1, implicit $exec :: (load store syncscope("wavefront") monotonic 
(s32) on %ir.ptr, addrspace 1) - ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_F32_SADDR_RTN]] + ; GFX11-NEXT: [[GLOBAL_ATOMIC_ADD_F32_SADDR_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_F32_SADDR_RTN [[V_MOV_B32_e32_]], [[PRED_COPY2]], [[REG_SEQUENCE]], 0, 1, implicit $exec :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr, addrspace 1) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_ATOMIC_ADD_F32_SADDR_RTN]] ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %ret = atomicrmw fadd ptr addrspace(1) %ptr, float %data syncscope("wavefront") monotonic ret float %ret diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f64.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f64.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f64.ll @@ -7,12 +7,12 @@ ; GFX90A_GFX940: bb.1 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY3]], %subreg.sub1 ; GFX90A_GFX940-NEXT: GLOBAL_ATOMIC_ADD_F64 [[REG_SEQUENCE]], [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.ptr, addrspace 1) ; GFX90A_GFX940-NEXT: S_ENDPGM 0 %ret = call double @llvm.amdgcn.global.atomic.fadd.f64.p1.f64(ptr addrspace(1) %ptr, double %data) @@ -24,19 +24,19 @@ ; GFX90A_GFX940: bb.1 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = 
REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY3]], %subreg.sub1 ; GFX90A_GFX940-NEXT: [[GLOBAL_ATOMIC_ADD_F64_RTN:%[0-9]+]]:vreg_64_align2 = GLOBAL_ATOMIC_ADD_F64_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE1]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.ptr, addrspace 1) - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_ATOMIC_ADD_F64_RTN]].sub0 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_ATOMIC_ADD_F64_RTN]].sub1 - ; GFX90A_GFX940-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX90A_GFX940-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX90A_GFX940-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec - ; GFX90A_GFX940-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; GFX90A_GFX940-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[GLOBAL_ATOMIC_ADD_F64_RTN]].sub0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[GLOBAL_ATOMIC_ADD_F64_RTN]].sub1 + ; GFX90A_GFX940-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY4]], implicit $exec + ; GFX90A_GFX940-NEXT: $sgpr0 = PRED_COPY [[V_READFIRSTLANE_B32_]] + ; GFX90A_GFX940-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY5]], implicit $exec + ; GFX90A_GFX940-NEXT: $sgpr1 = PRED_COPY [[V_READFIRSTLANE_B32_1]] ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 %ret = call double @llvm.amdgcn.global.atomic.fadd.f64.p1.f64(ptr addrspace(1) %ptr, double %data) ret double %ret @@ -47,12 +47,12 @@ ; GFX90A_GFX940: bb.1 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY3]], %subreg.sub1 ; GFX90A_GFX940-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX90A_GFX940-NEXT: GLOBAL_ATOMIC_ADD_F64_SADDR [[V_MOV_B32_e32_]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.ptr, addrspace 1) ; GFX90A_GFX940-NEXT: S_ENDPGM 0 @@ -65,20 +65,20 @@ ; GFX90A_GFX940: bb.1 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; 
GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY3]], %subreg.sub1 ; GFX90A_GFX940-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX90A_GFX940-NEXT: [[GLOBAL_ATOMIC_ADD_F64_SADDR_RTN:%[0-9]+]]:vreg_64_align2 = GLOBAL_ATOMIC_ADD_F64_SADDR_RTN [[V_MOV_B32_e32_]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.ptr, addrspace 1) - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_ATOMIC_ADD_F64_SADDR_RTN]].sub0 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_ATOMIC_ADD_F64_SADDR_RTN]].sub1 - ; GFX90A_GFX940-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX90A_GFX940-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX90A_GFX940-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec - ; GFX90A_GFX940-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; GFX90A_GFX940-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[GLOBAL_ATOMIC_ADD_F64_SADDR_RTN]].sub0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[GLOBAL_ATOMIC_ADD_F64_SADDR_RTN]].sub1 + ; GFX90A_GFX940-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY4]], implicit $exec + ; GFX90A_GFX940-NEXT: $sgpr0 = PRED_COPY [[V_READFIRSTLANE_B32_]] + ; GFX90A_GFX940-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY5]], implicit $exec + ; GFX90A_GFX940-NEXT: $sgpr1 = PRED_COPY [[V_READFIRSTLANE_B32_1]] ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 %ret = call double @llvm.amdgcn.global.atomic.fadd.f64.p1.f64(ptr addrspace(1) %ptr, double %data) ret double %ret @@ -89,12 +89,12 @@ ; GFX90A_GFX940: bb.1 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX90A_GFX940-NEXT: 
[[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY3]], %subreg.sub1 ; GFX90A_GFX940-NEXT: GLOBAL_ATOMIC_ADD_F64 [[REG_SEQUENCE]], [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.ptr, addrspace 1) ; GFX90A_GFX940-NEXT: S_ENDPGM 0 %ret = call double @llvm.amdgcn.flat.atomic.fadd.f64.p1.f64(ptr addrspace(1) %ptr, double %data) @@ -106,19 +106,19 @@ ; GFX90A_GFX940: bb.1 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY3]], %subreg.sub1 ; GFX90A_GFX940-NEXT: [[GLOBAL_ATOMIC_ADD_F64_RTN:%[0-9]+]]:vreg_64_align2 = GLOBAL_ATOMIC_ADD_F64_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE1]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.ptr, addrspace 1) - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_ATOMIC_ADD_F64_RTN]].sub0 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_ATOMIC_ADD_F64_RTN]].sub1 - ; GFX90A_GFX940-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX90A_GFX940-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX90A_GFX940-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec - ; GFX90A_GFX940-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; GFX90A_GFX940-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[GLOBAL_ATOMIC_ADD_F64_RTN]].sub0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[GLOBAL_ATOMIC_ADD_F64_RTN]].sub1 + ; GFX90A_GFX940-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY4]], implicit $exec + ; GFX90A_GFX940-NEXT: $sgpr0 = PRED_COPY [[V_READFIRSTLANE_B32_]] + ; GFX90A_GFX940-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY5]], implicit $exec + ; GFX90A_GFX940-NEXT: $sgpr1 = PRED_COPY [[V_READFIRSTLANE_B32_1]] ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 %ret = call double @llvm.amdgcn.flat.atomic.fadd.f64.p1.f64(ptr addrspace(1) %ptr, double %data) ret double %ret @@ -129,12 +129,12 @@ ; GFX90A_GFX940: bb.1 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; 
GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY3]], %subreg.sub1 ; GFX90A_GFX940-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX90A_GFX940-NEXT: GLOBAL_ATOMIC_ADD_F64_SADDR [[V_MOV_B32_e32_]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.ptr, addrspace 1) ; GFX90A_GFX940-NEXT: S_ENDPGM 0 @@ -147,20 +147,20 @@ ; GFX90A_GFX940: bb.1 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY3]], %subreg.sub1 ; GFX90A_GFX940-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX90A_GFX940-NEXT: [[GLOBAL_ATOMIC_ADD_F64_SADDR_RTN:%[0-9]+]]:vreg_64_align2 = GLOBAL_ATOMIC_ADD_F64_SADDR_RTN [[V_MOV_B32_e32_]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.ptr, addrspace 1) - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_ATOMIC_ADD_F64_SADDR_RTN]].sub0 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_ATOMIC_ADD_F64_SADDR_RTN]].sub1 - ; GFX90A_GFX940-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX90A_GFX940-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX90A_GFX940-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec - ; GFX90A_GFX940-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; GFX90A_GFX940-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[GLOBAL_ATOMIC_ADD_F64_SADDR_RTN]].sub0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = 
PRED_COPY [[GLOBAL_ATOMIC_ADD_F64_SADDR_RTN]].sub1 + ; GFX90A_GFX940-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY4]], implicit $exec + ; GFX90A_GFX940-NEXT: $sgpr0 = PRED_COPY [[V_READFIRSTLANE_B32_]] + ; GFX90A_GFX940-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY5]], implicit $exec + ; GFX90A_GFX940-NEXT: $sgpr1 = PRED_COPY [[V_READFIRSTLANE_B32_1]] ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 %ret = call double @llvm.amdgcn.flat.atomic.fadd.f64.p1.f64(ptr addrspace(1) %ptr, double %data) ret double %ret @@ -171,12 +171,12 @@ ; GFX90A_GFX940: bb.1 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY3]], %subreg.sub1 ; GFX90A_GFX940-NEXT: GLOBAL_ATOMIC_ADD_F64 [[REG_SEQUENCE]], [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load store syncscope("wavefront") monotonic (s64) on %ir.ptr, addrspace 1) ; GFX90A_GFX940-NEXT: S_ENDPGM 0 %ret = atomicrmw fadd ptr addrspace(1) %ptr, double %data syncscope("wavefront") monotonic @@ -188,19 +188,19 @@ ; GFX90A_GFX940: bb.1 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY3]], %subreg.sub1 ; GFX90A_GFX940-NEXT: [[GLOBAL_ATOMIC_ADD_F64_RTN:%[0-9]+]]:vreg_64_align2 = GLOBAL_ATOMIC_ADD_F64_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE1]], 0, 1, implicit $exec :: (load 
store syncscope("wavefront") monotonic (s64) on %ir.ptr, addrspace 1) - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_ATOMIC_ADD_F64_RTN]].sub0 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_ATOMIC_ADD_F64_RTN]].sub1 - ; GFX90A_GFX940-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX90A_GFX940-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX90A_GFX940-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec - ; GFX90A_GFX940-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; GFX90A_GFX940-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[GLOBAL_ATOMIC_ADD_F64_RTN]].sub0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[GLOBAL_ATOMIC_ADD_F64_RTN]].sub1 + ; GFX90A_GFX940-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY4]], implicit $exec + ; GFX90A_GFX940-NEXT: $sgpr0 = PRED_COPY [[V_READFIRSTLANE_B32_]] + ; GFX90A_GFX940-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY5]], implicit $exec + ; GFX90A_GFX940-NEXT: $sgpr1 = PRED_COPY [[V_READFIRSTLANE_B32_1]] ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 %ret = atomicrmw fadd ptr addrspace(1) %ptr, double %data syncscope("wavefront") monotonic ret double %ret @@ -211,12 +211,12 @@ ; GFX90A_GFX940: bb.1 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY3]], %subreg.sub1 ; GFX90A_GFX940-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX90A_GFX940-NEXT: GLOBAL_ATOMIC_ADD_F64_SADDR [[V_MOV_B32_e32_]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load store syncscope("wavefront") monotonic (s64) on %ir.ptr, addrspace 1) ; GFX90A_GFX940-NEXT: S_ENDPGM 0 @@ -229,20 +229,20 @@ ; GFX90A_GFX940: bb.1 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, 
[[COPY3]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY3]], %subreg.sub1 ; GFX90A_GFX940-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX90A_GFX940-NEXT: [[GLOBAL_ATOMIC_ADD_F64_SADDR_RTN:%[0-9]+]]:vreg_64_align2 = GLOBAL_ATOMIC_ADD_F64_SADDR_RTN [[V_MOV_B32_e32_]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], 0, 1, implicit $exec :: (load store syncscope("wavefront") monotonic (s64) on %ir.ptr, addrspace 1) - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_ATOMIC_ADD_F64_SADDR_RTN]].sub0 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_ATOMIC_ADD_F64_SADDR_RTN]].sub1 - ; GFX90A_GFX940-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX90A_GFX940-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX90A_GFX940-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec - ; GFX90A_GFX940-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; GFX90A_GFX940-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[GLOBAL_ATOMIC_ADD_F64_SADDR_RTN]].sub0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[GLOBAL_ATOMIC_ADD_F64_SADDR_RTN]].sub1 + ; GFX90A_GFX940-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY4]], implicit $exec + ; GFX90A_GFX940-NEXT: $sgpr0 = PRED_COPY [[V_READFIRSTLANE_B32_]] + ; GFX90A_GFX940-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY5]], implicit $exec + ; GFX90A_GFX940-NEXT: $sgpr1 = PRED_COPY [[V_READFIRSTLANE_B32_1]] ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 %ret = atomicrmw fadd ptr addrspace(1) %ptr, double %data syncscope("wavefront") monotonic ret double %ret diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.v2f16-no-rtn.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.v2f16-no-rtn.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.v2f16-no-rtn.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.v2f16-no-rtn.ll @@ -8,21 +8,21 @@ ; GFX908: bb.1 (%ir-block.0): ; GFX908-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX908-NEXT: {{ $}} - ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX908-NEXT: GLOBAL_ATOMIC_PK_ADD_F16 [[REG_SEQUENCE]], [[COPY2]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>) on %ir.ptr, addrspace 1) + ; GFX908-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX908-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX908-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX908-NEXT: GLOBAL_ATOMIC_PK_ADD_F16 [[REG_SEQUENCE]], [[PRED_COPY2]], 0, 
0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>) on %ir.ptr, addrspace 1) ; GFX908-NEXT: S_ENDPGM 0 ; GFX90A_GFX940-LABEL: name: global_atomic_fadd_v2f16_no_rtn_intrinsic ; GFX90A_GFX940: bb.1 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: GLOBAL_ATOMIC_PK_ADD_F16 [[REG_SEQUENCE]], [[COPY2]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>) on %ir.ptr, addrspace 1) + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX90A_GFX940-NEXT: GLOBAL_ATOMIC_PK_ADD_F16 [[REG_SEQUENCE]], [[PRED_COPY2]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>) on %ir.ptr, addrspace 1) ; GFX90A_GFX940-NEXT: S_ENDPGM 0 %ret = call <2 x half> @llvm.amdgcn.global.atomic.fadd.v2f16.p1.v2f16(ptr addrspace(1) %ptr, <2 x half> %data) ret void @@ -33,23 +33,23 @@ ; GFX908: bb.1 (%ir-block.0): ; GFX908-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0 ; GFX908-NEXT: {{ $}} - ; GFX908-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX908-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX908-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX908-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX908-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX908-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX908-NEXT: GLOBAL_ATOMIC_PK_ADD_F16_SADDR [[V_MOV_B32_e32_]], [[COPY2]], [[REG_SEQUENCE]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>) on %ir.ptr, addrspace 1) + ; GFX908-NEXT: GLOBAL_ATOMIC_PK_ADD_F16_SADDR [[V_MOV_B32_e32_]], [[PRED_COPY2]], [[REG_SEQUENCE]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>) on %ir.ptr, addrspace 1) ; GFX908-NEXT: S_ENDPGM 0 ; GFX90A_GFX940-LABEL: name: global_atomic_fadd_v2f16_saddr_no_rtn_intrinsic ; GFX90A_GFX940: bb.1 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: 
[[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX90A_GFX940-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX90A_GFX940-NEXT: GLOBAL_ATOMIC_PK_ADD_F16_SADDR [[V_MOV_B32_e32_]], [[COPY2]], [[REG_SEQUENCE]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>) on %ir.ptr, addrspace 1) + ; GFX90A_GFX940-NEXT: GLOBAL_ATOMIC_PK_ADD_F16_SADDR [[V_MOV_B32_e32_]], [[PRED_COPY2]], [[REG_SEQUENCE]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>) on %ir.ptr, addrspace 1) ; GFX90A_GFX940-NEXT: S_ENDPGM 0 %ret = call <2 x half> @llvm.amdgcn.global.atomic.fadd.v2f16.p1.v2f16(ptr addrspace(1) %ptr, <2 x half> %data) ret void @@ -60,21 +60,21 @@ ; GFX908: bb.1 (%ir-block.0): ; GFX908-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX908-NEXT: {{ $}} - ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX908-NEXT: GLOBAL_ATOMIC_PK_ADD_F16 [[REG_SEQUENCE]], [[COPY2]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>) on %ir.ptr, addrspace 1) + ; GFX908-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX908-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX908-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX908-NEXT: GLOBAL_ATOMIC_PK_ADD_F16 [[REG_SEQUENCE]], [[PRED_COPY2]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>) on %ir.ptr, addrspace 1) ; GFX908-NEXT: S_ENDPGM 0 ; GFX90A_GFX940-LABEL: name: global_atomic_fadd_v2f16_no_rtn_flat_intrinsic ; GFX90A_GFX940: bb.1 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: GLOBAL_ATOMIC_PK_ADD_F16 [[REG_SEQUENCE]], [[COPY2]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>) on %ir.ptr, addrspace 1) + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX90A_GFX940-NEXT: GLOBAL_ATOMIC_PK_ADD_F16 [[REG_SEQUENCE]], [[PRED_COPY2]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>) on %ir.ptr, addrspace 1) ; GFX90A_GFX940-NEXT: S_ENDPGM 0 %ret = call <2 x half> @llvm.amdgcn.flat.atomic.fadd.v2f16.p1.v2f16(ptr addrspace(1) %ptr, <2 x half> %data) ret void @@ -85,23 +85,23 @@ ; GFX908: bb.1 (%ir-block.0): ; GFX908-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0 ; GFX908-NEXT: {{ $}} - ; GFX908-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX908-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX908-NEXT: 
[[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX908-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX908-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX908-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX908-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX908-NEXT: GLOBAL_ATOMIC_PK_ADD_F16_SADDR [[V_MOV_B32_e32_]], [[COPY2]], [[REG_SEQUENCE]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>) on %ir.ptr, addrspace 1) + ; GFX908-NEXT: GLOBAL_ATOMIC_PK_ADD_F16_SADDR [[V_MOV_B32_e32_]], [[PRED_COPY2]], [[REG_SEQUENCE]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>) on %ir.ptr, addrspace 1) ; GFX908-NEXT: S_ENDPGM 0 ; GFX90A_GFX940-LABEL: name: global_atomic_fadd_v2f16_saddr_no_rtn_flat_intrinsic ; GFX90A_GFX940: bb.1 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX90A_GFX940-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX90A_GFX940-NEXT: GLOBAL_ATOMIC_PK_ADD_F16_SADDR [[V_MOV_B32_e32_]], [[COPY2]], [[REG_SEQUENCE]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>) on %ir.ptr, addrspace 1) + ; GFX90A_GFX940-NEXT: GLOBAL_ATOMIC_PK_ADD_F16_SADDR [[V_MOV_B32_e32_]], [[PRED_COPY2]], [[REG_SEQUENCE]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>) on %ir.ptr, addrspace 1) ; GFX90A_GFX940-NEXT: S_ENDPGM 0 %ret = call <2 x half> @llvm.amdgcn.flat.atomic.fadd.v2f16.p1.v2f16(ptr addrspace(1) %ptr, <2 x half> %data) ret void diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.v2f16-rtn.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.v2f16-rtn.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.v2f16-rtn.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.v2f16-rtn.ll @@ -7,12 +7,12 @@ ; GFX90A_GFX940: bb.1 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: [[GLOBAL_ATOMIC_PK_ADD_F16_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_PK_ADD_F16_RTN [[REG_SEQUENCE]], [[COPY2]], 0, 1, implicit $exec :: (volatile dereferenceable load store (<2 x s16>) on %ir.ptr, addrspace 1) - ; GFX90A_GFX940-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_PK_ADD_F16_RTN]] + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: 
[[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX90A_GFX940-NEXT: [[GLOBAL_ATOMIC_PK_ADD_F16_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_PK_ADD_F16_RTN [[REG_SEQUENCE]], [[PRED_COPY2]], 0, 1, implicit $exec :: (volatile dereferenceable load store (<2 x s16>) on %ir.ptr, addrspace 1) + ; GFX90A_GFX940-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_ATOMIC_PK_ADD_F16_RTN]] ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %ret = call <2 x half> @llvm.amdgcn.global.atomic.fadd.v2f16.p1.v2f16(ptr addrspace(1) %ptr, <2 x half> %data) ret <2 x half> %ret @@ -23,13 +23,13 @@ ; GFX90A_GFX940: bb.1 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX90A_GFX940-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX90A_GFX940-NEXT: [[GLOBAL_ATOMIC_PK_ADD_F16_SADDR_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_PK_ADD_F16_SADDR_RTN [[V_MOV_B32_e32_]], [[COPY2]], [[REG_SEQUENCE]], 0, 1, implicit $exec :: (volatile dereferenceable load store (<2 x s16>) on %ir.ptr, addrspace 1) - ; GFX90A_GFX940-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_PK_ADD_F16_SADDR_RTN]] + ; GFX90A_GFX940-NEXT: [[GLOBAL_ATOMIC_PK_ADD_F16_SADDR_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_PK_ADD_F16_SADDR_RTN [[V_MOV_B32_e32_]], [[PRED_COPY2]], [[REG_SEQUENCE]], 0, 1, implicit $exec :: (volatile dereferenceable load store (<2 x s16>) on %ir.ptr, addrspace 1) + ; GFX90A_GFX940-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_ATOMIC_PK_ADD_F16_SADDR_RTN]] ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %ret = call <2 x half> @llvm.amdgcn.global.atomic.fadd.v2f16.p1.v2f16(ptr addrspace(1) %ptr, <2 x half> %data) ret <2 x half> %ret @@ -40,12 +40,12 @@ ; GFX90A_GFX940: bb.1 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: [[GLOBAL_ATOMIC_PK_ADD_F16_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_PK_ADD_F16_RTN [[REG_SEQUENCE]], [[COPY2]], 0, 1, implicit $exec :: (volatile dereferenceable load store (<2 x s16>) on %ir.ptr, addrspace 1) - ; GFX90A_GFX940-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_PK_ADD_F16_RTN]] + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, 
[[PRED_COPY1]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX90A_GFX940-NEXT: [[GLOBAL_ATOMIC_PK_ADD_F16_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_PK_ADD_F16_RTN [[REG_SEQUENCE]], [[PRED_COPY2]], 0, 1, implicit $exec :: (volatile dereferenceable load store (<2 x s16>) on %ir.ptr, addrspace 1) + ; GFX90A_GFX940-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_ATOMIC_PK_ADD_F16_RTN]] ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %ret = call <2 x half> @llvm.amdgcn.flat.atomic.fadd.v2f16.p1.v2f16(ptr addrspace(1) %ptr, <2 x half> %data) ret <2 x half> %ret @@ -56,13 +56,13 @@ ; GFX90A_GFX940: bb.1 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX90A_GFX940-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX90A_GFX940-NEXT: [[GLOBAL_ATOMIC_PK_ADD_F16_SADDR_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_PK_ADD_F16_SADDR_RTN [[V_MOV_B32_e32_]], [[COPY2]], [[REG_SEQUENCE]], 0, 1, implicit $exec :: (volatile dereferenceable load store (<2 x s16>) on %ir.ptr, addrspace 1) - ; GFX90A_GFX940-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_PK_ADD_F16_SADDR_RTN]] + ; GFX90A_GFX940-NEXT: [[GLOBAL_ATOMIC_PK_ADD_F16_SADDR_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_PK_ADD_F16_SADDR_RTN [[V_MOV_B32_e32_]], [[PRED_COPY2]], [[REG_SEQUENCE]], 0, 1, implicit $exec :: (volatile dereferenceable load store (<2 x s16>) on %ir.ptr, addrspace 1) + ; GFX90A_GFX940-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_ATOMIC_PK_ADD_F16_SADDR_RTN]] ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %ret = call <2 x half> @llvm.amdgcn.flat.atomic.fadd.v2f16.p1.v2f16(ptr addrspace(1) %ptr, <2 x half> %data) ret <2 x half> %ret diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-value.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-value.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-value.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-value.ll @@ -19,8 +19,8 @@ ; GCN-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64(p4) = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @external_constant + 4, target-flags(amdgpu-gotprel32-hi) @external_constant + 12, implicit-def $scc ; GCN-NEXT: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[SI_PC_ADD_REL_OFFSET]](p4) :: (dereferenceable invariant load (p4) from got, addrspace 4) ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](p4) - ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GCN-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GCN-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GCN-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) ; GCN-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 ret ptr addrspace(4) @external_constant } @@ -31,8 +31,8 @@ ; GCN-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64(p4) = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @external_global + 4, target-flags(amdgpu-gotprel32-hi) @external_global + 12, 
implicit-def $scc ; GCN-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[SI_PC_ADD_REL_OFFSET]](p4) :: (dereferenceable invariant load (p1) from got, addrspace 4) ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](p1) - ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GCN-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GCN-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GCN-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) ; GCN-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 ret ptr addrspace(1) @external_global } @@ -43,8 +43,8 @@ ; GCN-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64(p4) = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @external_other + 4, target-flags(amdgpu-gotprel32-hi) @external_other + 12, implicit-def $scc ; GCN-NEXT: [[LOAD:%[0-9]+]]:_(p999) = G_LOAD [[SI_PC_ADD_REL_OFFSET]](p4) :: (dereferenceable invariant load (p999) from got, addrspace 4) ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](p999) - ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GCN-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GCN-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GCN-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) ; GCN-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 ret ptr addrspace(999) @external_other } @@ -54,8 +54,8 @@ ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64(p4) = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @internal_constant + 4, target-flags(amdgpu-rel32-hi) @internal_constant + 12, implicit-def $scc ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SI_PC_ADD_REL_OFFSET]](p4) - ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GCN-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GCN-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GCN-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) ; GCN-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 ret ptr addrspace(4) @internal_constant } @@ -65,8 +65,8 @@ ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64(p1) = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @internal_global + 4, target-flags(amdgpu-rel32-hi) @internal_global + 12, implicit-def $scc ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SI_PC_ADD_REL_OFFSET]](p1) - ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GCN-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GCN-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GCN-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) ; GCN-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 ret ptr addrspace(1) @internal_global } @@ -76,8 +76,8 @@ ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64(p999) = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @internal_other + 4, target-flags(amdgpu-rel32-hi) @internal_other + 12, implicit-def $scc ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SI_PC_ADD_REL_OFFSET]](p999) - ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GCN-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GCN-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GCN-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) ; GCN-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 ret ptr addrspace(999) @internal_other } @@ -88,7 +88,7 @@ ; GCN-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64(p4) = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @external_constant32 + 4, target-flags(amdgpu-gotprel32-hi) @external_constant32 + 12, implicit-def $scc ; GCN-NEXT: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[SI_PC_ADD_REL_OFFSET]](p4) :: (dereferenceable invariant load (p4) from got, addrspace 4) ; GCN-NEXT: [[EXTRACT:%[0-9]+]]:_(p6) = G_EXTRACT [[LOAD]](p4), 0 - ; 
GCN-NEXT: $vgpr0 = COPY [[EXTRACT]](p6) + ; GCN-NEXT: $vgpr0 = PRED_COPY [[EXTRACT]](p6) ; GCN-NEXT: SI_RETURN implicit $vgpr0 ret ptr addrspace(6) @external_constant32 } @@ -98,7 +98,7 @@ ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64(p4) = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @internal_constant32 + 4, target-flags(amdgpu-rel32-hi) @internal_constant32 + 12, implicit-def $scc ; GCN-NEXT: [[EXTRACT:%[0-9]+]]:_(p6) = G_EXTRACT [[SI_PC_ADD_REL_OFFSET]](p4), 0 - ; GCN-NEXT: $vgpr0 = COPY [[EXTRACT]](p6) + ; GCN-NEXT: $vgpr0 = PRED_COPY [[EXTRACT]](p6) ; GCN-NEXT: SI_RETURN implicit $vgpr0 ret ptr addrspace(6) @internal_constant32 } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inline-asm-mismatched-size.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/inline-asm-mismatched-size.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inline-asm-mismatched-size.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inline-asm-mismatched-size.ll @@ -51,9 +51,9 @@ ; CHECK-LABEL: name: return_type_is_too_small_scalar ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 10 /* regdef */, implicit-def $vgpr8_vgpr9 - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr8_vgpr9 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) - ; CHECK-NEXT: $vgpr0 = COPY [[TRUNC]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s64) = PRED_COPY $vgpr8_vgpr9 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[PRED_COPY]](s64) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[TRUNC]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %reg = call i32 asm sideeffect "; def $0", "={v[8:9]}" () ret i32 %reg @@ -88,16 +88,16 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<8 x s32>) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $vgpr6 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $vgpr7 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = PRED_COPY [[BUILD_VECTOR]](<8 x s32>) ; CHECK-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 9 /* reguse */, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; 
CHECK-NEXT: SI_RETURN call void asm sideeffect "; use $0", "{v[0:7]}"(<8 x i32> %arg) @@ -110,15 +110,15 @@ ; CHECK-NEXT: successors: %bb.1(0x80000000) ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $vgpr6 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $vgpr7 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1 (%ir-block.0): call void asm sideeffect "; use $0", "{v[0:9]}"(<8 x i32> %arg) @@ -131,9 +131,9 @@ ; CHECK-NEXT: successors: %bb.1(0x80000000) ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY]](s32), [[PRED_COPY1]](s32) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1 (%ir-block.0): call void asm sideeffect "; use $0", "{v0}"(i64 %arg) @@ -145,9 +145,9 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[COPY]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[PRED_COPY]](s32) + ; CHECK-NEXT: $vgpr0_vgpr1 = PRED_COPY [[ANYEXT]](s64) ; CHECK-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 9 /* reguse */, $vgpr0_vgpr1 ; CHECK-NEXT: SI_RETURN call void asm sideeffect "; use $0", "{v[0:1]}"(i32 %arg) @@ -160,9 +160,9 @@ ; CHECK-NEXT: successors: %bb.1(0x80000000) ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = 
G_MERGE_VALUES [[PRED_COPY]](s32), [[PRED_COPY1]](s32) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1 (%ir-block.0): call void asm sideeffect "; use $0", "{v0}"(ptr addrspace(1) %arg) @@ -175,7 +175,7 @@ ; CHECK-NEXT: successors: %bb.1(0x80000000) ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(p3) = PRED_COPY $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1 (%ir-block.0): call void asm sideeffect "; use $0", "{v[0:1]}"(ptr addrspace(3) %arg) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-abs.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-abs.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-abs.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-abs.mir @@ -17,13 +17,13 @@ ; GFX6-LABEL: name: smax_neg_abs_pattern_s32_ss ; GFX6: liveins: $sgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: %src0:sreg_32 = COPY $sgpr0 + ; GFX6-NEXT: %src0:sreg_32 = PRED_COPY $sgpr0 ; GFX6-NEXT: %smax:sreg_32 = S_ABS_I32 %src0, implicit-def $scc ; GFX6-NEXT: S_ENDPGM 0, implicit %smax ; GFX9-LABEL: name: smax_neg_abs_pattern_s32_ss ; GFX9: liveins: $sgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: %src0:sreg_32 = COPY $sgpr0 + ; GFX9-NEXT: %src0:sreg_32 = PRED_COPY $sgpr0 ; GFX9-NEXT: %smax:sreg_32 = S_ABS_I32 %src0, implicit-def $scc ; GFX9-NEXT: S_ENDPGM 0, implicit %smax %src0:sgpr(s32) = COPY $sgpr0 @@ -46,13 +46,13 @@ ; GFX6-LABEL: name: smax_neg_abs_pattern_s32_ss_commute ; GFX6: liveins: $sgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: %src0:sreg_32 = COPY $sgpr0 + ; GFX6-NEXT: %src0:sreg_32 = PRED_COPY $sgpr0 ; GFX6-NEXT: %smax:sreg_32 = S_ABS_I32 %src0, implicit-def $scc ; GFX6-NEXT: S_ENDPGM 0, implicit %smax ; GFX9-LABEL: name: smax_neg_abs_pattern_s32_ss_commute ; GFX9: liveins: $sgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: %src0:sreg_32 = COPY $sgpr0 + ; GFX9-NEXT: %src0:sreg_32 = PRED_COPY $sgpr0 ; GFX9-NEXT: %smax:sreg_32 = S_ABS_I32 %src0, implicit-def $scc ; GFX9-NEXT: S_ENDPGM 0, implicit %smax %src0:sgpr(s32) = COPY $sgpr0 @@ -75,7 +75,7 @@ ; GFX6-LABEL: name: smax_neg_abs_pattern_s32_vv ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: %src0:vgpr_32 = COPY $vgpr0 + ; GFX6-NEXT: %src0:vgpr_32 = PRED_COPY $vgpr0 ; GFX6-NEXT: %zero:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX6-NEXT: %ineg:vgpr_32, dead %4:sreg_64_xexec = V_SUB_CO_U32_e64 %zero, %src0, 0, implicit $exec ; GFX6-NEXT: %smax:vgpr_32 = V_MAX_I32_e64 %src0, %ineg, implicit $exec @@ -83,7 +83,7 @@ ; GFX9-LABEL: name: smax_neg_abs_pattern_s32_vv ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: %src0:vgpr_32 = COPY $vgpr0 + ; GFX9-NEXT: %src0:vgpr_32 = PRED_COPY $vgpr0 ; GFX9-NEXT: %zero:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX9-NEXT: %ineg:vgpr_32 = V_SUB_U32_e64 %zero, %src0, 0, implicit $exec ; GFX9-NEXT: %smax:vgpr_32 = V_MAX_I32_e64 %src0, %ineg, implicit $exec diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-add.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-add.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-add.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-add.mir @@ -18,24 +18,24 @@ ; GFX6-LABEL: name: add_s32 ; GFX6: liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr3_vgpr4 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY]], [[COPY1]], implicit-def $scc - ; GFX6-NEXT: %7:vgpr_32, 
dead %12:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[S_ADD_I32_]], 0, implicit $exec - ; GFX6-NEXT: %8:vgpr_32, dead %11:sreg_64_xexec = V_ADD_CO_U32_e64 [[S_ADD_I32_]], %7, 0, implicit $exec - ; GFX6-NEXT: %9:vgpr_32, dead %10:sreg_64_xexec = V_ADD_CO_U32_e64 %8, [[COPY2]], 0, implicit $exec - ; GFX6-NEXT: S_ENDPGM 0, implicit [[S_ADD_I32_]], implicit %7, implicit %8, implicit %9 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc + ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY2]], [[S_ADD_I32_]], 0, implicit $exec + ; GFX6-NEXT: [[V_ADD_CO_U32_e64_2:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_3:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[S_ADD_I32_]], [[V_ADD_CO_U32_e64_]], 0, implicit $exec + ; GFX6-NEXT: [[V_ADD_CO_U32_e64_4:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_5:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[V_ADD_CO_U32_e64_2]], [[PRED_COPY2]], 0, implicit $exec + ; GFX6-NEXT: S_ENDPGM 0, implicit [[S_ADD_I32_]], implicit [[V_ADD_CO_U32_e64_]], implicit [[V_ADD_CO_U32_e64_2]], implicit [[V_ADD_CO_U32_e64_4]] ; GFX9-LABEL: name: add_s32 ; GFX9: liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr3_vgpr4 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY]], [[COPY1]], implicit-def $scc - ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY2]], [[S_ADD_I32_]], 0, implicit $exec + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc + ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[PRED_COPY2]], [[S_ADD_I32_]], 0, implicit $exec ; GFX9-NEXT: [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[S_ADD_I32_]], [[V_ADD_U32_e64_]], 0, implicit $exec - ; GFX9-NEXT: [[V_ADD_U32_e64_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[V_ADD_U32_e64_1]], [[COPY2]], 0, implicit $exec + ; GFX9-NEXT: [[V_ADD_U32_e64_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[V_ADD_U32_e64_1]], [[PRED_COPY2]], 0, implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[S_ADD_I32_]], implicit [[V_ADD_U32_e64_]], implicit [[V_ADD_U32_e64_1]], implicit [[V_ADD_U32_e64_2]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 @@ -73,14 +73,14 @@ ; GFX6-LABEL: name: add_neg_inline_const_64_to_sub_s32_s ; GFX6: liveins: $sgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX6-NEXT: [[S_SUB_I32_:%[0-9]+]]:sreg_32 = S_SUB_I32 [[COPY]], 64, implicit-def $scc + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX6-NEXT: [[S_SUB_I32_:%[0-9]+]]:sreg_32 = S_SUB_I32 [[PRED_COPY]], 64, implicit-def $scc ; GFX6-NEXT: S_ENDPGM 0, implicit [[S_SUB_I32_]] ; GFX9-LABEL: name: add_neg_inline_const_64_to_sub_s32_s ; GFX9: liveins: $sgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX9-NEXT: [[S_SUB_I32_:%[0-9]+]]:sreg_32 = S_SUB_I32 [[COPY]], 64, implicit-def $scc + ; GFX9-NEXT: 
[[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX9-NEXT: [[S_SUB_I32_:%[0-9]+]]:sreg_32 = S_SUB_I32 [[PRED_COPY]], 64, implicit-def $scc ; GFX9-NEXT: S_ENDPGM 0, implicit [[S_SUB_I32_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = G_CONSTANT i32 -64 @@ -102,14 +102,14 @@ ; GFX6-LABEL: name: add_neg_inline_const_64_to_sub_s32_v ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: %2:vgpr_32, dead %3:sreg_64 = V_SUB_CO_U32_e64 [[COPY]], 64, 0, implicit $exec - ; GFX6-NEXT: S_ENDPGM 0, implicit %2 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[V_SUB_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_SUB_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_SUB_CO_U32_e64 [[PRED_COPY]], 64, 0, implicit $exec + ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_SUB_CO_U32_e64_]] ; GFX9-LABEL: name: add_neg_inline_const_64_to_sub_s32_v ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[V_SUB_U32_e64_:%[0-9]+]]:vgpr_32 = V_SUB_U32_e64 [[COPY]], 64, 0, implicit $exec + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[V_SUB_U32_e64_:%[0-9]+]]:vgpr_32 = V_SUB_U32_e64 [[PRED_COPY]], 64, 0, implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_SUB_U32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = G_CONSTANT i32 -64 @@ -131,16 +131,16 @@ ; GFX6-LABEL: name: add_neg_inline_const_16_to_sub_s32_s ; GFX6: liveins: $sgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 16 - ; GFX6-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc + ; GFX6-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[PRED_COPY]], [[S_MOV_B32_]], implicit-def $scc ; GFX6-NEXT: S_ENDPGM 0, implicit [[S_ADD_I32_]] ; GFX9-LABEL: name: add_neg_inline_const_16_to_sub_s32_s ; GFX9: liveins: $sgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 16 - ; GFX9-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc + ; GFX9-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[PRED_COPY]], [[S_MOV_B32_]], implicit-def $scc ; GFX9-NEXT: S_ENDPGM 0, implicit [[S_ADD_I32_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = G_CONSTANT i32 16 @@ -162,16 +162,16 @@ ; GFX6-LABEL: name: add_neg_inline_const_16_to_sub_s32_v ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX6-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 16, implicit $exec - ; GFX6-NEXT: %2:vgpr_32, dead %3:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec - ; GFX6-NEXT: S_ENDPGM 0, implicit %2 + ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_ADD_CO_U32_e64_]] ; GFX9-LABEL: name: add_neg_inline_const_16_to_sub_s32_v ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 16, implicit $exec - ; 
GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[PRED_COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_ADD_U32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = G_CONSTANT i32 16 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-add.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-add.s16.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-add.s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-add.s16.mir @@ -19,16 +19,16 @@ ; GFX6-LABEL: name: add_s16 ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6-NEXT: [[V_ADD_U16_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U16_e64 [[COPY]], [[COPY1]], 0, implicit $exec + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX6-NEXT: [[V_ADD_U16_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U16_e64 [[PRED_COPY]], [[PRED_COPY1]], 0, implicit $exec ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_ADD_U16_e64_]] ; GFX10-LABEL: name: add_s16 ; GFX10: liveins: $vgpr0, $vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX10-NEXT: [[V_ADD_NC_U16_e64_:%[0-9]+]]:vgpr_32 = V_ADD_NC_U16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $exec + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[V_ADD_NC_U16_e64_:%[0-9]+]]:vgpr_32 = V_ADD_NC_U16_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, 0, implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_ADD_NC_U16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -52,16 +52,16 @@ ; GFX6-LABEL: name: add_s16_zext_to_s32 ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6-NEXT: [[V_ADD_U16_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U16_e64 [[COPY]], [[COPY1]], 0, implicit $exec + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX6-NEXT: [[V_ADD_U16_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U16_e64 [[PRED_COPY]], [[PRED_COPY1]], 0, implicit $exec ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_ADD_U16_e64_]] ; GFX10-LABEL: name: add_s16_zext_to_s32 ; GFX10: liveins: $vgpr0, $vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX10-NEXT: [[V_ADD_NC_U16_e64_:%[0-9]+]]:vgpr_32 = V_ADD_NC_U16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $exec + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[V_ADD_NC_U16_e64_:%[0-9]+]]:vgpr_32 = V_ADD_NC_U16_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, 0, implicit $exec ; GFX10-NEXT: [[V_BFE_U32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_U32_e64 [[V_ADD_NC_U16_e64_]], 0, 16, implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_BFE_U32_e64_]] %0:vgpr(s32) = COPY $vgpr0 @@ -87,14 +87,14 @@ ; GFX6-LABEL: name: add_s16_neg_inline_const_64 ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[V_SUB_U16_e64_:%[0-9]+]]:vgpr_32 = V_SUB_U16_e64 [[COPY]], 
64, 0, implicit $exec + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[V_SUB_U16_e64_:%[0-9]+]]:vgpr_32 = V_SUB_U16_e64 [[PRED_COPY]], 64, 0, implicit $exec ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_SUB_U16_e64_]] ; GFX10-LABEL: name: add_s16_neg_inline_const_64 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[V_SUB_NC_U16_e64_:%[0-9]+]]:vgpr_32 = V_SUB_NC_U16_e64 0, [[COPY]], 0, 64, 0, 0, implicit $exec + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[V_SUB_NC_U16_e64_:%[0-9]+]]:vgpr_32 = V_SUB_NC_U16_e64 0, [[PRED_COPY]], 0, 64, 0, 0, implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_SUB_NC_U16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s16) = G_TRUNC %0 @@ -117,14 +117,14 @@ ; GFX6-LABEL: name: add_s16_neg_inline_const_64_zext_to_s32 ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[V_SUB_U16_e64_:%[0-9]+]]:vgpr_32 = V_SUB_U16_e64 [[COPY]], 64, 0, implicit $exec + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[V_SUB_U16_e64_:%[0-9]+]]:vgpr_32 = V_SUB_U16_e64 [[PRED_COPY]], 64, 0, implicit $exec ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_SUB_U16_e64_]] ; GFX10-LABEL: name: add_s16_neg_inline_const_64_zext_to_s32 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[V_SUB_NC_U16_e64_:%[0-9]+]]:vgpr_32 = V_SUB_NC_U16_e64 0, [[COPY]], 0, 64, 0, 0, implicit $exec + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[V_SUB_NC_U16_e64_:%[0-9]+]]:vgpr_32 = V_SUB_NC_U16_e64 0, [[PRED_COPY]], 0, 64, 0, 0, implicit $exec ; GFX10-NEXT: [[V_BFE_U32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_U32_e64 [[V_SUB_NC_U16_e64_]], 0, 16, implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_BFE_U32_e64_]] %0:vgpr(s32) = COPY $vgpr0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.class.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.class.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.class.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.class.mir @@ -14,16 +14,16 @@ ; WAVE64-LABEL: name: class_s32_vcc_sv ; WAVE64: liveins: $sgpr0, $vgpr0 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE64-NEXT: [[V_CMP_CLASS_F32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_CLASS_F32_e64 0, [[COPY]], [[COPY1]], implicit $exec + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE64-NEXT: [[V_CMP_CLASS_F32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_CLASS_F32_e64 0, [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_CLASS_F32_e64_]] ; WAVE32-LABEL: name: class_s32_vcc_sv ; WAVE32: liveins: $sgpr0, $vgpr0 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE32-NEXT: [[V_CMP_CLASS_F32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_CLASS_F32_e64 0, [[COPY]], [[COPY1]], implicit $exec + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE32-NEXT: [[V_CMP_CLASS_F32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_CLASS_F32_e64 0, [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; WAVE32-NEXT: S_ENDPGM 0, 
implicit [[V_CMP_CLASS_F32_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = COPY $vgpr0 @@ -43,16 +43,16 @@ ; WAVE64-LABEL: name: class_s32_vcc_vs ; WAVE64: liveins: $sgpr0, $vgpr0 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; WAVE64-NEXT: [[V_CMP_CLASS_F32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_CLASS_F32_e64 0, [[COPY]], [[COPY1]], implicit $exec + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; WAVE64-NEXT: [[V_CMP_CLASS_F32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_CLASS_F32_e64 0, [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_CLASS_F32_e64_]] ; WAVE32-LABEL: name: class_s32_vcc_vs ; WAVE32: liveins: $sgpr0, $vgpr0 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; WAVE32-NEXT: [[V_CMP_CLASS_F32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_CLASS_F32_e64 0, [[COPY]], [[COPY1]], implicit $exec + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; WAVE32-NEXT: [[V_CMP_CLASS_F32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_CLASS_F32_e64 0, [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_CLASS_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:sgpr(s32) = COPY $sgpr0 @@ -72,16 +72,16 @@ ; WAVE64-LABEL: name: class_s32_vcc_vv ; WAVE64: liveins: $vgpr0, $vgpr1 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE64-NEXT: [[V_CMP_CLASS_F32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_CLASS_F32_e64 0, [[COPY]], [[COPY1]], implicit $exec + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE64-NEXT: [[V_CMP_CLASS_F32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_CLASS_F32_e64 0, [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_CLASS_F32_e64_]] ; WAVE32-LABEL: name: class_s32_vcc_vv ; WAVE32: liveins: $vgpr0, $vgpr1 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE32-NEXT: [[V_CMP_CLASS_F32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_CLASS_F32_e64 0, [[COPY]], [[COPY1]], implicit $exec + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE32-NEXT: [[V_CMP_CLASS_F32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_CLASS_F32_e64 0, [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_CLASS_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -101,16 +101,16 @@ ; WAVE64-LABEL: name: class_s64_vcc_sv ; WAVE64: liveins: $sgpr0_sgpr1, $vgpr0 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE64-NEXT: [[V_CMP_CLASS_F64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_CLASS_F64_e64 0, [[COPY]], [[COPY1]], implicit $exec + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE64-NEXT: [[V_CMP_CLASS_F64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_CLASS_F64_e64 0, [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; 
WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_CLASS_F64_e64_]] ; WAVE32-LABEL: name: class_s64_vcc_sv ; WAVE32: liveins: $sgpr0_sgpr1, $vgpr0 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE32-NEXT: [[V_CMP_CLASS_F64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_CLASS_F64_e64 0, [[COPY]], [[COPY1]], implicit $exec + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE32-NEXT: [[V_CMP_CLASS_F64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_CLASS_F64_e64 0, [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_CLASS_F64_e64_]] %0:sgpr(s64) = COPY $sgpr0_sgpr1 %1:vgpr(s32) = COPY $vgpr0 @@ -131,16 +131,16 @@ ; WAVE64-LABEL: name: class_s64_vcc_vs ; WAVE64: liveins: $sgpr0_sgpr1, $vgpr0 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; WAVE64-NEXT: [[V_CMP_CLASS_F64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_CLASS_F64_e64 0, [[COPY]], [[COPY1]], implicit $exec + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; WAVE64-NEXT: [[V_CMP_CLASS_F64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_CLASS_F64_e64 0, [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_CLASS_F64_e64_]] ; WAVE32-LABEL: name: class_s64_vcc_vs ; WAVE32: liveins: $sgpr0_sgpr1, $vgpr0 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; WAVE32-NEXT: [[V_CMP_CLASS_F64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_CLASS_F64_e64 0, [[COPY]], [[COPY1]], implicit $exec + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; WAVE32-NEXT: [[V_CMP_CLASS_F64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_CLASS_F64_e64 0, [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_CLASS_F64_e64_]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:sgpr(s32) = COPY $sgpr0 @@ -161,16 +161,16 @@ ; WAVE64-LABEL: name: class_s64_vcc_vv ; WAVE64: liveins: $vgpr0_vgpr1, $vgpr2 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; WAVE64-NEXT: [[V_CMP_CLASS_F64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_CLASS_F64_e64 0, [[COPY]], [[COPY1]], implicit $exec + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; WAVE64-NEXT: [[V_CMP_CLASS_F64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_CLASS_F64_e64 0, [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_CLASS_F64_e64_]] ; WAVE32-LABEL: name: class_s64_vcc_vv ; WAVE32: liveins: $vgpr0_vgpr1, $vgpr2 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; WAVE32-NEXT: [[V_CMP_CLASS_F64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_CLASS_F64_e64 0, [[COPY]], [[COPY1]], implicit $exec + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; WAVE32-NEXT: [[V_CMP_CLASS_F64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_CLASS_F64_e64 0, [[PRED_COPY]], 
[[PRED_COPY1]], implicit $exec ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_CLASS_F64_e64_]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.class.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.class.s16.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.class.s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.class.s16.mir @@ -21,18 +21,18 @@ liveins: $sgpr0, $vgpr0 ; WAVE32-LABEL: name: class_s16_vcc_sv ; WAVE32: liveins: $sgpr0, $vgpr0 - ; WAVE32: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; WAVE32: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE32: [[V_CMP_CLASS_F16_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_CLASS_F16_e64 0, [[COPY]], [[COPY1]], implicit $exec + ; WAVE32: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; WAVE32: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE32: [[V_CMP_CLASS_F16_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_CLASS_F16_e64 0, [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; WAVE32: S_ENDPGM 0, implicit [[V_CMP_CLASS_F16_e64_]] ; WAVE64-LABEL: name: class_s16_vcc_sv ; WAVE64: liveins: $sgpr0, $vgpr0 - ; WAVE64: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; WAVE64: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE64: [[V_CMP_CLASS_F16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_CLASS_F16_e64 0, [[COPY]], [[COPY1]], implicit $exec + ; WAVE64: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; WAVE64: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE64: [[V_CMP_CLASS_F16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_CLASS_F16_e64 0, [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; WAVE64: S_ENDPGM 0, implicit [[V_CMP_CLASS_F16_e64_]] - %0:sgpr(s32) = COPY $sgpr0 - %1:vgpr(s32) = COPY $vgpr0 + %0:sgpr(s32) = PRED_COPY $sgpr0 + %1:vgpr(s32) = PRED_COPY $vgpr0 %2:sgpr(s16) = G_TRUNC %0 %4:vcc(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.class), %2, %1 S_ENDPGM 0, implicit %4 @@ -49,18 +49,18 @@ liveins: $sgpr0, $vgpr0 ; WAVE32-LABEL: name: class_s16_vcc_vs ; WAVE32: liveins: $sgpr0, $vgpr0 - ; WAVE32: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE32: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; WAVE32: [[V_CMP_CLASS_F16_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_CLASS_F16_e64 0, [[COPY]], [[COPY1]], implicit $exec + ; WAVE32: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE32: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; WAVE32: [[V_CMP_CLASS_F16_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_CLASS_F16_e64 0, [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; WAVE32: S_ENDPGM 0, implicit [[V_CMP_CLASS_F16_e64_]] ; WAVE64-LABEL: name: class_s16_vcc_vs ; WAVE64: liveins: $sgpr0, $vgpr0 - ; WAVE64: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE64: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; WAVE64: [[V_CMP_CLASS_F16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_CLASS_F16_e64 0, [[COPY]], [[COPY1]], implicit $exec + ; WAVE64: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE64: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; WAVE64: [[V_CMP_CLASS_F16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_CLASS_F16_e64 0, [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; WAVE64: S_ENDPGM 0, implicit [[V_CMP_CLASS_F16_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:sgpr(s32) = COPY $sgpr0 + %0:vgpr(s32) = PRED_COPY $vgpr0 + %1:sgpr(s32) = PRED_COPY $sgpr0 %2:vgpr(s16) = G_TRUNC %0 %4:vcc(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.class), %2, %1 S_ENDPGM 0, implicit %4 @@ -77,18 +77,18 @@ liveins: $vgpr0, $vgpr1 ; WAVE32-LABEL: name: class_s16_vcc_vv ; WAVE32: 
liveins: $vgpr0, $vgpr1 - ; WAVE32: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE32: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE32: [[V_CMP_CLASS_F16_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_CLASS_F16_e64 0, [[COPY]], [[COPY1]], implicit $exec + ; WAVE32: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE32: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE32: [[V_CMP_CLASS_F16_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_CLASS_F16_e64 0, [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; WAVE32: S_ENDPGM 0, implicit [[V_CMP_CLASS_F16_e64_]] ; WAVE64-LABEL: name: class_s16_vcc_vv ; WAVE64: liveins: $vgpr0, $vgpr1 - ; WAVE64: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE64: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE64: [[V_CMP_CLASS_F16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_CLASS_F16_e64 0, [[COPY]], [[COPY1]], implicit $exec + ; WAVE64: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE64: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE64: [[V_CMP_CLASS_F16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_CLASS_F16_e64 0, [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; WAVE64: S_ENDPGM 0, implicit [[V_CMP_CLASS_F16_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 + %0:vgpr(s32) = PRED_COPY $vgpr0 + %1:vgpr(s32) = PRED_COPY $vgpr1 %2:vgpr(s16) = G_TRUNC %0 %4:vcc(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.class), %2, %1 S_ENDPGM 0, implicit %4 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.cos.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.cos.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.cos.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.cos.mir @@ -15,9 +15,9 @@ ; CHECK-LABEL: name: cos_s32_vs ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; CHECK-NEXT: %1:vgpr_32 = nofpexcept V_COS_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: S_ENDPGM 0, implicit %1 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; CHECK-NEXT: [[V_COS_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_COS_F32_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_COS_F32_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), %0 S_ENDPGM 0, implicit %1 @@ -36,9 +36,9 @@ ; CHECK-LABEL: name: cos_s32_vv ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: %1:vgpr_32 = nofpexcept V_COS_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: S_ENDPGM 0, implicit %1 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[V_COS_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_COS_F32_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_COS_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), %0 S_ENDPGM 0, implicit %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.cos.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.cos.s16.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.cos.s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.cos.s16.mir @@ -17,10 +17,10 @@ ; CHECK-LABEL: name: cos_s16_vs ; CHECK: liveins: $sgpr0 - ; CHECK: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; CHECK: [[V_COS_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_COS_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; 
CHECK: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; CHECK: [[V_COS_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_COS_F16_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec ; CHECK: S_ENDPGM 0, implicit [[V_COS_F16_e64_]] - %0:sgpr(s32) = COPY $sgpr0 + %0:sgpr(s32) = PRED_COPY $sgpr0 %1:sgpr(s16) = G_TRUNC %0 %2:vgpr(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), %1 S_ENDPGM 0, implicit %2 @@ -38,10 +38,10 @@ ; CHECK-LABEL: name: cos_s16_vv ; CHECK: liveins: $vgpr0 - ; CHECK: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK: %2:vgpr_32 = nofpexcept V_COS_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; CHECK: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK: %2:vgpr_32 = nofpexcept V_COS_F16_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec ; CHECK: S_ENDPGM 0, implicit %2 - %0:vgpr(s32) = COPY $vgpr0 + %0:vgpr(s32) = PRED_COPY $vgpr0 %1:vgpr(s16) = G_TRUNC %0 %2:vgpr(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), %1 S_ENDPGM 0, implicit %2 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.cvt.pk.i16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.cvt.pk.i16.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.cvt.pk.i16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.cvt.pk.i16.mir @@ -14,9 +14,9 @@ ; GCN-LABEL: name: cvt_pk_i16_vsv ; GCN: liveins: $sgpr0, $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[V_CVT_PK_I16_I32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_PK_I16_I32_e64 [[COPY]], [[COPY1]], implicit $exec + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[V_CVT_PK_I16_I32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_PK_I16_I32_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_CVT_PK_I16_I32_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = COPY $vgpr0 @@ -37,9 +37,9 @@ ; GCN-LABEL: name: cvt_pk_i16_vvs ; GCN: liveins: $sgpr0, $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[V_CVT_PK_I16_I32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_PK_I16_I32_e64 [[COPY]], [[COPY1]], implicit $exec + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[V_CVT_PK_I16_I32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_PK_I16_I32_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_CVT_PK_I16_I32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:sgpr(s32) = COPY $sgpr0 @@ -59,9 +59,9 @@ ; GCN-LABEL: name: cvt_pk_i16_vvv ; GCN: liveins: $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GCN-NEXT: [[V_CVT_PK_I16_I32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_PK_I16_I32_e64 [[COPY]], [[COPY1]], implicit $exec + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GCN-NEXT: [[V_CVT_PK_I16_I32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_PK_I16_I32_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_CVT_PK_I16_I32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.cvt.pk.u16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.cvt.pk.u16.mir --- 
a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.cvt.pk.u16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.cvt.pk.u16.mir @@ -14,9 +14,9 @@ ; GCN-LABEL: name: cvt_pk_u16_vsv ; GCN: liveins: $sgpr0, $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[V_CVT_PK_U16_U32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_PK_U16_U32_e64 [[COPY]], [[COPY1]], implicit $exec + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[V_CVT_PK_U16_U32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_PK_U16_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_CVT_PK_U16_U32_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = COPY $vgpr0 @@ -37,9 +37,9 @@ ; GCN-LABEL: name: cvt_pk_u16_vvs ; GCN: liveins: $sgpr0, $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[V_CVT_PK_U16_U32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_PK_U16_U32_e64 [[COPY]], [[COPY1]], implicit $exec + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[V_CVT_PK_U16_U32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_PK_U16_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_CVT_PK_U16_U32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:sgpr(s32) = COPY $sgpr0 @@ -59,9 +59,9 @@ ; GCN-LABEL: name: cvt_pk_u16_vvv ; GCN: liveins: $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GCN-NEXT: [[V_CVT_PK_U16_U32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_PK_U16_U32_e64 [[COPY]], [[COPY1]], implicit $exec + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GCN-NEXT: [[V_CVT_PK_U16_U32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_PK_U16_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_CVT_PK_U16_U32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.cvt.pknorm.i16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.cvt.pknorm.i16.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.cvt.pknorm.i16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.cvt.pknorm.i16.mir @@ -14,9 +14,9 @@ ; GCN-LABEL: name: cvt_pknorm_i16_vsv ; GCN: liveins: $sgpr0, $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[V_CVT_PKNORM_I16_F32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_PKNORM_I16_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[V_CVT_PKNORM_I16_F32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_PKNORM_I16_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_CVT_PKNORM_I16_F32_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = COPY $vgpr0 @@ -37,9 +37,9 @@ ; GCN-LABEL: name: cvt_pknorm_i16_vvs ; GCN: liveins: $sgpr0, $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[V_CVT_PKNORM_I16_F32_e64_:%[0-9]+]]:vgpr_32 = 
V_CVT_PKNORM_I16_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[V_CVT_PKNORM_I16_F32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_PKNORM_I16_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_CVT_PKNORM_I16_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:sgpr(s32) = COPY $sgpr0 @@ -59,9 +59,9 @@ ; GCN-LABEL: name: cvt_pknorm_i16_vvv ; GCN: liveins: $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GCN-NEXT: [[V_CVT_PKNORM_I16_F32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_PKNORM_I16_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GCN-NEXT: [[V_CVT_PKNORM_I16_F32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_PKNORM_I16_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_CVT_PKNORM_I16_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.cvt.pknorm.u16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.cvt.pknorm.u16.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.cvt.pknorm.u16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.cvt.pknorm.u16.mir @@ -14,9 +14,9 @@ ; GCN-LABEL: name: cvt_pknorm_u16_vsv ; GCN: liveins: $sgpr0, $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[V_CVT_PKNORM_U16_F32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_PKNORM_U16_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[V_CVT_PKNORM_U16_F32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_PKNORM_U16_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_CVT_PKNORM_U16_F32_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = COPY $vgpr0 @@ -37,9 +37,9 @@ ; GCN-LABEL: name: cvt_pknorm_u16_vvs ; GCN: liveins: $sgpr0, $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[V_CVT_PKNORM_U16_F32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_PKNORM_U16_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[V_CVT_PKNORM_U16_F32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_PKNORM_U16_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_CVT_PKNORM_U16_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:sgpr(s32) = COPY $sgpr0 @@ -59,9 +59,9 @@ ; GCN-LABEL: name: cvt_pknorm_u16_vvv ; GCN: liveins: $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GCN-NEXT: [[V_CVT_PKNORM_U16_F32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_PKNORM_U16_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GCN-NEXT: [[V_CVT_PKNORM_U16_F32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_PKNORM_U16_F32_e64 0, [[PRED_COPY]], 0, 
[[PRED_COPY1]], 0, implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_CVT_PKNORM_U16_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.cvt.pkrtz.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.cvt.pkrtz.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.cvt.pkrtz.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.cvt.pkrtz.mir @@ -14,10 +14,10 @@ ; GCN-LABEL: name: cvt_pkrtz_vsv ; GCN: liveins: $sgpr0, $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: %2:vgpr_32 = nofpexcept V_CVT_PKRTZ_F16_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: S_ENDPGM 0, implicit %2 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[V_CVT_PKRTZ_F16_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_PKRTZ_F16_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: S_ENDPGM 0, implicit [[V_CVT_PKRTZ_F16_F32_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = COPY $vgpr0 %2:vgpr(<2 x s16>) = G_INTRINSIC intrinsic(@llvm.amdgcn.cvt.pkrtz), %0, %1 @@ -37,10 +37,10 @@ ; GCN-LABEL: name: cvt_pkrtz_vvs ; GCN: liveins: $sgpr0, $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: %2:vgpr_32 = nofpexcept V_CVT_PKRTZ_F16_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: S_ENDPGM 0, implicit %2 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[V_CVT_PKRTZ_F16_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_PKRTZ_F16_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: S_ENDPGM 0, implicit [[V_CVT_PKRTZ_F16_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:sgpr(s32) = COPY $sgpr0 %2:vgpr(<2 x s16>) = G_INTRINSIC intrinsic(@llvm.amdgcn.cvt.pkrtz), %0, %1 @@ -59,10 +59,10 @@ ; GCN-LABEL: name: cvt_pkrtz_vvv ; GCN: liveins: $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GCN-NEXT: %2:vgpr_32 = nofpexcept V_CVT_PKRTZ_F16_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: S_ENDPGM 0, implicit %2 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GCN-NEXT: [[V_CVT_PKRTZ_F16_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_PKRTZ_F16_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: S_ENDPGM 0, implicit [[V_CVT_PKRTZ_F16_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(<2 x s16>) = G_INTRINSIC intrinsic(@llvm.amdgcn.cvt.pkrtz), %0, %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.ds.swizzle.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.ds.swizzle.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.ds.swizzle.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.ds.swizzle.mir @@ -15,8 +15,8 @@ ; CHECK-LABEL: name: ds_swizzle_0 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: 
[[DS_SWIZZLE_B32_:%[0-9]+]]:vgpr_32 = DS_SWIZZLE_B32 [[COPY]], 0, 0, implicit $exec + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[DS_SWIZZLE_B32_:%[0-9]+]]:vgpr_32 = DS_SWIZZLE_B32 [[PRED_COPY]], 0, 0, implicit $exec ; CHECK-NEXT: S_ENDPGM 0, implicit [[DS_SWIZZLE_B32_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ds.swizzle), %0, 0 @@ -38,8 +38,8 @@ ; CHECK-LABEL: name: ds_swizzle_65535 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[DS_SWIZZLE_B32_:%[0-9]+]]:vgpr_32 = DS_SWIZZLE_B32 [[COPY]], 65535, 0, implicit $exec + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[DS_SWIZZLE_B32_:%[0-9]+]]:vgpr_32 = DS_SWIZZLE_B32 [[PRED_COPY]], 65535, 0, implicit $exec ; CHECK-NEXT: S_ENDPGM 0, implicit [[DS_SWIZZLE_B32_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ds.swizzle), %0, 65535 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fcmp.constants.w32.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fcmp.constants.w32.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fcmp.constants.w32.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fcmp.constants.w32.mir @@ -13,10 +13,10 @@ ; CHECK-LABEL: name: fcmp_false_f16 ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[V_CVT_F16_F32_t16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_t16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_CVT_F16_F32_t16_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_t16_e64 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[V_CVT_F16_F32_t16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_t16_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_CVT_F16_F32_t16_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_t16_e64 0, [[PRED_COPY1]], 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: [[V_CMP_F_F16_t16_e64_:%[0-9]+]]:sreg_32 = V_CMP_F_F16_t16_e64 0, [[V_CVT_F16_F32_t16_e64_]], 0, [[V_CVT_F16_F32_t16_e64_1]], 0, implicit $mode, implicit $exec ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_CMP_F_F16_t16_e64_]] %0:vgpr(s32) = COPY $vgpr0 @@ -39,10 +39,10 @@ ; CHECK-LABEL: name: fcmp_true_f16 ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[V_CVT_F16_F32_t16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_t16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_CVT_F16_F32_t16_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_t16_e64 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[V_CVT_F16_F32_t16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_t16_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_CVT_F16_F32_t16_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_t16_e64 0, [[PRED_COPY1]], 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: [[V_CMP_TRU_F16_t16_e64_:%[0-9]+]]:sreg_32 = 
V_CMP_TRU_F16_t16_e64 0, [[V_CVT_F16_F32_t16_e64_]], 0, [[V_CVT_F16_F32_t16_e64_1]], 0, implicit $mode, implicit $exec ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_CMP_TRU_F16_t16_e64_]] %0:vgpr(s32) = COPY $vgpr0 @@ -65,9 +65,9 @@ ; CHECK-LABEL: name: fcmp_false_f32 ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[V_CMP_F_F32_e64_:%[0-9]+]]:sreg_32 = V_CMP_F_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[V_CMP_F_F32_e64_:%[0-9]+]]:sreg_32 = V_CMP_F_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_CMP_F_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -87,9 +87,9 @@ ; CHECK-LABEL: name: fcmp_true_f32 ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[V_CMP_TRU_F32_e64_:%[0-9]+]]:sreg_32 = V_CMP_TRU_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[V_CMP_TRU_F32_e64_:%[0-9]+]]:sreg_32 = V_CMP_TRU_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_CMP_TRU_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -109,10 +109,10 @@ ; CHECK-LABEL: name: fcmp_false_f64 ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[V_CVT_F64_F32_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_CVT_F64_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_CVT_F64_F32_e64_1:%[0-9]+]]:vreg_64 = nofpexcept V_CVT_F64_F32_e64 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[V_CVT_F64_F32_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_CVT_F64_F32_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_CVT_F64_F32_e64_1:%[0-9]+]]:vreg_64 = nofpexcept V_CVT_F64_F32_e64 0, [[PRED_COPY1]], 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: [[V_CMP_F_F64_e64_:%[0-9]+]]:sreg_32 = V_CMP_F_F64_e64 0, [[V_CVT_F64_F32_e64_]], 0, [[V_CVT_F64_F32_e64_1]], 0, implicit $mode, implicit $exec ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_CMP_F_F64_e64_]] %0:vgpr(s32) = COPY $vgpr0 @@ -135,10 +135,10 @@ ; CHECK-LABEL: name: fcmp_true_f64 ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[V_CVT_F64_F32_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_CVT_F64_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_CVT_F64_F32_e64_1:%[0-9]+]]:vreg_64 = nofpexcept V_CVT_F64_F32_e64 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[V_CVT_F64_F32_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_CVT_F64_F32_e64 0, 
[[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_CVT_F64_F32_e64_1:%[0-9]+]]:vreg_64 = nofpexcept V_CVT_F64_F32_e64 0, [[PRED_COPY1]], 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: [[V_CMP_TRU_F64_e64_:%[0-9]+]]:sreg_32 = V_CMP_TRU_F64_e64 0, [[V_CVT_F64_F32_e64_]], 0, [[V_CVT_F64_F32_e64_1]], 0, implicit $mode, implicit $exec ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_CMP_TRU_F64_e64_]] %0:vgpr(s32) = COPY $vgpr0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fcmp.constants.w64.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fcmp.constants.w64.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fcmp.constants.w64.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fcmp.constants.w64.mir @@ -13,10 +13,10 @@ ; CHECK-LABEL: name: fcmp_false_f16 ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[V_CVT_F16_F32_t16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_t16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_CVT_F16_F32_t16_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_t16_e64 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[V_CVT_F16_F32_t16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_t16_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_CVT_F16_F32_t16_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_t16_e64 0, [[PRED_COPY1]], 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: [[V_CMP_F_F16_t16_e64_:%[0-9]+]]:sreg_64 = V_CMP_F_F16_t16_e64 0, [[V_CVT_F16_F32_t16_e64_]], 0, [[V_CVT_F16_F32_t16_e64_1]], 0, implicit $mode, implicit $exec ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_CMP_F_F16_t16_e64_]] %0:vgpr(s32) = COPY $vgpr0 @@ -39,10 +39,10 @@ ; CHECK-LABEL: name: fcmp_true_f16 ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[V_CVT_F16_F32_t16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_t16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_CVT_F16_F32_t16_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_t16_e64 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[V_CVT_F16_F32_t16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_t16_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_CVT_F16_F32_t16_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_t16_e64 0, [[PRED_COPY1]], 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: [[V_CMP_TRU_F16_t16_e64_:%[0-9]+]]:sreg_64 = V_CMP_TRU_F16_t16_e64 0, [[V_CVT_F16_F32_t16_e64_]], 0, [[V_CVT_F16_F32_t16_e64_1]], 0, implicit $mode, implicit $exec ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_CMP_TRU_F16_t16_e64_]] %0:vgpr(s32) = COPY $vgpr0 @@ -65,9 +65,9 @@ ; CHECK-LABEL: name: fcmp_false_f32 ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[V_CMP_F_F32_e64_:%[0-9]+]]:sreg_64 = V_CMP_F_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec + ; 
CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[V_CMP_F_F32_e64_:%[0-9]+]]:sreg_64 = V_CMP_F_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_CMP_F_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -87,9 +87,9 @@ ; CHECK-LABEL: name: fcmp_true_f32 ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[V_CMP_TRU_F32_e64_:%[0-9]+]]:sreg_64 = V_CMP_TRU_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[V_CMP_TRU_F32_e64_:%[0-9]+]]:sreg_64 = V_CMP_TRU_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_CMP_TRU_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -109,10 +109,10 @@ ; CHECK-LABEL: name: fcmp_false_f64 ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[V_CVT_F64_F32_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_CVT_F64_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_CVT_F64_F32_e64_1:%[0-9]+]]:vreg_64 = nofpexcept V_CVT_F64_F32_e64 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[V_CVT_F64_F32_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_CVT_F64_F32_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_CVT_F64_F32_e64_1:%[0-9]+]]:vreg_64 = nofpexcept V_CVT_F64_F32_e64 0, [[PRED_COPY1]], 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: [[V_CMP_F_F64_e64_:%[0-9]+]]:sreg_64 = V_CMP_F_F64_e64 0, [[V_CVT_F64_F32_e64_]], 0, [[V_CVT_F64_F32_e64_1]], 0, implicit $mode, implicit $exec ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_CMP_F_F64_e64_]] %0:vgpr(s32) = COPY $vgpr0 @@ -135,10 +135,10 @@ ; CHECK-LABEL: name: fcmp_true_f64 ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[V_CVT_F64_F32_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_CVT_F64_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_CVT_F64_F32_e64_1:%[0-9]+]]:vreg_64 = nofpexcept V_CVT_F64_F32_e64 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[V_CVT_F64_F32_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_CVT_F64_F32_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_CVT_F64_F32_e64_1:%[0-9]+]]:vreg_64 = nofpexcept V_CVT_F64_F32_e64 0, [[PRED_COPY1]], 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: [[V_CMP_TRU_F64_e64_:%[0-9]+]]:sreg_64 = V_CMP_TRU_F64_e64 0, [[V_CVT_F64_F32_e64_]], 0, [[V_CVT_F64_F32_e64_1]], 0, implicit $mode, implicit $exec ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_CMP_TRU_F64_e64_]] %0:vgpr(s32) = COPY $vgpr0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fmad.ftz.mir 
b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fmad.ftz.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fmad.ftz.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fmad.ftz.mir @@ -17,10 +17,10 @@ ; GCN-LABEL: name: fmad_ftz_s32_vvvv ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GCN-NEXT: [[V_MAC_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAC_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GCN-NEXT: [[V_MAC_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAC_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, [[PRED_COPY2]], 0, 0, implicit $mode, implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_MAC_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -42,10 +42,10 @@ ; GCN-LABEL: name: fmad_ftz_s32_vsvv ; GCN: liveins: $sgpr0, $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GCN-NEXT: [[V_MAC_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAC_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GCN-NEXT: [[V_MAC_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAC_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, [[PRED_COPY2]], 0, 0, implicit $mode, implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_MAC_F32_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = COPY $vgpr0 @@ -67,10 +67,10 @@ ; GCN-LABEL: name: fmad_ftz_s32_vvsv ; GCN: liveins: $sgpr0, $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GCN-NEXT: [[V_MAC_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAC_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GCN-NEXT: [[V_MAC_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAC_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, [[PRED_COPY2]], 0, 0, implicit $mode, implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_MAC_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:sgpr(s32) = COPY $sgpr0 @@ -92,11 +92,11 @@ ; GCN-LABEL: name: fmad_ftz_s32_vvvs ; GCN: liveins: $sgpr0, $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY2]] - ; GCN-NEXT: [[V_MAC_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAC_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY3]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = 
PRED_COPY [[PRED_COPY2]] + ; GCN-NEXT: [[V_MAC_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAC_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, [[PRED_COPY3]], 0, 0, implicit $mode, implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_MAC_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr0 @@ -120,9 +120,9 @@ ; GCN-LABEL: name: fmad_ftz_s32_vssv ; GCN: liveins: $sgpr0, $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[V_MAC_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAC_F32_e64 0, [[COPY]], 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[V_MAC_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAC_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, 0, implicit $mode, implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_MAC_F32_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = COPY $vgpr0 @@ -143,10 +143,10 @@ ; GCN-LABEL: name: fmad_ftz_s32_vsvs ; GCN: liveins: $sgpr0, $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]] - ; GCN-NEXT: [[V_MAC_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAC_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]] + ; GCN-NEXT: [[V_MAC_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAC_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, [[PRED_COPY2]], 0, 0, implicit $mode, implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_MAC_F32_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = COPY $vgpr0 @@ -167,10 +167,10 @@ ; GCN-LABEL: name: fmad_ftz_s32_vvss ; GCN: liveins: $sgpr0, $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]] - ; GCN-NEXT: [[V_MAC_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAC_F32_e64 0, [[COPY1]], 0, [[COPY]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]] + ; GCN-NEXT: [[V_MAC_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAC_F32_e64 0, [[PRED_COPY1]], 0, [[PRED_COPY]], 0, [[PRED_COPY2]], 0, 0, implicit $mode, implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_MAC_F32_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = COPY $vgpr0 @@ -191,9 +191,9 @@ ; GCN-LABEL: name: fmad_ftz_s32_vsss ; GCN: liveins: $sgpr0, $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]] - ; GCN-NEXT: [[V_MAC_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAC_F32_e64 0, [[COPY]], 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]] + ; GCN-NEXT: [[V_MAC_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAC_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, 0, implicit $mode, implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_MAC_F32_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = G_INTRINSIC 
intrinsic(@llvm.amdgcn.fmad.ftz), %0, %0, %0 @@ -232,10 +232,10 @@ ; GCN-LABEL: name: fmad_ftz_s32_vvv_fneg_v ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GCN-NEXT: [[V_MAD_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAD_F32_e64 0, [[COPY]], 0, [[COPY1]], 1, [[COPY2]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GCN-NEXT: [[V_MAD_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAD_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 1, [[PRED_COPY2]], 0, 0, implicit $mode, implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_MAD_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fmed3.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fmed3.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fmed3.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fmed3.mir @@ -14,10 +14,10 @@ ; GCN-LABEL: name: fmed3_s32_vvvv ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GCN-NEXT: [[V_MED3_F32_e64_:%[0-9]+]]:vgpr_32 = V_MED3_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GCN-NEXT: [[V_MED3_F32_e64_:%[0-9]+]]:vgpr_32 = V_MED3_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, [[PRED_COPY2]], 0, 0, implicit $mode, implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_MED3_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -39,10 +39,10 @@ ; GCN-LABEL: name: fmed3_s32_vsvv ; GCN: liveins: $sgpr0, $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GCN-NEXT: [[V_MED3_F32_e64_:%[0-9]+]]:vgpr_32 = V_MED3_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GCN-NEXT: [[V_MED3_F32_e64_:%[0-9]+]]:vgpr_32 = V_MED3_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, [[PRED_COPY2]], 0, 0, implicit $mode, implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_MED3_F32_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = COPY $vgpr0 @@ -64,10 +64,10 @@ ; GCN-LABEL: name: fmed3_s32_vvsv ; GCN: liveins: $sgpr0, $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GCN-NEXT: [[V_MED3_F32_e64_:%[0-9]+]]:vgpr_32 = V_MED3_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: 
[[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GCN-NEXT: [[V_MED3_F32_e64_:%[0-9]+]]:vgpr_32 = V_MED3_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, [[PRED_COPY2]], 0, 0, implicit $mode, implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_MED3_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:sgpr(s32) = COPY $sgpr0 @@ -89,10 +89,10 @@ ; GCN-LABEL: name: fmed3_s32_vvvs ; GCN: liveins: $sgpr0, $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[V_MED3_F32_e64_:%[0-9]+]]:vgpr_32 = V_MED3_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[V_MED3_F32_e64_:%[0-9]+]]:vgpr_32 = V_MED3_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, [[PRED_COPY2]], 0, 0, implicit $mode, implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_MED3_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr0 @@ -116,9 +116,9 @@ ; GCN-LABEL: name: fmed3_s32_vssv ; GCN: liveins: $sgpr0, $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[V_MED3_F32_e64_:%[0-9]+]]:vgpr_32 = V_MED3_F32_e64 0, [[COPY]], 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[V_MED3_F32_e64_:%[0-9]+]]:vgpr_32 = V_MED3_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, 0, implicit $mode, implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_MED3_F32_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = COPY $vgpr0 @@ -139,9 +139,9 @@ ; GCN-LABEL: name: fmed3_s32_vsvs ; GCN: liveins: $sgpr0, $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[V_MED3_F32_e64_:%[0-9]+]]:vgpr_32 = V_MED3_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[V_MED3_F32_e64_:%[0-9]+]]:vgpr_32 = V_MED3_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_MED3_F32_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = COPY $vgpr0 @@ -162,9 +162,9 @@ ; GCN-LABEL: name: fmed3_s32_vvss ; GCN: liveins: $sgpr0, $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[V_MED3_F32_e64_:%[0-9]+]]:vgpr_32 = V_MED3_F32_e64 0, [[COPY1]], 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[V_MED3_F32_e64_:%[0-9]+]]:vgpr_32 = V_MED3_F32_e64 0, [[PRED_COPY1]], 0, [[PRED_COPY]], 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_MED3_F32_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = COPY $vgpr0 @@ -185,8 +185,8 @@ ; GCN-LABEL: name: fmed3_s32_vsss ; GCN: liveins: $sgpr0, $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: 
[[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[V_MED3_F32_e64_:%[0-9]+]]:vgpr_32 = V_MED3_F32_e64 0, [[COPY]], 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[V_MED3_F32_e64_:%[0-9]+]]:vgpr_32 = V_MED3_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY]], 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_MED3_F32_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmed3), %0, %0, %0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fmed3.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fmed3.s16.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fmed3.s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fmed3.s16.mir @@ -18,14 +18,14 @@ ; GCN-LABEL: name: fmed3_s16_vvvv ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2 - ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GCN: %6:vgpr_32 = nofpexcept V_MED3_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, 0, implicit $mode, implicit $exec + ; GCN: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GCN: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GCN: %6:vgpr_32 = nofpexcept V_MED3_F16_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, [[PRED_COPY2]], 0, 0, 0, implicit $mode, implicit $exec ; GCN: S_ENDPGM 0, implicit %6 - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s32) = COPY $vgpr2 + %0:vgpr(s32) = PRED_COPY $vgpr0 + %1:vgpr(s32) = PRED_COPY $vgpr1 + %2:vgpr(s32) = PRED_COPY $vgpr2 %3:vgpr(s16) = G_TRUNC %0 %4:vgpr(s16) = G_TRUNC %1 %5:vgpr(s16) = G_TRUNC %2 @@ -45,14 +45,14 @@ ; GCN-LABEL: name: fmed3_s16_vsvv ; GCN: liveins: $sgpr0, $vgpr0, $vgpr1 - ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GCN: %6:vgpr_32 = nofpexcept V_MED3_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, 0, implicit $mode, implicit $exec + ; GCN: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GCN: %6:vgpr_32 = nofpexcept V_MED3_F16_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, [[PRED_COPY2]], 0, 0, 0, implicit $mode, implicit $exec ; GCN: S_ENDPGM 0, implicit %6 - %0:sgpr(s32) = COPY $sgpr0 - %1:vgpr(s32) = COPY $vgpr0 - %2:vgpr(s32) = COPY $vgpr1 + %0:sgpr(s32) = PRED_COPY $sgpr0 + %1:vgpr(s32) = PRED_COPY $vgpr0 + %2:vgpr(s32) = PRED_COPY $vgpr1 %3:sgpr(s16) = G_TRUNC %0 %4:vgpr(s16) = G_TRUNC %1 %5:vgpr(s16) = G_TRUNC %2 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fract.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fract.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fract.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fract.mir @@ -15,9 +15,9 @@ ; CHECK-LABEL: name: fract_s32_vs ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; CHECK-NEXT: %1:vgpr_32 = nofpexcept V_FRACT_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: S_ENDPGM 0, implicit %1 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; CHECK-NEXT: [[V_FRACT_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept 
V_FRACT_F32_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_FRACT_F32_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), %0 S_ENDPGM 0, implicit %1 @@ -36,9 +36,9 @@ ; CHECK-LABEL: name: fract_s32_vv ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: %1:vgpr_32 = nofpexcept V_FRACT_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: S_ENDPGM 0, implicit %1 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[V_FRACT_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_FRACT_F32_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_FRACT_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), %0 S_ENDPGM 0, implicit %1 @@ -57,9 +57,9 @@ ; CHECK-LABEL: name: fract_s64_vs ; CHECK: liveins: $sgpr0_sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: %1:vreg_64 = nofpexcept V_FRACT_F64_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: S_ENDPGM 0, implicit %1 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[V_FRACT_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_FRACT_F64_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_FRACT_F64_e64_]] %0:sgpr(s64) = COPY $sgpr0_sgpr1 %1:vgpr(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), %0 S_ENDPGM 0, implicit %1 @@ -78,9 +78,9 @@ ; CHECK-LABEL: name: fract_s64_vv ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: %1:vreg_64 = nofpexcept V_FRACT_F64_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: S_ENDPGM 0, implicit %1 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[V_FRACT_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_FRACT_F64_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_FRACT_F64_e64_]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), %0 S_ENDPGM 0, implicit %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fract.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fract.s16.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fract.s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fract.s16.mir @@ -18,10 +18,10 @@ ; CHECK-LABEL: name: fract_s16_vs ; CHECK: liveins: $sgpr0 - ; CHECK: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; CHECK: %2:vgpr_32 = nofpexcept V_FRACT_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; CHECK: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; CHECK: %2:vgpr_32 = nofpexcept V_FRACT_F16_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec ; CHECK: S_ENDPGM 0, implicit %2 - %0:sgpr(s32) = COPY $sgpr0 + %0:sgpr(s32) = PRED_COPY $sgpr0 %1:sgpr(s16) = G_TRUNC %0 %2:vgpr(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), %1 S_ENDPGM 0, implicit %2 @@ -39,10 +39,10 @@ ; CHECK-LABEL: name: fract_s16_vv ; CHECK: liveins: $vgpr0 - ; CHECK: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK: %2:vgpr_32 = nofpexcept V_FRACT_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; CHECK: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK: %2:vgpr_32 = nofpexcept V_FRACT_F16_e64 0, 
[[PRED_COPY]], 0, 0, implicit $mode, implicit $exec ; CHECK: S_ENDPGM 0, implicit %2 - %0:vgpr(s32) = COPY $vgpr0 + %0:vgpr(s32) = PRED_COPY $vgpr0 %1:vgpr(s16) = G_TRUNC %0 %2:vgpr(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), %1 S_ENDPGM 0, implicit %2 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.ldexp.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.ldexp.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.ldexp.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.ldexp.mir @@ -13,10 +13,10 @@ ; GCN-LABEL: name: ldexp_s32_vsv ; GCN: liveins: $sgpr0, $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: %2:vgpr_32 = nofpexcept V_LDEXP_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: S_ENDPGM 0, implicit %2 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[V_LDEXP_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_LDEXP_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: S_ENDPGM 0, implicit [[V_LDEXP_F32_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = COPY $vgpr0 %2:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ldexp), %0, %1 @@ -35,10 +35,10 @@ ; GCN-LABEL: name: ldexp_s32_vvs ; GCN: liveins: $sgpr0, $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: %2:vgpr_32 = nofpexcept V_LDEXP_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: S_ENDPGM 0, implicit %2 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[V_LDEXP_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_LDEXP_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: S_ENDPGM 0, implicit [[V_LDEXP_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:sgpr(s32) = COPY $sgpr0 %2:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ldexp), %0, %1 @@ -57,10 +57,10 @@ ; GCN-LABEL: name: ldexp_s32_vvv ; GCN: liveins: $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GCN-NEXT: %2:vgpr_32 = nofpexcept V_LDEXP_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: S_ENDPGM 0, implicit %2 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GCN-NEXT: [[V_LDEXP_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_LDEXP_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: S_ENDPGM 0, implicit [[V_LDEXP_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ldexp), %0, %1 @@ -79,10 +79,10 @@ ; GCN-LABEL: name: ldexp_s64_vsv ; GCN: liveins: $sgpr0_sgpr1, $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: %2:vreg_64 = nofpexcept V_LDEXP_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: S_ENDPGM 0, implicit %2 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: 
[[V_LDEXP_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_LDEXP_F64_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: S_ENDPGM 0, implicit [[V_LDEXP_F64_e64_]] %0:sgpr(s64) = COPY $sgpr0_sgpr1 %1:vgpr(s32) = COPY $vgpr0 %2:vgpr(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ldexp), %0, %1 @@ -101,10 +101,10 @@ ; GCN-LABEL: name: ldexp_s64_vvs ; GCN: liveins: $sgpr0_sgpr1, $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: %2:vreg_64 = nofpexcept V_LDEXP_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: S_ENDPGM 0, implicit %2 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[V_LDEXP_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_LDEXP_F64_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: S_ENDPGM 0, implicit [[V_LDEXP_F64_e64_]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:sgpr(s32) = COPY $sgpr0 %2:vgpr(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ldexp), %0, %1 @@ -123,10 +123,10 @@ ; GCN-LABEL: name: ldexp_s64_vvv ; GCN: liveins: $vgpr0_vgpr1, $vgpr2 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GCN-NEXT: %2:vreg_64 = nofpexcept V_LDEXP_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: S_ENDPGM 0, implicit %2 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GCN-NEXT: [[V_LDEXP_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_LDEXP_F64_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: S_ENDPGM 0, implicit [[V_LDEXP_F64_e64_]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 %2:vgpr(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ldexp), %0, %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.ldexp.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.ldexp.s16.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.ldexp.s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.ldexp.s16.mir @@ -17,12 +17,12 @@ liveins: $sgpr0, $vgpr0 ; GCN-LABEL: name: ldexp_s16_vsv ; GCN: liveins: $sgpr0, $vgpr0 - ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN: %3:vgpr_32 = nofpexcept V_LDEXP_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec + ; GCN: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN: %3:vgpr_32 = nofpexcept V_LDEXP_F16_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, 0, implicit $mode, implicit $exec ; GCN: S_ENDPGM 0, implicit %3 - %0:sgpr(s32) = COPY $sgpr0 - %1:vgpr(s32) = COPY $vgpr0 + %0:sgpr(s32) = PRED_COPY $sgpr0 + %1:vgpr(s32) = PRED_COPY $vgpr0 %2:sgpr(s16) = G_TRUNC %0 %3:vgpr(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.ldexp), %2, %1 S_ENDPGM 0, implicit %3 @@ -39,12 +39,12 @@ liveins: $sgpr0, $vgpr0 ; GCN-LABEL: name: ldexp_s16_vvs ; GCN: liveins: $sgpr0, $vgpr0 - ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN: %3:vgpr_32 = nofpexcept V_LDEXP_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec + ; GCN: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN: 
[[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN: %3:vgpr_32 = nofpexcept V_LDEXP_F16_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, 0, implicit $mode, implicit $exec ; GCN: S_ENDPGM 0, implicit %3 - %0:vgpr(s32) = COPY $vgpr0 - %1:sgpr(s32) = COPY $sgpr0 + %0:vgpr(s32) = PRED_COPY $vgpr0 + %1:sgpr(s32) = PRED_COPY $sgpr0 %2:vgpr(s16) = G_TRUNC %0 %3:vgpr(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.ldexp), %2, %1 S_ENDPGM 0, implicit %3 @@ -61,12 +61,12 @@ liveins: $vgpr0, $vgpr1 ; GCN-LABEL: name: ldexp_s16_vvv ; GCN: liveins: $vgpr0, $vgpr1 - ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GCN: %3:vgpr_32 = nofpexcept V_LDEXP_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec + ; GCN: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GCN: %3:vgpr_32 = nofpexcept V_LDEXP_F16_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, 0, implicit $mode, implicit $exec ; GCN: S_ENDPGM 0, implicit %3 - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 + %0:vgpr(s32) = PRED_COPY $vgpr0 + %1:vgpr(s32) = PRED_COPY $vgpr1 %2:vgpr(s16) = G_TRUNC %0 %3:vgpr(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.ldexp), %2, %1 S_ENDPGM 0, implicit %3 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.mbcnt.lo.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.mbcnt.lo.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.mbcnt.lo.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.mbcnt.lo.mir @@ -27,9 +27,9 @@ ; GCN-LABEL: name: mbcnt_lo_sv ; GCN: liveins: $sgpr0, $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[V_MBCNT_LO_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_MBCNT_LO_U32_B32_e64 [[COPY]], [[COPY1]], implicit $exec + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[V_MBCNT_LO_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_MBCNT_LO_U32_B32_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_MBCNT_LO_U32_B32_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = COPY $vgpr0 @@ -48,9 +48,9 @@ ; GCN-LABEL: name: smin_s32_vs ; GCN: liveins: $sgpr0, $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[V_MBCNT_LO_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_MBCNT_LO_U32_B32_e64 [[COPY]], [[COPY1]], implicit $exec + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[V_MBCNT_LO_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_MBCNT_LO_U32_B32_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_MBCNT_LO_U32_B32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:sgpr(s32) = COPY $sgpr0 @@ -69,9 +69,9 @@ ; GCN-LABEL: name: smin_s32_vv ; GCN: liveins: $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GCN-NEXT: [[V_MBCNT_LO_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_MBCNT_LO_U32_B32_e64 [[COPY]], [[COPY1]], implicit $exec + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GCN-NEXT: [[V_MBCNT_LO_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_MBCNT_LO_U32_B32_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; 
GCN-NEXT: S_ENDPGM 0, implicit [[V_MBCNT_LO_U32_B32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.mul.u24.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.mul.u24.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.mul.u24.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.mul.u24.mir @@ -13,9 +13,9 @@ ; GCN-LABEL: name: mul_u24_vsv ; GCN: liveins: $sgpr0, $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[V_MUL_U32_U24_e64_:%[0-9]+]]:vgpr_32 = V_MUL_U32_U24_e64 [[COPY]], [[COPY1]], 0, implicit $exec + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[V_MUL_U32_U24_e64_:%[0-9]+]]:vgpr_32 = V_MUL_U32_U24_e64 [[PRED_COPY]], [[PRED_COPY1]], 0, implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_MUL_U32_U24_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = COPY $vgpr0 @@ -35,9 +35,9 @@ ; GCN-LABEL: name: mul_u24_vvs ; GCN: liveins: $sgpr0, $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[V_MUL_U32_U24_e64_:%[0-9]+]]:vgpr_32 = V_MUL_U32_U24_e64 [[COPY]], [[COPY1]], 0, implicit $exec + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[V_MUL_U32_U24_e64_:%[0-9]+]]:vgpr_32 = V_MUL_U32_U24_e64 [[PRED_COPY]], [[PRED_COPY1]], 0, implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_MUL_U32_U24_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:sgpr(s32) = COPY $sgpr0 @@ -57,9 +57,9 @@ ; GCN-LABEL: name: mul_u24_vvv ; GCN: liveins: $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GCN-NEXT: [[V_MUL_U32_U24_e64_:%[0-9]+]]:vgpr_32 = V_MUL_U32_U24_e64 [[COPY]], [[COPY1]], 0, implicit $exec + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GCN-NEXT: [[V_MUL_U32_U24_e64_:%[0-9]+]]:vgpr_32 = V_MUL_U32_U24_e64 [[PRED_COPY]], [[PRED_COPY1]], 0, implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_MUL_U32_U24_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.mulhi.i24.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.mulhi.i24.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.mulhi.i24.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.mulhi.i24.mir @@ -13,9 +13,9 @@ ; CHECK-LABEL: name: mulhi_i24_vsv ; CHECK: liveins: $sgpr0, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[V_MUL_HI_I32_I24_e64_:%[0-9]+]]:vgpr_32 = V_MUL_HI_I32_I24_e64 [[COPY]], [[COPY1]], implicit $exec + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[V_MUL_HI_I32_I24_e64_:%[0-9]+]]:vgpr_32 = V_MUL_HI_I32_I24_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_MUL_HI_I32_I24_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = COPY $vgpr0 @@ -35,9 +35,9 @@ ; CHECK-LABEL: name: mulhi_i24_vvs ; CHECK: liveins: $sgpr0, $vgpr0 ; CHECK-NEXT: {{ 
$}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; CHECK-NEXT: [[V_MUL_HI_I32_I24_e64_:%[0-9]+]]:vgpr_32 = V_MUL_HI_I32_I24_e64 [[COPY]], [[COPY1]], implicit $exec + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; CHECK-NEXT: [[V_MUL_HI_I32_I24_e64_:%[0-9]+]]:vgpr_32 = V_MUL_HI_I32_I24_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_MUL_HI_I32_I24_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:sgpr(s32) = COPY $sgpr0 @@ -57,9 +57,9 @@ ; CHECK-LABEL: name: mulhi_i24_vvv ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[V_MUL_HI_I32_I24_e64_:%[0-9]+]]:vgpr_32 = V_MUL_HI_I32_I24_e64 [[COPY]], [[COPY1]], implicit $exec + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[V_MUL_HI_I32_I24_e64_:%[0-9]+]]:vgpr_32 = V_MUL_HI_I32_I24_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_MUL_HI_I32_I24_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.mulhi.u24.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.mulhi.u24.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.mulhi.u24.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.mulhi.u24.mir @@ -13,9 +13,9 @@ ; CHECK-LABEL: name: mulhi_u24_vsv ; CHECK: liveins: $sgpr0, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[V_MUL_HI_U32_U24_e64_:%[0-9]+]]:vgpr_32 = V_MUL_HI_U32_U24_e64 [[COPY]], [[COPY1]], implicit $exec + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[V_MUL_HI_U32_U24_e64_:%[0-9]+]]:vgpr_32 = V_MUL_HI_U32_U24_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_MUL_HI_U32_U24_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = COPY $vgpr0 @@ -35,9 +35,9 @@ ; CHECK-LABEL: name: mulhi_u24_vvs ; CHECK: liveins: $sgpr0, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; CHECK-NEXT: [[V_MUL_HI_U32_U24_e64_:%[0-9]+]]:vgpr_32 = V_MUL_HI_U32_U24_e64 [[COPY]], [[COPY1]], implicit $exec + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; CHECK-NEXT: [[V_MUL_HI_U32_U24_e64_:%[0-9]+]]:vgpr_32 = V_MUL_HI_U32_U24_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_MUL_HI_U32_U24_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:sgpr(s32) = COPY $sgpr0 @@ -57,9 +57,9 @@ ; CHECK-LABEL: name: mulhi_u24_vvv ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[V_MUL_HI_U32_U24_e64_:%[0-9]+]]:vgpr_32 = V_MUL_HI_U32_U24_e64 [[COPY]], [[COPY1]], implicit $exec + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: 
[[V_MUL_HI_U32_U24_e64_:%[0-9]+]]:vgpr_32 = V_MUL_HI_U32_U24_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_MUL_HI_U32_U24_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.rcp.legacy.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.rcp.legacy.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.rcp.legacy.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.rcp.legacy.mir @@ -19,10 +19,10 @@ ; CHECK-LABEL: name: rcp_legacy_s32_vs ; CHECK: liveins: $sgpr0 - ; CHECK: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; CHECK: %1:vgpr_32 = nofpexcept V_RCP_LEGACY_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; CHECK: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; CHECK: %1:vgpr_32 = nofpexcept V_RCP_LEGACY_F32_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec ; CHECK: S_ENDPGM 0, implicit %1 - %0:sgpr(s32) = COPY $sgpr0 + %0:sgpr(s32) = PRED_COPY $sgpr0 %1:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp.legacy), %0 S_ENDPGM 0, implicit %1 ... @@ -39,10 +39,10 @@ ; CHECK-LABEL: name: rcp_legacy_s32_vv ; CHECK: liveins: $vgpr0 - ; CHECK: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK: %1:vgpr_32 = nofpexcept V_RCP_LEGACY_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; CHECK: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK: %1:vgpr_32 = nofpexcept V_RCP_LEGACY_F32_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec ; CHECK: S_ENDPGM 0, implicit %1 - %0:vgpr(s32) = COPY $vgpr0 + %0:vgpr(s32) = PRED_COPY $vgpr0 %1:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp.legacy), %0 S_ENDPGM 0, implicit %1 ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.rcp.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.rcp.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.rcp.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.rcp.mir @@ -15,9 +15,9 @@ ; CHECK-LABEL: name: rcp_s32_vs ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; CHECK-NEXT: %1:vgpr_32 = nofpexcept V_RCP_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: S_ENDPGM 0, implicit %1 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; CHECK-NEXT: [[V_RCP_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_RCP_F32_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_RCP_F32_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), %0 S_ENDPGM 0, implicit %1 @@ -36,9 +36,9 @@ ; CHECK-LABEL: name: rcp_s32_vv ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: %1:vgpr_32 = nofpexcept V_RCP_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: S_ENDPGM 0, implicit %1 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[V_RCP_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_RCP_F32_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_RCP_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), %0 S_ENDPGM 0, implicit %1 @@ -57,9 +57,9 @@ ; CHECK-LABEL: name: rcp_s64_vs ; CHECK: liveins: $sgpr0_sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: %1:vreg_64 = nofpexcept V_RCP_F64_e64 0, 
[[COPY]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: S_ENDPGM 0, implicit %1 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[V_RCP_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_RCP_F64_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_RCP_F64_e64_]] %0:sgpr(s64) = COPY $sgpr0_sgpr1 %1:vgpr(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), %0 S_ENDPGM 0, implicit %1 @@ -78,9 +78,9 @@ ; CHECK-LABEL: name: rcp_s64_vv ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: %1:vreg_64 = nofpexcept V_RCP_F64_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: S_ENDPGM 0, implicit %1 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[V_RCP_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_RCP_F64_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_RCP_F64_e64_]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), %0 S_ENDPGM 0, implicit %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.rcp.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.rcp.s16.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.rcp.s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.rcp.s16.mir @@ -17,10 +17,10 @@ ; CHECK-LABEL: name: rcp_s16_vs ; CHECK: liveins: $sgpr0 - ; CHECK: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; CHECK: %2:vgpr_32 = nofpexcept V_RCP_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; CHECK: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; CHECK: %2:vgpr_32 = nofpexcept V_RCP_F16_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec ; CHECK: S_ENDPGM 0, implicit %2 - %0:sgpr(s32) = COPY $sgpr0 + %0:sgpr(s32) = PRED_COPY $sgpr0 %1:sgpr(s16) = G_TRUNC %0 %2:vgpr(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), %1 S_ENDPGM 0, implicit %2 @@ -38,10 +38,10 @@ ; CHECK-LABEL: name: rcp_s16_vv ; CHECK: liveins: $vgpr0 - ; CHECK: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK: %2:vgpr_32 = nofpexcept V_RCP_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; CHECK: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK: %2:vgpr_32 = nofpexcept V_RCP_F16_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec ; CHECK: S_ENDPGM 0, implicit %2 - %0:vgpr(s32) = COPY $vgpr0 + %0:vgpr(s32) = PRED_COPY $vgpr0 %1:vgpr(s16) = G_TRUNC %0 %2:vgpr(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), %1 S_ENDPGM 0, implicit %2 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.readfirstlane.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.readfirstlane.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.readfirstlane.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.readfirstlane.mir @@ -16,8 +16,8 @@ ; GCN-LABEL: name: readfirstlane_v ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY]], implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_READFIRSTLANE_B32_]] %0:vgpr(s32) = COPY $vgpr0 %1:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), %0 @@ -35,8 +35,8 @@ ; GCN-LABEL: 
name: readfirstlane_v_imm ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 123, implicit $exec - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY [[V_MOV_B32_e32_]] - ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 [[COPY]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY [[V_MOV_B32_e32_]] + ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 [[PRED_COPY]] ; GCN-NEXT: S_ENDPGM 0, implicit [[S_MOV_B32_]] %0:vgpr(s32) = G_CONSTANT i32 123 %1:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), %0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.reloc.constant.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.reloc.constant.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.reloc.constant.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.reloc.constant.mir @@ -22,7 +22,7 @@ ; GCN-LABEL: name: reloc_constant_sgpr32 ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @arst - ; GCN-NEXT: $sgpr0 = COPY [[S_MOV_B32_]] + ; GCN-NEXT: $sgpr0 = PRED_COPY [[S_MOV_B32_]] ; GCN-NEXT: S_ENDPGM 0, implicit $sgpr0 %0:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.reloc.constant), !0 $sgpr0 = COPY %0 @@ -40,7 +40,7 @@ ; GCN-LABEL: name: reloc_constant_vgpr32 ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 target-flags(amdgpu-abs32-lo) @arst, implicit $exec - ; GCN-NEXT: $vgpr0 = COPY [[V_MOV_B32_e32_]] + ; GCN-NEXT: $vgpr0 = PRED_COPY [[V_MOV_B32_e32_]] ; GCN-NEXT: S_ENDPGM 0, implicit $vgpr0 %0:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.reloc.constant), !0 $vgpr0 = COPY %0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.rsq.clamp.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.rsq.clamp.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.rsq.clamp.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.rsq.clamp.mir @@ -19,10 +19,10 @@ ; CHECK-LABEL: name: rsq_clamp_s32_vs ; CHECK: liveins: $sgpr0 - ; CHECK: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; CHECK: %1:vgpr_32 = nofpexcept V_RSQ_CLAMP_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; CHECK: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; CHECK: %1:vgpr_32 = nofpexcept V_RSQ_CLAMP_F32_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec ; CHECK: S_ENDPGM 0, implicit %1 - %0:sgpr(s32) = COPY $sgpr0 + %0:sgpr(s32) = PRED_COPY $sgpr0 %1:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rsq.clamp), %0 S_ENDPGM 0, implicit %1 ... @@ -39,10 +39,10 @@ ; CHECK-LABEL: name: rsq_clamp_s32_vv ; CHECK: liveins: $vgpr0 - ; CHECK: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK: %1:vgpr_32 = nofpexcept V_RSQ_CLAMP_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; CHECK: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK: %1:vgpr_32 = nofpexcept V_RSQ_CLAMP_F32_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec ; CHECK: S_ENDPGM 0, implicit %1 - %0:vgpr(s32) = COPY $vgpr0 + %0:vgpr(s32) = PRED_COPY $vgpr0 %1:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rsq.clamp), %0 S_ENDPGM 0, implicit %1 ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.rsq.legacy.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.rsq.legacy.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.rsq.legacy.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.rsq.legacy.mir @@ -19,10 +19,10 @@ ; CHECK-LABEL: name: rsq_legacy_s32_vs ; CHECK: liveins: $sgpr0 - ; CHECK: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; CHECK: %1:vgpr_32 = nofpexcept V_RSQ_LEGACY_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; CHECK: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; CHECK: %1:vgpr_32 = nofpexcept V_RSQ_LEGACY_F32_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec ; CHECK: S_ENDPGM 0, implicit %1 - %0:sgpr(s32) = COPY $sgpr0 + %0:sgpr(s32) = PRED_COPY $sgpr0 %1:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rsq.legacy), %0 S_ENDPGM 0, implicit %1 ... @@ -39,10 +39,10 @@ ; CHECK-LABEL: name: rsq_legacy_s32_vv ; CHECK: liveins: $vgpr0 - ; CHECK: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK: %1:vgpr_32 = nofpexcept V_RSQ_LEGACY_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; CHECK: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK: %1:vgpr_32 = nofpexcept V_RSQ_LEGACY_F32_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec ; CHECK: S_ENDPGM 0, implicit %1 - %0:vgpr(s32) = COPY $vgpr0 + %0:vgpr(s32) = PRED_COPY $vgpr0 %1:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rsq.legacy), %0 S_ENDPGM 0, implicit %1 ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.rsq.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.rsq.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.rsq.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.rsq.mir @@ -15,9 +15,9 @@ ; CHECK-LABEL: name: rsq_s32_vs ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; CHECK-NEXT: %1:vgpr_32 = nofpexcept V_RSQ_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: S_ENDPGM 0, implicit %1 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; CHECK-NEXT: [[V_RSQ_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_RSQ_F32_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_RSQ_F32_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rsq), %0 S_ENDPGM 0, implicit %1 @@ -36,9 +36,9 @@ ; CHECK-LABEL: name: rsq_s32_vv ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: %1:vgpr_32 = nofpexcept V_RSQ_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: S_ENDPGM 0, implicit %1 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[V_RSQ_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_RSQ_F32_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_RSQ_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rsq), %0 S_ENDPGM 0, implicit %1 @@ -57,9 +57,9 @@ ; CHECK-LABEL: name: rsq_s64_vs ; CHECK: liveins: $sgpr0_sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: %1:vreg_64 = nofpexcept V_RSQ_F64_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: S_ENDPGM 0, implicit %1 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[V_RSQ_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept 
V_RSQ_F64_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_RSQ_F64_e64_]] %0:sgpr(s64) = COPY $sgpr0_sgpr1 %1:vgpr(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rsq), %0 S_ENDPGM 0, implicit %1 @@ -78,9 +78,9 @@ ; CHECK-LABEL: name: rsq_s64_vv ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: %1:vreg_64 = nofpexcept V_RSQ_F64_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: S_ENDPGM 0, implicit %1 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[V_RSQ_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_RSQ_F64_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_RSQ_F64_e64_]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rsq), %0 S_ENDPGM 0, implicit %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.rsq.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.rsq.s16.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.rsq.s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.rsq.s16.mir @@ -17,10 +17,10 @@ ; CHECK-LABEL: name: rsq_s16_vs ; CHECK: liveins: $sgpr0 - ; CHECK: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; CHECK: %2:vgpr_32 = nofpexcept V_RSQ_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; CHECK: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; CHECK: %2:vgpr_32 = nofpexcept V_RSQ_F16_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec ; CHECK: S_ENDPGM 0, implicit %2 - %0:sgpr(s32) = COPY $sgpr0 + %0:sgpr(s32) = PRED_COPY $sgpr0 %1:sgpr(s16) = G_TRUNC %0 %2:vgpr(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.rsq), %1 S_ENDPGM 0, implicit %2 @@ -38,10 +38,10 @@ ; CHECK-LABEL: name: rsq_s16_vv ; CHECK: liveins: $vgpr0 - ; CHECK: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK: %2:vgpr_32 = nofpexcept V_RSQ_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; CHECK: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK: %2:vgpr_32 = nofpexcept V_RSQ_F16_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec ; CHECK: S_ENDPGM 0, implicit %2 - %0:vgpr(s32) = COPY $vgpr0 + %0:vgpr(s32) = PRED_COPY $vgpr0 %1:vgpr(s16) = G_TRUNC %0 %2:vgpr(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.rsq), %1 S_ENDPGM 0, implicit %2 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.s.sendmsg.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.s.sendmsg.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.s.sendmsg.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.s.sendmsg.mir @@ -14,8 +14,8 @@ ; GCN-LABEL: name: test_sendmsg ; GCN: liveins: $sgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: $m0 = COPY [[COPY]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: $m0 = COPY [[PRED_COPY]] ; GCN-NEXT: S_SENDMSG 1, implicit $exec, implicit $m0 ; GCN-NEXT: S_ENDPGM 0 %0:sgpr(s32) = COPY $sgpr0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.sffbh.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.sffbh.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.sffbh.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.sffbh.mir @@ -14,8 +14,8 @@ ; CHECK-LABEL: name: sffbh_s32_ss ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; CHECK-NEXT: 
[[S_FLBIT_I32_:%[0-9]+]]:sreg_32 = S_FLBIT_I32 [[COPY]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; CHECK-NEXT: [[S_FLBIT_I32_:%[0-9]+]]:sreg_32 = S_FLBIT_I32 [[PRED_COPY]] ; CHECK-NEXT: S_ENDPGM 0, implicit [[S_FLBIT_I32_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sffbh), %0 @@ -35,8 +35,8 @@ ; CHECK-LABEL: name: sffbh_s32_vs ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; CHECK-NEXT: [[V_FFBH_I32_e64_:%[0-9]+]]:vgpr_32 = V_FFBH_I32_e64 [[COPY]], implicit $exec + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; CHECK-NEXT: [[V_FFBH_I32_e64_:%[0-9]+]]:vgpr_32 = V_FFBH_I32_e64 [[PRED_COPY]], implicit $exec ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_FFBH_I32_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sffbh), %0 @@ -56,8 +56,8 @@ ; CHECK-LABEL: name: sffbh_s32_vv ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[V_FFBH_I32_e64_:%[0-9]+]]:vgpr_32 = V_FFBH_I32_e64 [[COPY]], implicit $exec + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[V_FFBH_I32_e64_:%[0-9]+]]:vgpr_32 = V_FFBH_I32_e64 [[PRED_COPY]], implicit $exec ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_FFBH_I32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sffbh), %0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.sin.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.sin.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.sin.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.sin.mir @@ -15,9 +15,9 @@ ; CHECK-LABEL: name: sin_s32_vs ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; CHECK-NEXT: %1:vgpr_32 = nofpexcept V_SIN_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: S_ENDPGM 0, implicit %1 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; CHECK-NEXT: [[V_SIN_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_SIN_F32_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_SIN_F32_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), %0 S_ENDPGM 0, implicit %1 @@ -36,9 +36,9 @@ ; CHECK-LABEL: name: sin_s32_vv ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: %1:vgpr_32 = nofpexcept V_SIN_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: S_ENDPGM 0, implicit %1 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[V_SIN_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_SIN_F32_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_SIN_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), %0 S_ENDPGM 0, implicit %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.sin.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.sin.s16.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.sin.s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.sin.s16.mir @@ -17,10 +17,10 @@ ; CHECK-LABEL: name: sin_s16_vs ; CHECK: liveins: $sgpr0 - ; CHECK: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; CHECK: [[V_SIN_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_SIN_F16_e64 0, [[COPY]], 0, 
0, implicit $mode, implicit $exec + ; CHECK: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; CHECK: [[V_SIN_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_SIN_F16_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec ; CHECK: S_ENDPGM 0, implicit [[V_SIN_F16_e64_]] - %0:sgpr(s32) = COPY $sgpr0 + %0:sgpr(s32) = PRED_COPY $sgpr0 %1:sgpr(s16) = G_TRUNC %0 %2:vgpr(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), %1 S_ENDPGM 0, implicit %2 @@ -38,10 +38,10 @@ ; CHECK-LABEL: name: sin_s16_vv ; CHECK: liveins: $vgpr0 - ; CHECK: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK: %2:vgpr_32 = nofpexcept V_SIN_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; CHECK: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK: %2:vgpr_32 = nofpexcept V_SIN_F16_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec ; CHECK: S_ENDPGM 0, implicit %2 - %0:vgpr(s32) = COPY $vgpr0 + %0:vgpr(s32) = PRED_COPY $vgpr0 %1:vgpr(s16) = G_TRUNC %0 %2:vgpr(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), %1 S_ENDPGM 0, implicit %2 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgpu-atomic-cmpxchg-flat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgpu-atomic-cmpxchg-flat.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgpu-atomic-cmpxchg-flat.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgpu-atomic-cmpxchg-flat.mir @@ -17,39 +17,39 @@ ; GFX7-LABEL: name: amdgpu_atomic_cmpxchg_s32_flat ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX7-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) - ; GFX7-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1 + ; GFX7-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[PRED_COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] ; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s32_flat ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX9-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) - ; GFX9-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, 
[[PRED_COPY2]], %subreg.sub1 + ; GFX9-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[PRED_COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] ; GFX10-LABEL: name: amdgpu_atomic_cmpxchg_s32_flat ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX10-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) - ; GFX10-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1 + ; GFX10-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[PRED_COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] ; GFX11-LABEL: name: amdgpu_atomic_cmpxchg_s32_flat ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX11-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) - ; GFX11-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1 + ; GFX11-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[PRED_COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] %0:vgpr(p0) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 %2:vgpr(s32) = COPY $vgpr3 @@ -71,59 +71,59 @@ ; GFX7-LABEL: name: amdgpu_atomic_cmpxchg_s32_flat_gep4 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], 
%subreg.sub1 ; GFX7-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4, implicit $exec ; GFX7-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec - ; GFX7-NEXT: %12:vgpr_32, dead %14:sreg_64_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %12, %subreg.sub1 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub0 + ; GFX7-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX7-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub1 + ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], 0, implicit $exec + ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY5]], [[PRED_COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX7-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) - ; GFX7-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] ; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s32_flat_gep4 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX9-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 4, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) - ; GFX9-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1 + ; GFX9-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[PRED_COPY]], [[REG_SEQUENCE]], 4, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] ; GFX10-LABEL: name: amdgpu_atomic_cmpxchg_s32_flat_gep4 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: 
[[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1 ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4, implicit $exec ; GFX10-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec - ; GFX10-NEXT: %12:vgpr_32, dead %14:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX10-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %12, %subreg.sub1 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub0 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX10-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub1 + ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], 0, implicit $exec + ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[PRED_COPY5]], [[PRED_COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX10-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX10-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) - ; GFX10-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] ; GFX11-LABEL: name: amdgpu_atomic_cmpxchg_s32_flat_gep4 ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX11-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 4, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) - ; GFX11-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX11-NEXT: 
[[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1 + ; GFX11-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[PRED_COPY]], [[REG_SEQUENCE]], 4, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] %0:vgpr(p0) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 %2:vgpr(s32) = COPY $vgpr3 @@ -147,39 +147,39 @@ ; GFX7-LABEL: name: amdgpu_atomic_cmpxchg_s64_flat ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5 - ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3 - ; GFX7-NEXT: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) - ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_CMPSWAP_X2_RTN]] + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr4_vgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0_sub1, [[PRED_COPY2]], %subreg.sub2_sub3 + ; GFX7-NEXT: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[PRED_COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) + ; GFX7-NEXT: $vgpr0_vgpr1 = PRED_COPY [[FLAT_ATOMIC_CMPSWAP_X2_RTN]] ; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s64_flat ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5 - ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3 - ; GFX9-NEXT: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_CMPSWAP_X2_RTN]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr4_vgpr5 + ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0_sub1, [[PRED_COPY2]], %subreg.sub2_sub3 + ; GFX9-NEXT: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[PRED_COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) + ; GFX9-NEXT: $vgpr0_vgpr1 = PRED_COPY [[FLAT_ATOMIC_CMPSWAP_X2_RTN]] ; GFX10-LABEL: name: amdgpu_atomic_cmpxchg_s64_flat ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5 - ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3 - ; GFX10-NEXT: 
[[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_CMPSWAP_X2_RTN]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr4_vgpr5 + ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0_sub1, [[PRED_COPY2]], %subreg.sub2_sub3 + ; GFX10-NEXT: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[PRED_COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) + ; GFX10-NEXT: $vgpr0_vgpr1 = PRED_COPY [[FLAT_ATOMIC_CMPSWAP_X2_RTN]] ; GFX11-LABEL: name: amdgpu_atomic_cmpxchg_s64_flat ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5 - ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3 - ; GFX11-NEXT: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) - ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_CMPSWAP_X2_RTN]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr4_vgpr5 + ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0_sub1, [[PRED_COPY2]], %subreg.sub2_sub3 + ; GFX11-NEXT: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[PRED_COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) + ; GFX11-NEXT: $vgpr0_vgpr1 = PRED_COPY [[FLAT_ATOMIC_CMPSWAP_X2_RTN]] %0:vgpr(p0) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = COPY $vgpr2_vgpr3 %2:vgpr(s64) = COPY $vgpr4_vgpr5 @@ -201,59 +201,59 @@ ; GFX7-LABEL: name: amdgpu_atomic_cmpxchg_s64_flat_gep4 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5 - ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr4_vgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0_sub1, [[PRED_COPY2]], %subreg.sub2_sub3 ; GFX7-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4, implicit $exec ; GFX7-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY 
[[COPY]].sub1 - ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec - ; GFX7-NEXT: %12:vgpr_32, dead %14:sreg_64_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %12, %subreg.sub1 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub0 + ; GFX7-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX7-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub1 + ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], 0, implicit $exec + ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY5]], [[PRED_COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX7-NEXT: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) - ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_CMPSWAP_X2_RTN]] + ; GFX7-NEXT: $vgpr0_vgpr1 = PRED_COPY [[FLAT_ATOMIC_CMPSWAP_X2_RTN]] ; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s64_flat_gep4 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5 - ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3 - ; GFX9-NEXT: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 4, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_CMPSWAP_X2_RTN]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr4_vgpr5 + ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0_sub1, [[PRED_COPY2]], %subreg.sub2_sub3 + ; GFX9-NEXT: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[PRED_COPY]], [[REG_SEQUENCE]], 4, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) + ; GFX9-NEXT: $vgpr0_vgpr1 = PRED_COPY [[FLAT_ATOMIC_CMPSWAP_X2_RTN]] ; GFX10-LABEL: name: amdgpu_atomic_cmpxchg_s64_flat_gep4 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5 - ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; 
GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr4_vgpr5 + ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0_sub1, [[PRED_COPY2]], %subreg.sub2_sub3 ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4, implicit $exec ; GFX10-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec - ; GFX10-NEXT: %12:vgpr_32, dead %14:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX10-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %12, %subreg.sub1 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub0 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX10-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub1 + ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], 0, implicit $exec + ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[PRED_COPY5]], [[PRED_COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX10-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX10-NEXT: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_CMPSWAP_X2_RTN]] + ; GFX10-NEXT: $vgpr0_vgpr1 = PRED_COPY [[FLAT_ATOMIC_CMPSWAP_X2_RTN]] ; GFX11-LABEL: name: amdgpu_atomic_cmpxchg_s64_flat_gep4 ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5 - ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3 - ; GFX11-NEXT: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 4, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) - ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_CMPSWAP_X2_RTN]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr4_vgpr5 + ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0_sub1, [[PRED_COPY2]], %subreg.sub2_sub3 + ; GFX11-NEXT: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[PRED_COPY]], [[REG_SEQUENCE]], 4, 1, 
implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) + ; GFX11-NEXT: $vgpr0_vgpr1 = PRED_COPY [[FLAT_ATOMIC_CMPSWAP_X2_RTN]] %0:vgpr(p0) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = COPY $vgpr2_vgpr3 %2:vgpr(s64) = COPY $vgpr4_vgpr5 @@ -277,79 +277,79 @@ ; GFX7-LABEL: name: amdgpu_atomic_cmpxchg_s32_flat_gepm4 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1 ; GFX7-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294967292, implicit $exec ; GFX7-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec - ; GFX7-NEXT: %12:vgpr_32, dead %14:sreg_64_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %12, %subreg.sub1 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub0 + ; GFX7-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX7-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub1 + ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], 0, implicit $exec + ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY5]], [[PRED_COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX7-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) - ; GFX7-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] ; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s32_flat_gepm4 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = 
PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1 ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294967292, implicit $exec ; GFX9-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX9-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec - ; GFX9-NEXT: %12:vgpr_32, dead %14:sreg_64_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX9-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %12, %subreg.sub1 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub0 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub1 + ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], 0, implicit $exec + ; GFX9-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY5]], [[PRED_COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX9-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX9-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) - ; GFX9-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] ; GFX10-LABEL: name: amdgpu_atomic_cmpxchg_s32_flat_gepm4 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1 ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294967292, implicit $exec ; GFX10-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY 
[[REG_SEQUENCE1]].sub0 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec - ; GFX10-NEXT: %12:vgpr_32, dead %14:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX10-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %12, %subreg.sub1 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub0 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX10-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub1 + ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], 0, implicit $exec + ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[PRED_COPY5]], [[PRED_COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX10-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX10-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) - ; GFX10-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] ; GFX11-LABEL: name: amdgpu_atomic_cmpxchg_s32_flat_gepm4 ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1 ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294967292, implicit $exec ; GFX11-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 - ; GFX11-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX11-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec - ; GFX11-NEXT: %12:vgpr_32, dead %14:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX11-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %12, %subreg.sub1 + ; GFX11-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; 
GFX11-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub0 + ; GFX11-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX11-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub1 + ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], 0, implicit $exec + ; GFX11-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[PRED_COPY5]], [[PRED_COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX11-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX11-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) - ; GFX11-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] %0:vgpr(p0) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 %2:vgpr(s32) = COPY $vgpr3 @@ -373,35 +373,35 @@ ; GFX7-LABEL: name: amdgpu_atomic_cmpxchg_s32_flat_nortn ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX7-NEXT: FLAT_ATOMIC_CMPSWAP [[COPY]], [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1 + ; GFX7-NEXT: FLAT_ATOMIC_CMPSWAP [[PRED_COPY]], [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) ; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s32_flat_nortn ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX9-NEXT: FLAT_ATOMIC_CMPSWAP [[COPY]], [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1 + ; GFX9-NEXT: FLAT_ATOMIC_CMPSWAP [[PRED_COPY]], [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) ; GFX10-LABEL: name: amdgpu_atomic_cmpxchg_s32_flat_nortn ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], 
%subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX10-NEXT: FLAT_ATOMIC_CMPSWAP [[COPY]], [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1 + ; GFX10-NEXT: FLAT_ATOMIC_CMPSWAP [[PRED_COPY]], [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) ; GFX11-LABEL: name: amdgpu_atomic_cmpxchg_s32_flat_nortn ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX11-NEXT: FLAT_ATOMIC_CMPSWAP [[COPY]], [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1 + ; GFX11-NEXT: FLAT_ATOMIC_CMPSWAP [[PRED_COPY]], [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) %0:vgpr(p0) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 %2:vgpr(s32) = COPY $vgpr3 @@ -422,35 +422,35 @@ ; GFX7-LABEL: name: amdgpu_atomic_cmpxchg_s64_flat_nortn ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5 - ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3 - ; GFX7-NEXT: FLAT_ATOMIC_CMPSWAP_X2 [[COPY]], [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr4_vgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0_sub1, [[PRED_COPY2]], %subreg.sub2_sub3 + ; GFX7-NEXT: FLAT_ATOMIC_CMPSWAP_X2 [[PRED_COPY]], [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) ; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s64_flat_nortn ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5 - ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3 - ; GFX9-NEXT: FLAT_ATOMIC_CMPSWAP_X2 [[COPY]], [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; 
GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr4_vgpr5 + ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0_sub1, [[PRED_COPY2]], %subreg.sub2_sub3 + ; GFX9-NEXT: FLAT_ATOMIC_CMPSWAP_X2 [[PRED_COPY]], [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) ; GFX10-LABEL: name: amdgpu_atomic_cmpxchg_s64_flat_nortn ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5 - ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3 - ; GFX10-NEXT: FLAT_ATOMIC_CMPSWAP_X2 [[COPY]], [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr4_vgpr5 + ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0_sub1, [[PRED_COPY2]], %subreg.sub2_sub3 + ; GFX10-NEXT: FLAT_ATOMIC_CMPSWAP_X2 [[PRED_COPY]], [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) ; GFX11-LABEL: name: amdgpu_atomic_cmpxchg_s64_flat_nortn ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5 - ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3 - ; GFX11-NEXT: FLAT_ATOMIC_CMPSWAP_X2 [[COPY]], [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr4_vgpr5 + ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0_sub1, [[PRED_COPY2]], %subreg.sub2_sub3 + ; GFX11-NEXT: FLAT_ATOMIC_CMPSWAP_X2 [[PRED_COPY]], [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) %0:vgpr(p0) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = COPY $vgpr2_vgpr3 %2:vgpr(s64) = COPY $vgpr4_vgpr5 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgpu-atomic-cmpxchg-global.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgpu-atomic-cmpxchg-global.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgpu-atomic-cmpxchg-global.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgpu-atomic-cmpxchg-global.mir @@ -19,69 +19,69 @@ ; GFX6-LABEL: name: amdgpu_atomic_cmpxchg_s32_global ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = 
PRED_COPY $vgpr3 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 - ; GFX6-NEXT: [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN]].sub0 - ; GFX6-NEXT: $vgpr0 = COPY [[COPY3]] + ; GFX6-NEXT: [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN [[REG_SEQUENCE]], [[PRED_COPY]], [[REG_SEQUENCE2]], 0, 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN]].sub0 + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[COPY]] ; GFX7-LABEL: name: amdgpu_atomic_cmpxchg_s32_global ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 - ; GFX7-NEXT: [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN]].sub0 - ; GFX7-NEXT: $vgpr0 = COPY [[COPY3]] + ; GFX7-NEXT: [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN [[REG_SEQUENCE]], [[PRED_COPY]], [[REG_SEQUENCE2]], 0, 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN]].sub0 + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[COPY]] ; GFX7-FLAT-LABEL: name: amdgpu_atomic_cmpxchg_s32_global ; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 ; GFX7-FLAT-NEXT: {{ $}} - ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-FLAT-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7-FLAT-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, 
[[COPY2]], %subreg.sub1 - ; GFX7-FLAT-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1) - ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] + ; GFX7-FLAT-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-FLAT-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX7-FLAT-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1 + ; GFX7-FLAT-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[PRED_COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1) + ; GFX7-FLAT-NEXT: $vgpr0 = PRED_COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] ; GFX8-LABEL: name: amdgpu_atomic_cmpxchg_s32_global ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX8-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1) - ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1 + ; GFX8-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[PRED_COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1) + ; GFX8-NEXT: $vgpr0 = PRED_COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] ; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s32_global ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX9-NEXT: [[GLOBAL_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) - ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_CMPSWAP_RTN]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1 + ; GFX9-NEXT: [[GLOBAL_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_RTN [[PRED_COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_ATOMIC_CMPSWAP_RTN]] ; GFX10-LABEL: name: amdgpu_atomic_cmpxchg_s32_global ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; 
GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX10-NEXT: [[GLOBAL_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) - ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_CMPSWAP_RTN]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1 + ; GFX10-NEXT: [[GLOBAL_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_RTN [[PRED_COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_ATOMIC_CMPSWAP_RTN]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 %2:vgpr(s32) = COPY $vgpr3 @@ -103,89 +103,89 @@ ; GFX6-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_gep4 ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 - ; GFX6-NEXT: [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 4, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN]].sub0 - ; GFX6-NEXT: $vgpr0 = COPY [[COPY3]] + ; GFX6-NEXT: [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN [[REG_SEQUENCE]], [[PRED_COPY]], [[REG_SEQUENCE2]], 0, 4, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN]].sub0 + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[COPY]] ; GFX7-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_gep4 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 
+ ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 - ; GFX7-NEXT: [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 4, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN]].sub0 - ; GFX7-NEXT: $vgpr0 = COPY [[COPY3]] + ; GFX7-NEXT: [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN [[REG_SEQUENCE]], [[PRED_COPY]], [[REG_SEQUENCE2]], 0, 4, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN]].sub0 + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[COPY]] ; GFX7-FLAT-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_gep4 ; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 ; GFX7-FLAT-NEXT: {{ $}} - ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-FLAT-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7-FLAT-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 + ; GFX7-FLAT-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-FLAT-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX7-FLAT-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1 ; GFX7-FLAT-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4, implicit $exec ; GFX7-FLAT-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX7-FLAT-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX7-FLAT-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-FLAT-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 - ; GFX7-FLAT-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-FLAT-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX7-FLAT-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec - ; GFX7-FLAT-NEXT: %12:vgpr_32, dead %14:sreg_64_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX7-FLAT-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %12, %subreg.sub1 + ; GFX7-FLAT-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX7-FLAT-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub0 + ; GFX7-FLAT-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX7-FLAT-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub1 + ; GFX7-FLAT-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, 
[[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], 0, implicit $exec + ; GFX7-FLAT-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY5]], [[PRED_COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7-FLAT-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX7-FLAT-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1) - ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] + ; GFX7-FLAT-NEXT: $vgpr0 = PRED_COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] ; GFX8-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_gep4 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1 ; GFX8-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4, implicit $exec ; GFX8-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 - ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec - ; GFX8-NEXT: %12:vgpr_32, dead %14:sreg_64_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %12, %subreg.sub1 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub0 + ; GFX8-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX8-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub1 + ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], 0, implicit $exec + ; GFX8-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY5]], [[PRED_COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX8-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1) - ; GFX8-NEXT: $vgpr0 = COPY 
[[FLAT_ATOMIC_CMPSWAP_RTN]] + ; GFX8-NEXT: $vgpr0 = PRED_COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] ; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_gep4 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX9-NEXT: [[GLOBAL_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 4, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) - ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_CMPSWAP_RTN]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1 + ; GFX9-NEXT: [[GLOBAL_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_RTN [[PRED_COPY]], [[REG_SEQUENCE]], 4, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_ATOMIC_CMPSWAP_RTN]] ; GFX10-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_gep4 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX10-NEXT: [[GLOBAL_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 4, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) - ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_CMPSWAP_RTN]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1 + ; GFX10-NEXT: [[GLOBAL_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_RTN [[PRED_COPY]], [[REG_SEQUENCE]], 4, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_ATOMIC_CMPSWAP_RTN]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 %2:vgpr(s32) = COPY $vgpr3 @@ -209,69 +209,69 @@ ; GFX6-LABEL: name: amdgpu_atomic_cmpxchg_s64_global ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5 - ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr4_vgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0_sub1, [[PRED_COPY2]], %subreg.sub2_sub3 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; 
GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 - ; GFX6-NEXT: [[BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN:%[0-9]+]]:vreg_128 = BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 0, 1, implicit $exec :: (load store seq_cst (s64), addrspace 1) - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN]].sub0_sub1 - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[COPY3]] + ; GFX6-NEXT: [[BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN:%[0-9]+]]:vreg_128 = BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN [[REG_SEQUENCE]], [[PRED_COPY]], [[REG_SEQUENCE2]], 0, 0, 1, implicit $exec :: (load store seq_cst (s64), addrspace 1) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY [[BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN]].sub0_sub1 + ; GFX6-NEXT: $vgpr0_vgpr1 = PRED_COPY [[COPY]] ; GFX7-LABEL: name: amdgpu_atomic_cmpxchg_s64_global ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5 - ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr4_vgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0_sub1, [[PRED_COPY2]], %subreg.sub2_sub3 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 - ; GFX7-NEXT: [[BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN:%[0-9]+]]:vreg_128 = BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 0, 1, implicit $exec :: (load store seq_cst (s64), addrspace 1) - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN]].sub0_sub1 - ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[COPY3]] + ; GFX7-NEXT: [[BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN:%[0-9]+]]:vreg_128 = BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN [[REG_SEQUENCE]], [[PRED_COPY]], [[REG_SEQUENCE2]], 0, 0, 1, implicit $exec :: (load store seq_cst (s64), addrspace 1) + ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY [[BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN]].sub0_sub1 + ; GFX7-NEXT: $vgpr0_vgpr1 = PRED_COPY [[COPY]] ; GFX7-FLAT-LABEL: name: amdgpu_atomic_cmpxchg_s64_global ; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; GFX7-FLAT-NEXT: {{ $}} - ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-FLAT-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX7-FLAT-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5 - ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3 - ; GFX7-FLAT-NEXT: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = 
FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64), addrspace 1) - ; GFX7-FLAT-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_CMPSWAP_X2_RTN]] + ; GFX7-FLAT-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-FLAT-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX7-FLAT-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr4_vgpr5 + ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0_sub1, [[PRED_COPY2]], %subreg.sub2_sub3 + ; GFX7-FLAT-NEXT: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[PRED_COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64), addrspace 1) + ; GFX7-FLAT-NEXT: $vgpr0_vgpr1 = PRED_COPY [[FLAT_ATOMIC_CMPSWAP_X2_RTN]] ; GFX8-LABEL: name: amdgpu_atomic_cmpxchg_s64_global ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5 - ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3 - ; GFX8-NEXT: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64), addrspace 1) - ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_CMPSWAP_X2_RTN]] + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr4_vgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0_sub1, [[PRED_COPY2]], %subreg.sub2_sub3 + ; GFX8-NEXT: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[PRED_COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64), addrspace 1) + ; GFX8-NEXT: $vgpr0_vgpr1 = PRED_COPY [[FLAT_ATOMIC_CMPSWAP_X2_RTN]] ; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s64_global ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5 - ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3 - ; GFX9-NEXT: [[GLOBAL_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec :: (load store seq_cst (s64), addrspace 1) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_ATOMIC_CMPSWAP_X2_RTN]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr4_vgpr5 + ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0_sub1, [[PRED_COPY2]], %subreg.sub2_sub3 + ; GFX9-NEXT: [[GLOBAL_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_CMPSWAP_X2_RTN [[PRED_COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec :: (load store seq_cst (s64), addrspace 1) + ; GFX9-NEXT: $vgpr0_vgpr1 = PRED_COPY [[GLOBAL_ATOMIC_CMPSWAP_X2_RTN]] ; GFX10-LABEL: name: amdgpu_atomic_cmpxchg_s64_global ; 
GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5 - ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3 - ; GFX10-NEXT: [[GLOBAL_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec :: (load store seq_cst (s64), addrspace 1) - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_ATOMIC_CMPSWAP_X2_RTN]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr4_vgpr5 + ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0_sub1, [[PRED_COPY2]], %subreg.sub2_sub3 + ; GFX10-NEXT: [[GLOBAL_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_CMPSWAP_X2_RTN [[PRED_COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec :: (load store seq_cst (s64), addrspace 1) + ; GFX10-NEXT: $vgpr0_vgpr1 = PRED_COPY [[GLOBAL_ATOMIC_CMPSWAP_X2_RTN]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = COPY $vgpr2_vgpr3 %2:vgpr(s64) = COPY $vgpr4_vgpr5 @@ -293,89 +293,89 @@ ; GFX6-LABEL: name: amdgpu_atomic_cmpxchg_s64_global_gep4 ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5 - ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr4_vgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0_sub1, [[PRED_COPY2]], %subreg.sub2_sub3 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 - ; GFX6-NEXT: [[BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN:%[0-9]+]]:vreg_128 = BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 4, 1, implicit $exec :: (load store seq_cst (s64), addrspace 1) - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN]].sub0_sub1 - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[COPY3]] + ; GFX6-NEXT: [[BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN:%[0-9]+]]:vreg_128 = BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN [[REG_SEQUENCE]], [[PRED_COPY]], [[REG_SEQUENCE2]], 0, 4, 1, implicit $exec :: (load store seq_cst (s64), addrspace 1) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY [[BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN]].sub0_sub1 + ; GFX6-NEXT: $vgpr0_vgpr1 = PRED_COPY [[COPY]] ; GFX7-LABEL: name: amdgpu_atomic_cmpxchg_s64_global_gep4 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: 
[[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5 - ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr4_vgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0_sub1, [[PRED_COPY2]], %subreg.sub2_sub3 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 - ; GFX7-NEXT: [[BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN:%[0-9]+]]:vreg_128 = BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 4, 1, implicit $exec :: (load store seq_cst (s64), addrspace 1) - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN]].sub0_sub1 - ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[COPY3]] + ; GFX7-NEXT: [[BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN:%[0-9]+]]:vreg_128 = BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN [[REG_SEQUENCE]], [[PRED_COPY]], [[REG_SEQUENCE2]], 0, 4, 1, implicit $exec :: (load store seq_cst (s64), addrspace 1) + ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY [[BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN]].sub0_sub1 + ; GFX7-NEXT: $vgpr0_vgpr1 = PRED_COPY [[COPY]] ; GFX7-FLAT-LABEL: name: amdgpu_atomic_cmpxchg_s64_global_gep4 ; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; GFX7-FLAT-NEXT: {{ $}} - ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-FLAT-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX7-FLAT-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5 - ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3 + ; GFX7-FLAT-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-FLAT-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX7-FLAT-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr4_vgpr5 + ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0_sub1, [[PRED_COPY2]], %subreg.sub2_sub3 ; GFX7-FLAT-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4, implicit $exec ; GFX7-FLAT-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX7-FLAT-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX7-FLAT-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-FLAT-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 - ; GFX7-FLAT-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-FLAT-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX7-FLAT-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec - ; GFX7-FLAT-NEXT: %12:vgpr_32, dead %14:sreg_64_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX7-FLAT-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = 
REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %12, %subreg.sub1 + ; GFX7-FLAT-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX7-FLAT-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub0 + ; GFX7-FLAT-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX7-FLAT-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub1 + ; GFX7-FLAT-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], 0, implicit $exec + ; GFX7-FLAT-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY5]], [[PRED_COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7-FLAT-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX7-FLAT-NEXT: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64), addrspace 1) - ; GFX7-FLAT-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_CMPSWAP_X2_RTN]] + ; GFX7-FLAT-NEXT: $vgpr0_vgpr1 = PRED_COPY [[FLAT_ATOMIC_CMPSWAP_X2_RTN]] ; GFX8-LABEL: name: amdgpu_atomic_cmpxchg_s64_global_gep4 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5 - ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr4_vgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0_sub1, [[PRED_COPY2]], %subreg.sub2_sub3 ; GFX8-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4, implicit $exec ; GFX8-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 - ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec - ; GFX8-NEXT: %12:vgpr_32, dead %14:sreg_64_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %12, %subreg.sub1 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub0 + ; GFX8-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX8-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub1 + ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], 0, implicit $exec + ; GFX8-NEXT: 
[[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY5]], [[PRED_COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX8-NEXT: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64), addrspace 1) - ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_CMPSWAP_X2_RTN]] + ; GFX8-NEXT: $vgpr0_vgpr1 = PRED_COPY [[FLAT_ATOMIC_CMPSWAP_X2_RTN]] ; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s64_global_gep4 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5 - ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3 - ; GFX9-NEXT: [[GLOBAL_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 4, 1, implicit $exec :: (load store seq_cst (s64), addrspace 1) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_ATOMIC_CMPSWAP_X2_RTN]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr4_vgpr5 + ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0_sub1, [[PRED_COPY2]], %subreg.sub2_sub3 + ; GFX9-NEXT: [[GLOBAL_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_CMPSWAP_X2_RTN [[PRED_COPY]], [[REG_SEQUENCE]], 4, 1, implicit $exec :: (load store seq_cst (s64), addrspace 1) + ; GFX9-NEXT: $vgpr0_vgpr1 = PRED_COPY [[GLOBAL_ATOMIC_CMPSWAP_X2_RTN]] ; GFX10-LABEL: name: amdgpu_atomic_cmpxchg_s64_global_gep4 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5 - ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3 - ; GFX10-NEXT: [[GLOBAL_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 4, 1, implicit $exec :: (load store seq_cst (s64), addrspace 1) - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_ATOMIC_CMPSWAP_X2_RTN]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr4_vgpr5 + ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0_sub1, [[PRED_COPY2]], %subreg.sub2_sub3 + ; GFX10-NEXT: [[GLOBAL_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_CMPSWAP_X2_RTN [[PRED_COPY]], [[REG_SEQUENCE]], 4, 1, implicit $exec :: (load store seq_cst (s64), addrspace 1) + ; GFX10-NEXT: $vgpr0_vgpr1 = PRED_COPY [[GLOBAL_ATOMIC_CMPSWAP_X2_RTN]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = COPY $vgpr2_vgpr3 %2:vgpr(s64) = COPY $vgpr4_vgpr5 @@ -399,109 +399,109 @@ ; GFX6-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_gepm4 ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: 
[[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1 ; GFX6-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294967292, implicit $exec ; GFX6-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 - ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec - ; GFX6-NEXT: %19:vgpr_32, dead %21:sreg_64_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %19, %subreg.sub1 + ; GFX6-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX6-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub0 + ; GFX6-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX6-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub1 + ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], 0, implicit $exec + ; GFX6-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY5]], [[PRED_COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX6-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE3]], %subreg.sub2_sub3 ; GFX6-NEXT: [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE4]], 0, 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) - ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN]].sub0 - ; GFX6-NEXT: $vgpr0 = COPY [[COPY7]] + ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN]].sub0 + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[COPY]] ; GFX7-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_gepm4 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 
- ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1 ; GFX7-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294967292, implicit $exec ; GFX7-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec - ; GFX7-NEXT: %19:vgpr_32, dead %21:sreg_64_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %19, %subreg.sub1 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub0 + ; GFX7-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX7-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub1 + ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], 0, implicit $exec + ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY5]], [[PRED_COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX7-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE3]], %subreg.sub2_sub3 ; GFX7-NEXT: [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE4]], 0, 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) - ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN]].sub0 - ; GFX7-NEXT: $vgpr0 = COPY [[COPY7]] + ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN]].sub0 + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[COPY]] ; GFX7-FLAT-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_gepm4 ; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 ; GFX7-FLAT-NEXT: {{ $}} - ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-FLAT-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7-FLAT-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; 
GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 + ; GFX7-FLAT-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-FLAT-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX7-FLAT-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1 ; GFX7-FLAT-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294967292, implicit $exec ; GFX7-FLAT-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX7-FLAT-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX7-FLAT-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-FLAT-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 - ; GFX7-FLAT-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-FLAT-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX7-FLAT-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec - ; GFX7-FLAT-NEXT: %12:vgpr_32, dead %14:sreg_64_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX7-FLAT-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %12, %subreg.sub1 + ; GFX7-FLAT-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX7-FLAT-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub0 + ; GFX7-FLAT-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX7-FLAT-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub1 + ; GFX7-FLAT-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], 0, implicit $exec + ; GFX7-FLAT-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY5]], [[PRED_COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7-FLAT-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX7-FLAT-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1) - ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] + ; GFX7-FLAT-NEXT: $vgpr0 = PRED_COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] ; GFX8-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_gepm4 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1 ; GFX8-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294967292, implicit $exec ; GFX8-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = 
V_MOV_B32_e32 -1, implicit $exec ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 - ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec - ; GFX8-NEXT: %12:vgpr_32, dead %14:sreg_64_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %12, %subreg.sub1 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub0 + ; GFX8-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX8-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub1 + ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], 0, implicit $exec + ; GFX8-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY5]], [[PRED_COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX8-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1) - ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] + ; GFX8-NEXT: $vgpr0 = PRED_COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] ; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_gepm4 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX9-NEXT: [[GLOBAL_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], -4, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) - ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_CMPSWAP_RTN]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1 + ; GFX9-NEXT: [[GLOBAL_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_RTN [[PRED_COPY]], [[REG_SEQUENCE]], -4, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_ATOMIC_CMPSWAP_RTN]] ; GFX10-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_gepm4 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX10-NEXT: 
[[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX10-NEXT: [[GLOBAL_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], -4, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) - ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_CMPSWAP_RTN]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1 + ; GFX10-NEXT: [[GLOBAL_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_RTN [[PRED_COPY]], [[REG_SEQUENCE]], -4, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_ATOMIC_CMPSWAP_RTN]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 %2:vgpr(s32) = COPY $vgpr3 @@ -525,61 +525,61 @@ ; GFX6-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_nortn ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 - ; GFX6-NEXT: BUFFER_ATOMIC_CMPSWAP_ADDR64 [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 0, 0, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX6-NEXT: BUFFER_ATOMIC_CMPSWAP_ADDR64 [[REG_SEQUENCE]], [[PRED_COPY]], [[REG_SEQUENCE2]], 0, 0, 0, implicit $exec :: (load store seq_cst (s32), addrspace 1) ; GFX7-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_nortn ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; 
GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 - ; GFX7-NEXT: BUFFER_ATOMIC_CMPSWAP_ADDR64 [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 0, 0, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX7-NEXT: BUFFER_ATOMIC_CMPSWAP_ADDR64 [[REG_SEQUENCE]], [[PRED_COPY]], [[REG_SEQUENCE2]], 0, 0, 0, implicit $exec :: (load store seq_cst (s32), addrspace 1) ; GFX7-FLAT-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_nortn ; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 ; GFX7-FLAT-NEXT: {{ $}} - ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-FLAT-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7-FLAT-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX7-FLAT-NEXT: FLAT_ATOMIC_CMPSWAP [[COPY]], [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1) + ; GFX7-FLAT-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-FLAT-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX7-FLAT-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1 + ; GFX7-FLAT-NEXT: FLAT_ATOMIC_CMPSWAP [[PRED_COPY]], [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1) ; GFX8-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_nortn ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX8-NEXT: FLAT_ATOMIC_CMPSWAP [[COPY]], [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1) + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1 + ; GFX8-NEXT: FLAT_ATOMIC_CMPSWAP [[PRED_COPY]], [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1) ; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_nortn ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX9-NEXT: GLOBAL_ATOMIC_CMPSWAP [[COPY]], [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1 + ; GFX9-NEXT: GLOBAL_ATOMIC_CMPSWAP [[PRED_COPY]], [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load store seq_cst (s32), 
addrspace 1) ; GFX10-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_nortn ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX10-NEXT: GLOBAL_ATOMIC_CMPSWAP [[COPY]], [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1 + ; GFX10-NEXT: GLOBAL_ATOMIC_CMPSWAP [[PRED_COPY]], [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load store seq_cst (s32), addrspace 1) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 %2:vgpr(s32) = COPY $vgpr3 @@ -600,61 +600,61 @@ ; GFX6-LABEL: name: amdgpu_atomic_cmpxchg_s64_global_nortn ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5 - ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr4_vgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0_sub1, [[PRED_COPY2]], %subreg.sub2_sub3 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 - ; GFX6-NEXT: BUFFER_ATOMIC_CMPSWAP_X2_ADDR64 [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 0, 0, implicit $exec :: (load store seq_cst (s64), addrspace 1) + ; GFX6-NEXT: BUFFER_ATOMIC_CMPSWAP_X2_ADDR64 [[REG_SEQUENCE]], [[PRED_COPY]], [[REG_SEQUENCE2]], 0, 0, 0, implicit $exec :: (load store seq_cst (s64), addrspace 1) ; GFX7-LABEL: name: amdgpu_atomic_cmpxchg_s64_global_nortn ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5 - ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr4_vgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0_sub1, [[PRED_COPY2]], %subreg.sub2_sub3 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX7-NEXT: 
[[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 - ; GFX7-NEXT: BUFFER_ATOMIC_CMPSWAP_X2_ADDR64 [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 0, 0, implicit $exec :: (load store seq_cst (s64), addrspace 1) + ; GFX7-NEXT: BUFFER_ATOMIC_CMPSWAP_X2_ADDR64 [[REG_SEQUENCE]], [[PRED_COPY]], [[REG_SEQUENCE2]], 0, 0, 0, implicit $exec :: (load store seq_cst (s64), addrspace 1) ; GFX7-FLAT-LABEL: name: amdgpu_atomic_cmpxchg_s64_global_nortn ; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; GFX7-FLAT-NEXT: {{ $}} - ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-FLAT-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX7-FLAT-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5 - ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3 - ; GFX7-FLAT-NEXT: FLAT_ATOMIC_CMPSWAP_X2 [[COPY]], [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64), addrspace 1) + ; GFX7-FLAT-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-FLAT-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX7-FLAT-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr4_vgpr5 + ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0_sub1, [[PRED_COPY2]], %subreg.sub2_sub3 + ; GFX7-FLAT-NEXT: FLAT_ATOMIC_CMPSWAP_X2 [[PRED_COPY]], [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64), addrspace 1) ; GFX8-LABEL: name: amdgpu_atomic_cmpxchg_s64_global_nortn ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5 - ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3 - ; GFX8-NEXT: FLAT_ATOMIC_CMPSWAP_X2 [[COPY]], [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64), addrspace 1) + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr4_vgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0_sub1, [[PRED_COPY2]], %subreg.sub2_sub3 + ; GFX8-NEXT: FLAT_ATOMIC_CMPSWAP_X2 [[PRED_COPY]], [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64), addrspace 1) ; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s64_global_nortn ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5 - ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3 - ; GFX9-NEXT: GLOBAL_ATOMIC_CMPSWAP_X2 [[COPY]], [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load store seq_cst (s64), addrspace 1) + ; GFX9-NEXT: 
[[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr4_vgpr5 + ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0_sub1, [[PRED_COPY2]], %subreg.sub2_sub3 + ; GFX9-NEXT: GLOBAL_ATOMIC_CMPSWAP_X2 [[PRED_COPY]], [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load store seq_cst (s64), addrspace 1) ; GFX10-LABEL: name: amdgpu_atomic_cmpxchg_s64_global_nortn ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5 - ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3 - ; GFX10-NEXT: GLOBAL_ATOMIC_CMPSWAP_X2 [[COPY]], [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load store seq_cst (s64), addrspace 1) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr4_vgpr5 + ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0_sub1, [[PRED_COPY2]], %subreg.sub2_sub3 + ; GFX10-NEXT: GLOBAL_ATOMIC_CMPSWAP_X2 [[PRED_COPY]], [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load store seq_cst (s64), addrspace 1) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = COPY $vgpr2_vgpr3 %2:vgpr(s64) = COPY $vgpr4_vgpr5 @@ -675,71 +675,71 @@ ; GFX6-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_sgpr_ptr ; GFX6: liveins: $sgpr0_sgpr1, $vgpr2, $vgpr3 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4294967295 ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 - ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 + ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 ; GFX6-NEXT: [[BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN]].sub0 - ; GFX6-NEXT: $vgpr0 = COPY [[COPY3]] + ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN]].sub0 + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[COPY]] ; GFX7-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_sgpr_ptr ; GFX7: liveins: $sgpr0_sgpr1, $vgpr2, $vgpr3 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY 
$sgpr0_sgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4294967295 ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 - ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 + ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 ; GFX7-NEXT: [[BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN]].sub0 - ; GFX7-NEXT: $vgpr0 = COPY [[COPY3]] + ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN]].sub0 + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[COPY]] ; GFX7-FLAT-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_sgpr_ptr ; GFX7-FLAT: liveins: $sgpr0_sgpr1, $vgpr2, $vgpr3 ; GFX7-FLAT-NEXT: {{ $}} - ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX7-FLAT-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7-FLAT-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX7-FLAT-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[COPY]] - ; GFX7-FLAT-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY3]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1) - ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] + ; GFX7-FLAT-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX7-FLAT-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX7-FLAT-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1 + ; GFX7-FLAT-NEXT: [[PRED_COPY3:%[0-9]+]]:vreg_64 = PRED_COPY [[PRED_COPY]] + ; GFX7-FLAT-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[PRED_COPY3]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1) + ; GFX7-FLAT-NEXT: $vgpr0 = PRED_COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] ; GFX8-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_sgpr_ptr ; GFX8: liveins: $sgpr0_sgpr1, $vgpr2, $vgpr3 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[COPY]] - ; GFX8-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = 
FLAT_ATOMIC_CMPSWAP_RTN [[COPY3]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1) - ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:vreg_64 = PRED_COPY [[PRED_COPY]] + ; GFX8-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[PRED_COPY3]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1) + ; GFX8-NEXT: $vgpr0 = PRED_COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] ; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_sgpr_ptr ; GFX9: liveins: $sgpr0_sgpr1, $vgpr2, $vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1 ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX9-NEXT: [[GLOBAL_ATOMIC_CMPSWAP_SADDR_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_SADDR_RTN [[V_MOV_B32_e32_]], [[REG_SEQUENCE]], [[COPY]], 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) - ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_CMPSWAP_SADDR_RTN]] + ; GFX9-NEXT: [[GLOBAL_ATOMIC_CMPSWAP_SADDR_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_SADDR_RTN [[V_MOV_B32_e32_]], [[REG_SEQUENCE]], [[PRED_COPY]], 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_ATOMIC_CMPSWAP_SADDR_RTN]] ; GFX10-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_sgpr_ptr ; GFX10: liveins: $sgpr0_sgpr1, $vgpr2, $vgpr3 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1 ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX10-NEXT: [[GLOBAL_ATOMIC_CMPSWAP_SADDR_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_SADDR_RTN [[V_MOV_B32_e32_]], [[REG_SEQUENCE]], [[COPY]], 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) - ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_CMPSWAP_SADDR_RTN]] + ; GFX10-NEXT: [[GLOBAL_ATOMIC_CMPSWAP_SADDR_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_SADDR_RTN [[V_MOV_B32_e32_]], [[REG_SEQUENCE]], [[PRED_COPY]], 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 
1) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_ATOMIC_CMPSWAP_SADDR_RTN]] %0:sgpr(p1) = COPY $sgpr0_sgpr1 %1:vgpr(s32) = COPY $vgpr2 %2:vgpr(s32) = COPY $vgpr3 @@ -761,91 +761,91 @@ ; GFX6-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_sgpr_ptr_offset_4095 ; GFX6: liveins: $sgpr0_sgpr1, $vgpr2, $vgpr3 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4294967295 ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 - ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 + ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 ; GFX6-NEXT: [[BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 4095, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN]].sub0 - ; GFX6-NEXT: $vgpr0 = COPY [[COPY3]] + ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN]].sub0 + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[COPY]] ; GFX7-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_sgpr_ptr_offset_4095 ; GFX7: liveins: $sgpr0_sgpr1, $vgpr2, $vgpr3 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4294967295 ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 - ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 + ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 ; GFX7-NEXT: [[BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 4095, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN]].sub0 - ; GFX7-NEXT: $vgpr0 = COPY [[COPY3]] + ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 
= COPY [[BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN]].sub0 + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[COPY]] ; GFX7-FLAT-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_sgpr_ptr_offset_4095 ; GFX7-FLAT: liveins: $sgpr0_sgpr1, $vgpr2, $vgpr3 ; GFX7-FLAT-NEXT: {{ $}} - ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX7-FLAT-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7-FLAT-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; GFX7-FLAT-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX7-FLAT-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX7-FLAT-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 ; GFX7-FLAT-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4095 ; GFX7-FLAT-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 - ; GFX7-FLAT-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 - ; GFX7-FLAT-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX7-FLAT-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 - ; GFX7-FLAT-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7-FLAT-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY3]], [[COPY4]], implicit-def $scc - ; GFX7-FLAT-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY5]], [[COPY6]], implicit-def $scc, implicit $scc + ; GFX7-FLAT-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX7-FLAT-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX7-FLAT-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX7-FLAT-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX7-FLAT-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PRED_COPY3]], [[PRED_COPY4]], implicit-def $scc + ; GFX7-FLAT-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[PRED_COPY5]], [[PRED_COPY6]], implicit-def $scc, implicit $scc ; GFX7-FLAT-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 - ; GFX7-FLAT-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX7-FLAT-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]] - ; GFX7-FLAT-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY7]], [[REG_SEQUENCE2]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1) - ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] + ; GFX7-FLAT-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1 + ; GFX7-FLAT-NEXT: [[PRED_COPY7:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE1]] + ; GFX7-FLAT-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[PRED_COPY7]], [[REG_SEQUENCE2]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1) + ; GFX7-FLAT-NEXT: $vgpr0 = PRED_COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] ; GFX8-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_sgpr_ptr_offset_4095 ; GFX8: liveins: $sgpr0_sgpr1, $vgpr2, $vgpr3 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = 
PRED_COPY $vgpr3 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4095 ; GFX8-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX8-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 - ; GFX8-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX8-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY3]], [[COPY4]], implicit-def $scc - ; GFX8-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY5]], [[COPY6]], implicit-def $scc, implicit $scc + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX8-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX8-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX8-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PRED_COPY3]], [[PRED_COPY4]], implicit-def $scc + ; GFX8-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[PRED_COPY5]], [[PRED_COPY6]], implicit-def $scc, implicit $scc ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 - ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]] - ; GFX8-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY7]], [[REG_SEQUENCE2]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1) - ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] + ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1 + ; GFX8-NEXT: [[PRED_COPY7:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE1]] + ; GFX8-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[PRED_COPY7]], [[REG_SEQUENCE2]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1) + ; GFX8-NEXT: $vgpr0 = PRED_COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] ; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_sgpr_ptr_offset_4095 ; GFX9: liveins: $sgpr0_sgpr1, $vgpr2, $vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1 ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX9-NEXT: [[GLOBAL_ATOMIC_CMPSWAP_SADDR_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_SADDR_RTN [[V_MOV_B32_e32_]], [[REG_SEQUENCE]], [[COPY]], 4095, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) - ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_CMPSWAP_SADDR_RTN]] + ; GFX9-NEXT: [[GLOBAL_ATOMIC_CMPSWAP_SADDR_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_SADDR_RTN [[V_MOV_B32_e32_]], 
[[REG_SEQUENCE]], [[PRED_COPY]], 4095, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_ATOMIC_CMPSWAP_SADDR_RTN]] ; GFX10-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_sgpr_ptr_offset_4095 ; GFX10: liveins: $sgpr0_sgpr1, $vgpr2, $vgpr3 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1 ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2048, implicit $exec - ; GFX10-NEXT: [[GLOBAL_ATOMIC_CMPSWAP_SADDR_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_SADDR_RTN [[V_MOV_B32_e32_]], [[REG_SEQUENCE]], [[COPY]], 2047, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) - ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_CMPSWAP_SADDR_RTN]] + ; GFX10-NEXT: [[GLOBAL_ATOMIC_CMPSWAP_SADDR_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_SADDR_RTN [[V_MOV_B32_e32_]], [[REG_SEQUENCE]], [[PRED_COPY]], 2047, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_ATOMIC_CMPSWAP_SADDR_RTN]] %0:sgpr(p1) = COPY $sgpr0_sgpr1 %1:vgpr(s32) = COPY $vgpr2 %2:vgpr(s32) = COPY $vgpr3 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgpu-ffbh-u32.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgpu-ffbh-u32.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgpu-ffbh-u32.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgpu-ffbh-u32.mir @@ -15,8 +15,8 @@ ; CHECK-LABEL: name: ffbh_u32_s32_s_s ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; CHECK-NEXT: [[S_FLBIT_I32_B32_:%[0-9]+]]:sreg_32 = S_FLBIT_I32_B32 [[COPY]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; CHECK-NEXT: [[S_FLBIT_I32_B32_:%[0-9]+]]:sreg_32 = S_FLBIT_I32_B32 [[PRED_COPY]] ; CHECK-NEXT: S_ENDPGM 0, implicit [[S_FLBIT_I32_B32_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = G_AMDGPU_FFBH_U32 %0 @@ -38,8 +38,8 @@ ; CHECK-LABEL: name: ffbh_u32_s32_v_v ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[V_FFBH_U32_e64_:%[0-9]+]]:vgpr_32 = V_FFBH_U32_e64 [[COPY]], implicit $exec + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[V_FFBH_U32_e64_:%[0-9]+]]:vgpr_32 = V_FFBH_U32_e64 [[PRED_COPY]], implicit $exec ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_FFBH_U32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = G_AMDGPU_FFBH_U32 %0 @@ -61,8 +61,8 @@ ; CHECK-LABEL: name: ffbh_u32_v_s ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; CHECK-NEXT: [[V_FFBH_U32_e64_:%[0-9]+]]:vgpr_32 = V_FFBH_U32_e64 [[COPY]], implicit $exec + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; CHECK-NEXT: [[V_FFBH_U32_e64_:%[0-9]+]]:vgpr_32 = V_FFBH_U32_e64 [[PRED_COPY]], implicit $exec ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_FFBH_U32_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = G_AMDGPU_FFBH_U32 %0 diff --git 
a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgpu-ffbl-b32.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgpu-ffbl-b32.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgpu-ffbl-b32.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgpu-ffbl-b32.mir @@ -15,8 +15,8 @@ ; CHECK-LABEL: name: ffbl_b32_s32_s_s ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; CHECK-NEXT: [[S_FF1_I32_B32_:%[0-9]+]]:sreg_32 = S_FF1_I32_B32 [[COPY]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; CHECK-NEXT: [[S_FF1_I32_B32_:%[0-9]+]]:sreg_32 = S_FF1_I32_B32 [[PRED_COPY]] ; CHECK-NEXT: S_ENDPGM 0, implicit [[S_FF1_I32_B32_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = G_AMDGPU_FFBL_B32 %0 @@ -38,8 +38,8 @@ ; CHECK-LABEL: name: ffbl_b32_s32_v_v ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[V_FFBL_B32_e64_:%[0-9]+]]:vgpr_32 = V_FFBL_B32_e64 [[COPY]], implicit $exec + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[V_FFBL_B32_e64_:%[0-9]+]]:vgpr_32 = V_FFBL_B32_e64 [[PRED_COPY]], implicit $exec ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_FFBL_B32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = G_AMDGPU_FFBL_B32 %0 @@ -61,8 +61,8 @@ ; CHECK-LABEL: name: ffbl_b32_v_s ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; CHECK-NEXT: [[V_FFBL_B32_e64_:%[0-9]+]]:vgpr_32 = V_FFBL_B32_e64 [[COPY]], implicit $exec + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; CHECK-NEXT: [[V_FFBL_B32_e64_:%[0-9]+]]:vgpr_32 = V_FFBL_B32_e64 [[PRED_COPY]], implicit $exec ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_FFBL_B32_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = G_AMDGPU_FFBL_B32 %0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-and.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-and.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-and.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-and.mir @@ -17,21 +17,21 @@ ; WAVE64-LABEL: name: and_s1_vcc_vcc_vcc ; WAVE64: liveins: $vgpr0, $vgpr1 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 ; WAVE64-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; WAVE64-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], implicit $exec - ; WAVE64-NEXT: [[V_CMP_EQ_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[COPY1]], [[V_MOV_B32_e32_]], implicit $exec + ; WAVE64-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[PRED_COPY]], [[V_MOV_B32_e32_]], implicit $exec + ; WAVE64-NEXT: [[V_CMP_EQ_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[PRED_COPY1]], [[V_MOV_B32_e32_]], implicit $exec ; WAVE64-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[V_CMP_EQ_U32_e64_1]], implicit-def dead $scc ; WAVE64-NEXT: S_ENDPGM 0, implicit [[S_AND_B64_]] ; WAVE32-LABEL: name: and_s1_vcc_vcc_vcc ; WAVE32: liveins: $vgpr0, $vgpr1 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE32-NEXT: 
[[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 ; WAVE32-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; WAVE32-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], implicit $exec - ; WAVE32-NEXT: [[V_CMP_EQ_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 [[COPY1]], [[V_MOV_B32_e32_]], implicit $exec + ; WAVE32-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 [[PRED_COPY]], [[V_MOV_B32_e32_]], implicit $exec + ; WAVE32-NEXT: [[V_CMP_EQ_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 [[PRED_COPY1]], [[V_MOV_B32_e32_]], implicit $exec ; WAVE32-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[V_CMP_EQ_U32_e64_]], [[V_CMP_EQ_U32_e64_1]], implicit-def dead $scc ; WAVE32-NEXT: S_ENDPGM 0, implicit [[S_AND_B32_]] %0:vgpr(s32) = COPY $vgpr0 @@ -57,16 +57,16 @@ ; WAVE64-LABEL: name: and_s1_sgpr_sgpr_sgpr ; WAVE64: liveins: $sgpr0, $sgpr1 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; WAVE64-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], [[COPY1]], implicit-def dead $scc + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; WAVE64-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def dead $scc ; WAVE64-NEXT: S_ENDPGM 0, implicit [[S_AND_B32_]] ; WAVE32-LABEL: name: and_s1_sgpr_sgpr_sgpr ; WAVE32: liveins: $sgpr0, $sgpr1 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; WAVE32-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], [[COPY1]], implicit-def dead $scc + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; WAVE32-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def dead $scc ; WAVE32-NEXT: S_ENDPGM 0, implicit [[S_AND_B32_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 @@ -89,16 +89,16 @@ ; WAVE64-LABEL: name: and_s16_sgpr_sgpr_sgpr ; WAVE64: liveins: $sgpr0, $sgpr1 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; WAVE64-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], [[COPY1]], implicit-def dead $scc + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; WAVE64-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def dead $scc ; WAVE64-NEXT: S_ENDPGM 0, implicit [[S_AND_B32_]] ; WAVE32-LABEL: name: and_s16_sgpr_sgpr_sgpr ; WAVE32: liveins: $sgpr0, $sgpr1 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; WAVE32-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], [[COPY1]], implicit-def dead $scc + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; WAVE32-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def dead $scc ; WAVE32-NEXT: S_ENDPGM 0, implicit [[S_AND_B32_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 @@ -121,16 +121,16 @@ ; WAVE64-LABEL: name: 
and_s16_vgpr_vgpr_vgpr ; WAVE64: liveins: $vgpr0, $vgpr1 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE64-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY]], [[COPY1]], implicit $exec + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE64-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_AND_B32_e64_]] ; WAVE32-LABEL: name: and_s16_vgpr_vgpr_vgpr ; WAVE32: liveins: $vgpr0, $vgpr1 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE32-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY]], [[COPY1]], implicit $exec + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE32-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_AND_B32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -153,16 +153,16 @@ ; WAVE64-LABEL: name: and_s32_sgpr_sgpr_sgpr ; WAVE64: liveins: $sgpr0, $sgpr1 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; WAVE64-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], [[COPY1]], implicit-def $scc + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; WAVE64-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc ; WAVE64-NEXT: S_ENDPGM 0, implicit [[S_AND_B32_]] ; WAVE32-LABEL: name: and_s32_sgpr_sgpr_sgpr ; WAVE32: liveins: $sgpr0, $sgpr1 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; WAVE32-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], [[COPY1]], implicit-def $scc + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; WAVE32-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc ; WAVE32-NEXT: S_ENDPGM 0, implicit [[S_AND_B32_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 @@ -183,16 +183,16 @@ ; WAVE64-LABEL: name: and_s64_sgpr_sgpr_sgpr ; WAVE64: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3 - ; WAVE64-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY]], [[COPY1]], implicit-def $scc + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr2_sgpr3 + ; WAVE64-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc ; WAVE64-NEXT: S_ENDPGM 0, implicit [[S_AND_B64_]] ; WAVE32-LABEL: name: and_s64_sgpr_sgpr_sgpr ; WAVE32: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3 - ; WAVE32-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY]], [[COPY1]], 
implicit-def $scc + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr2_sgpr3 + ; WAVE32-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc ; WAVE32-NEXT: S_ENDPGM 0, implicit [[S_AND_B64_]] %0:sgpr(s64) = COPY $sgpr0_sgpr1 %1:sgpr(s64) = COPY $sgpr2_sgpr3 @@ -213,16 +213,16 @@ ; WAVE64-LABEL: name: and_v2s16_sgpr_sgpr_sgpr ; WAVE64: liveins: $sgpr0, $sgpr1 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; WAVE64-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], [[COPY1]], implicit-def dead $scc + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; WAVE64-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def dead $scc ; WAVE64-NEXT: S_ENDPGM 0, implicit [[S_AND_B32_]] ; WAVE32-LABEL: name: and_v2s16_sgpr_sgpr_sgpr ; WAVE32: liveins: $sgpr0, $sgpr1 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; WAVE32-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], [[COPY1]], implicit-def dead $scc + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; WAVE32-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def dead $scc ; WAVE32-NEXT: S_ENDPGM 0, implicit [[S_AND_B32_]] %0:sgpr(<2 x s16>) = COPY $sgpr0 %1:sgpr(<2 x s16>) = COPY $sgpr1 @@ -243,16 +243,16 @@ ; WAVE64-LABEL: name: and_v2s32_sgpr_sgpr_sgpr ; WAVE64: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3 - ; WAVE64-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY]], [[COPY1]], implicit-def dead $scc + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr2_sgpr3 + ; WAVE64-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[PRED_COPY]], [[PRED_COPY1]], implicit-def dead $scc ; WAVE64-NEXT: S_ENDPGM 0, implicit [[S_AND_B64_]] ; WAVE32-LABEL: name: and_v2s32_sgpr_sgpr_sgpr ; WAVE32: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3 - ; WAVE32-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY]], [[COPY1]], implicit-def dead $scc + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr2_sgpr3 + ; WAVE32-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[PRED_COPY]], [[PRED_COPY1]], implicit-def dead $scc ; WAVE32-NEXT: S_ENDPGM 0, implicit [[S_AND_B64_]] %0:sgpr(<2 x s32>) = COPY $sgpr0_sgpr1 %1:sgpr(<2 x s32>) = COPY $sgpr2_sgpr3 @@ -273,16 +273,16 @@ ; WAVE64-LABEL: name: and_v4s16_sgpr_sgpr_sgpr ; WAVE64: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3 - ; WAVE64-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY]], [[COPY1]], implicit-def dead $scc + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY 
$sgpr0_sgpr1 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr2_sgpr3 + ; WAVE64-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[PRED_COPY]], [[PRED_COPY1]], implicit-def dead $scc ; WAVE64-NEXT: S_ENDPGM 0, implicit [[S_AND_B64_]] ; WAVE32-LABEL: name: and_v4s16_sgpr_sgpr_sgpr ; WAVE32: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3 - ; WAVE32-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY]], [[COPY1]], implicit-def dead $scc + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr2_sgpr3 + ; WAVE32-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[PRED_COPY]], [[PRED_COPY1]], implicit-def dead $scc ; WAVE32-NEXT: S_ENDPGM 0, implicit [[S_AND_B64_]] %0:sgpr(<4 x s16>) = COPY $sgpr0_sgpr1 %1:sgpr(<4 x s16>) = COPY $sgpr2_sgpr3 @@ -303,16 +303,16 @@ ; WAVE64-LABEL: name: and_s32_vgpr_vgpr_vgpr ; WAVE64: liveins: $vgpr0, $vgpr1 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE64-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY]], [[COPY1]], implicit $exec + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE64-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_AND_B32_e64_]] ; WAVE32-LABEL: name: and_s32_vgpr_vgpr_vgpr ; WAVE32: liveins: $vgpr0, $vgpr1 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE32-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY]], [[COPY1]], implicit $exec + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE32-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_AND_B32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -333,16 +333,16 @@ ; WAVE64-LABEL: name: and_v2s16_vgpr_vgpr_vgpr ; WAVE64: liveins: $vgpr0, $vgpr1 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE64-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY]], [[COPY1]], implicit $exec + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE64-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_AND_B32_e64_]] ; WAVE32-LABEL: name: and_v2s16_vgpr_vgpr_vgpr ; WAVE32: liveins: $vgpr0, $vgpr1 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE32-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY]], [[COPY1]], implicit $exec + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE32-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; WAVE32-NEXT: S_ENDPGM 0, implicit 
[[V_AND_B32_e64_]] %0:vgpr(<2 x s16>) = COPY $vgpr0 %1:vgpr(<2 x s16>) = COPY $vgpr1 @@ -395,22 +395,22 @@ ; WAVE64-LABEL: name: and_s1_vcc_copy_to_vcc ; WAVE64: liveins: $vgpr0, $vgpr1 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE64-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY]], implicit $exec + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE64-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[PRED_COPY]], implicit $exec ; WAVE64-NEXT: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 0, [[V_AND_B32_e32_]], implicit $exec - ; WAVE64-NEXT: [[V_AND_B32_e32_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY1]], implicit $exec + ; WAVE64-NEXT: [[V_AND_B32_e32_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[PRED_COPY1]], implicit $exec ; WAVE64-NEXT: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 0, [[V_AND_B32_e32_1]], implicit $exec ; WAVE64-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]], implicit-def dead $scc ; WAVE64-NEXT: S_ENDPGM 0, implicit [[S_AND_B64_]] ; WAVE32-LABEL: name: and_s1_vcc_copy_to_vcc ; WAVE32: liveins: $vgpr0, $vgpr1 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE32-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY]], implicit $exec + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE32-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[PRED_COPY]], implicit $exec ; WAVE32-NEXT: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_NE_U32_e64 0, [[V_AND_B32_e32_]], implicit $exec - ; WAVE32-NEXT: [[V_AND_B32_e32_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY1]], implicit $exec + ; WAVE32-NEXT: [[V_AND_B32_e32_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[PRED_COPY1]], implicit $exec ; WAVE32-NEXT: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_NE_U32_e64 0, [[V_AND_B32_e32_1]], implicit $exec ; WAVE32-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]], implicit-def dead $scc ; WAVE32-NEXT: S_ENDPGM 0, implicit [[S_AND_B32_]] @@ -440,27 +440,27 @@ ; WAVE64-LABEL: name: copy_select_constrain_vcc_result_reg_wave32 ; WAVE64: liveins: $vgpr0, $sgpr0 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE64-NEXT: %sgpr0:sreg_32 = COPY $sgpr0 - ; WAVE64-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY]], implicit $exec + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE64-NEXT: %sgpr0:sreg_32 = PRED_COPY $sgpr0 + ; WAVE64-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[PRED_COPY]], implicit $exec ; WAVE64-NEXT: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 0, [[V_AND_B32_e32_]], implicit $exec ; WAVE64-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, %sgpr0, implicit-def $scc ; WAVE64-NEXT: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 0, [[S_AND_B32_]], implicit $exec ; WAVE64-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]], implicit-def dead $scc - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY [[S_AND_B64_]] - ; WAVE64-NEXT: S_ENDPGM 0, 
implicit [[COPY1]] + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32_xm0 = PRED_COPY [[S_AND_B64_]] + ; WAVE64-NEXT: S_ENDPGM 0, implicit [[PRED_COPY1]] ; WAVE32-LABEL: name: copy_select_constrain_vcc_result_reg_wave32 ; WAVE32: liveins: $vgpr0, $sgpr0 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE32-NEXT: %sgpr0:sreg_32 = COPY $sgpr0 - ; WAVE32-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY]], implicit $exec + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE32-NEXT: %sgpr0:sreg_32 = PRED_COPY $sgpr0 + ; WAVE32-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[PRED_COPY]], implicit $exec ; WAVE32-NEXT: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_NE_U32_e64 0, [[V_AND_B32_e32_]], implicit $exec ; WAVE32-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, %sgpr0, implicit-def $scc ; WAVE32-NEXT: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_NE_U32_e64 0, [[S_AND_B32_]], implicit $exec ; WAVE32-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]], implicit-def dead $scc - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY [[S_AND_B32_1]] - ; WAVE32-NEXT: S_ENDPGM 0, implicit [[COPY1]] + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32_xm0 = PRED_COPY [[S_AND_B32_1]] + ; WAVE32-NEXT: S_ENDPGM 0, implicit [[PRED_COPY1]] %1:vgpr(s32) = COPY $vgpr0 %0:vgpr(s1) = G_TRUNC %1(s32) %sgpr0:sgpr(s32) = COPY $sgpr0 @@ -488,9 +488,9 @@ ; WAVE64-LABEL: name: copy_select_constrain_vcc_result_reg_wave64 ; WAVE64: liveins: $vgpr0, $sgpr0 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE64-NEXT: %sgpr0:sreg_32 = COPY $sgpr0 - ; WAVE64-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY]], implicit $exec + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE64-NEXT: %sgpr0:sreg_32 = PRED_COPY $sgpr0 + ; WAVE64-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[PRED_COPY]], implicit $exec ; WAVE64-NEXT: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 0, [[V_AND_B32_e32_]], implicit $exec ; WAVE64-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, %sgpr0, implicit-def $scc ; WAVE64-NEXT: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 0, [[S_AND_B32_]], implicit $exec @@ -499,15 +499,15 @@ ; WAVE32-LABEL: name: copy_select_constrain_vcc_result_reg_wave64 ; WAVE32: liveins: $vgpr0, $sgpr0 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE32-NEXT: %sgpr0:sreg_32 = COPY $sgpr0 - ; WAVE32-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY]], implicit $exec + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE32-NEXT: %sgpr0:sreg_32 = PRED_COPY $sgpr0 + ; WAVE32-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[PRED_COPY]], implicit $exec ; WAVE32-NEXT: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_NE_U32_e64 0, [[V_AND_B32_e32_]], implicit $exec ; WAVE32-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, %sgpr0, implicit-def $scc ; WAVE32-NEXT: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_NE_U32_e64 0, [[S_AND_B32_]], implicit $exec ; WAVE32-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]], implicit-def dead $scc - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sreg_64_xexec = COPY [[S_AND_B32_1]] - ; WAVE32-NEXT: S_ENDPGM 0, implicit [[COPY1]] + ; WAVE32-NEXT: 
[[PRED_COPY1:%[0-9]+]]:sreg_64_xexec = PRED_COPY [[S_AND_B32_1]] + ; WAVE32-NEXT: S_ENDPGM 0, implicit [[PRED_COPY1]] %1:vgpr(s32) = COPY $vgpr0 %0:vgpr(s1) = G_TRUNC %1(s32) %sgpr0:sgpr(s32) = COPY $sgpr0 @@ -535,16 +535,16 @@ ; WAVE64-LABEL: name: and_s32_sgpr_sgpr_sgpr_result_reg_class ; WAVE64: liveins: $sgpr0, $sgpr1 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; WAVE64-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], [[COPY1]], implicit-def $scc + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; WAVE64-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc ; WAVE64-NEXT: S_ENDPGM 0, implicit [[S_AND_B32_]] ; WAVE32-LABEL: name: and_s32_sgpr_sgpr_sgpr_result_reg_class ; WAVE32: liveins: $sgpr0, $sgpr1 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; WAVE32-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], [[COPY1]], implicit-def $scc + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; WAVE32-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc ; WAVE32-NEXT: S_ENDPGM 0, implicit [[S_AND_B32_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-anyext.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-anyext.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-anyext.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-anyext.mir @@ -15,8 +15,8 @@ ; GCN-LABEL: name: anyext_sgpr_s16_to_sgpr_s32 ; GCN: liveins: $sgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: $sgpr0 = COPY [[COPY]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: $sgpr0 = PRED_COPY [[PRED_COPY]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s16) = G_TRUNC %0 %2:sgpr(s32) = G_ANYEXT %1 @@ -36,9 +36,9 @@ ; GCN-LABEL: name: anyext_sgpr_s32_to_sgpr_s64 ; GCN: liveins: $sgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 ; GCN-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[DEF]], %subreg.sub1 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[DEF]], %subreg.sub1 ; GCN-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s64) = G_ANYEXT %0 @@ -58,9 +58,9 @@ ; GCN-LABEL: name: anyext_sgpr_s16_to_sgpr_s64 ; GCN: liveins: $sgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 ; GCN-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[DEF]], %subreg.sub1 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[DEF]], %subreg.sub1 ; GCN-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s16) = G_TRUNC %0 @@ -81,9 +81,9 @@ ; GCN-LABEL: name: anyext_vgpr_s32_to_vgpr_s64 ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN-NEXT: 
[[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[DEF]], %subreg.sub1 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[DEF]], %subreg.sub1 ; GCN-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s64) = G_ANYEXT %0 @@ -103,9 +103,9 @@ ; GCN-LABEL: name: anyext_vgpr_s16_to_vgpr_s64 ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[DEF]], %subreg.sub1 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[DEF]], %subreg.sub1 ; GCN-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s16) = G_TRUNC %0 @@ -142,9 +142,9 @@ ; GCN-LABEL: name: anyext_sgpr_s1_to_sgpr_s16 ; GCN: liveins: $sgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[S_BFE_U32_:%[0-9]+]]:sreg_32 = S_BFE_U32 [[COPY]], 1048576, implicit-def $scc - ; GCN-NEXT: $sgpr0 = COPY [[S_BFE_U32_]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[S_BFE_U32_:%[0-9]+]]:sreg_32 = S_BFE_U32 [[PRED_COPY]], 1048576, implicit-def $scc + ; GCN-NEXT: $sgpr0 = PRED_COPY [[S_BFE_U32_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s1) = G_TRUNC %0 %2:sgpr(s16) = G_ANYEXT %1 @@ -164,8 +164,8 @@ ; GCN-LABEL: name: anyext_sgpr_s1_to_sgpr_s32 ; GCN: liveins: $sgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: $sgpr0 = COPY [[COPY]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: $sgpr0 = PRED_COPY [[PRED_COPY]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s1) = G_TRUNC %0 %2:sgpr(s32) = G_ANYEXT %1 @@ -184,10 +184,10 @@ ; GCN-LABEL: name: anyext_sgpr_s1_to_sgpr_s64 ; GCN: liveins: $sgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 ; GCN-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[DEF]], %subreg.sub1 - ; GCN-NEXT: $sgpr0_sgpr1 = COPY [[REG_SEQUENCE]] + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[DEF]], %subreg.sub1 + ; GCN-NEXT: $sgpr0_sgpr1 = PRED_COPY [[REG_SEQUENCE]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s1) = G_TRUNC %0 %2:sgpr(s64) = G_ANYEXT %1 @@ -206,9 +206,9 @@ ; GCN-LABEL: name: anyext_vgpr_s1_to_vgpr_s16 ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[V_BFE_U32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_U32_e64 [[COPY]], 0, 16, implicit $exec - ; GCN-NEXT: $vgpr0 = COPY [[V_BFE_U32_e64_]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[V_BFE_U32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_U32_e64 [[PRED_COPY]], 0, 16, implicit $exec + ; GCN-NEXT: $vgpr0 = PRED_COPY [[V_BFE_U32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s1) = G_TRUNC %0 %2:vgpr(s16) = G_ANYEXT %1 @@ -228,8 +228,8 @@ ; GCN-LABEL: name: anyext_vgpr_s1_to_vgpr_s32 ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: $vgpr0 = COPY [[COPY]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; 
GCN-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s1) = G_TRUNC %0 %2:vgpr(s32) = G_ANYEXT %1 @@ -248,8 +248,8 @@ ; GCN-LABEL: name: anyext_sgpr_s1_to_vgpr_s32 ; GCN: liveins: $sgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: $sgpr0 = COPY [[COPY]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: $sgpr0 = PRED_COPY [[PRED_COPY]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s1) = G_TRUNC %0 %2:sgpr(s32) = G_ANYEXT %1 @@ -268,8 +268,8 @@ ; GCN-LABEL: name: anyext_vgpr_s16_to_vgpr_s32 ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: $vgpr0 = COPY [[COPY]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s16) = G_TRUNC %0 %2:vgpr(s32) = G_ANYEXT %1 @@ -291,8 +291,8 @@ ; GCN-LABEL: name: anyext_regclass_sgpr_s1_to_sgpr_s32 ; GCN: liveins: $sgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: $sgpr0 = COPY [[COPY]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: $sgpr0 = PRED_COPY [[PRED_COPY]] %0:sgpr(s32) = COPY $sgpr0 %1:sreg_32(s1) = G_TRUNC %0 %2:sgpr(s32) = G_ANYEXT %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ashr.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ashr.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ashr.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ashr.mir @@ -17,37 +17,37 @@ ; GFX6-LABEL: name: ashr_s32_ss ; GFX6: liveins: $sgpr0, $sgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX6-NEXT: [[S_ASHR_I32_:%[0-9]+]]:sreg_32 = S_ASHR_I32 [[COPY]], [[COPY1]], implicit-def $scc + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX6-NEXT: [[S_ASHR_I32_:%[0-9]+]]:sreg_32 = S_ASHR_I32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc ; GFX6-NEXT: S_ENDPGM 0, implicit [[S_ASHR_I32_]] ; GFX7-LABEL: name: ashr_s32_ss ; GFX7: liveins: $sgpr0, $sgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX7-NEXT: [[S_ASHR_I32_:%[0-9]+]]:sreg_32 = S_ASHR_I32 [[COPY]], [[COPY1]], implicit-def $scc + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX7-NEXT: [[S_ASHR_I32_:%[0-9]+]]:sreg_32 = S_ASHR_I32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc ; GFX7-NEXT: S_ENDPGM 0, implicit [[S_ASHR_I32_]] ; GFX8-LABEL: name: ashr_s32_ss ; GFX8: liveins: $sgpr0, $sgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX8-NEXT: [[S_ASHR_I32_:%[0-9]+]]:sreg_32 = S_ASHR_I32 [[COPY]], [[COPY1]], implicit-def $scc + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX8-NEXT: [[S_ASHR_I32_:%[0-9]+]]:sreg_32 = S_ASHR_I32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc ; GFX8-NEXT: S_ENDPGM 0, implicit [[S_ASHR_I32_]] ; GFX9-LABEL: name: ashr_s32_ss ; GFX9: liveins: $sgpr0, $sgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX9-NEXT: [[S_ASHR_I32_:%[0-9]+]]:sreg_32 = 
S_ASHR_I32 [[COPY]], [[COPY1]], implicit-def $scc + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX9-NEXT: [[S_ASHR_I32_:%[0-9]+]]:sreg_32 = S_ASHR_I32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc ; GFX9-NEXT: S_ENDPGM 0, implicit [[S_ASHR_I32_]] ; GFX10-LABEL: name: ashr_s32_ss ; GFX10: liveins: $sgpr0, $sgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX10-NEXT: [[S_ASHR_I32_:%[0-9]+]]:sreg_32 = S_ASHR_I32 [[COPY]], [[COPY1]], implicit-def $scc + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX10-NEXT: [[S_ASHR_I32_:%[0-9]+]]:sreg_32 = S_ASHR_I32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc ; GFX10-NEXT: S_ENDPGM 0, implicit [[S_ASHR_I32_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 @@ -66,37 +66,37 @@ ; GFX6-LABEL: name: ashr_s32_sv ; GFX6: liveins: $sgpr0, $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[V_ASHRREV_I32_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I32_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[V_ASHRREV_I32_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I32_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_ASHRREV_I32_e64_]] ; GFX7-LABEL: name: ashr_s32_sv ; GFX7: liveins: $sgpr0, $vgpr0 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7-NEXT: [[V_ASHRREV_I32_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I32_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX7-NEXT: [[V_ASHRREV_I32_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I32_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX7-NEXT: S_ENDPGM 0, implicit [[V_ASHRREV_I32_e64_]] ; GFX8-LABEL: name: ashr_s32_sv ; GFX8: liveins: $sgpr0, $vgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: [[V_ASHRREV_I32_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I32_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[V_ASHRREV_I32_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I32_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_ASHRREV_I32_e64_]] ; GFX9-LABEL: name: ashr_s32_sv ; GFX9: liveins: $sgpr0, $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[V_ASHRREV_I32_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I32_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[V_ASHRREV_I32_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I32_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_ASHRREV_I32_e64_]] ; GFX10-LABEL: name: ashr_s32_sv ; GFX10: liveins: $sgpr0, $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: 
[[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[V_ASHRREV_I32_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I32_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[V_ASHRREV_I32_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I32_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_ASHRREV_I32_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = COPY $vgpr0 @@ -115,37 +115,37 @@ ; GFX6-LABEL: name: ashr_s32_vs ; GFX6: liveins: $sgpr0, $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX6-NEXT: [[V_ASHRREV_I32_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I32_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX6-NEXT: [[V_ASHRREV_I32_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I32_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_ASHRREV_I32_e64_]] ; GFX7-LABEL: name: ashr_s32_vs ; GFX7: liveins: $sgpr0, $vgpr0 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX7-NEXT: [[V_ASHRREV_I32_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I32_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX7-NEXT: [[V_ASHRREV_I32_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I32_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX7-NEXT: S_ENDPGM 0, implicit [[V_ASHRREV_I32_e64_]] ; GFX8-LABEL: name: ashr_s32_vs ; GFX8: liveins: $sgpr0, $vgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX8-NEXT: [[V_ASHRREV_I32_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I32_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX8-NEXT: [[V_ASHRREV_I32_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I32_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_ASHRREV_I32_e64_]] ; GFX9-LABEL: name: ashr_s32_vs ; GFX9: liveins: $sgpr0, $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX9-NEXT: [[V_ASHRREV_I32_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I32_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX9-NEXT: [[V_ASHRREV_I32_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I32_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_ASHRREV_I32_e64_]] ; GFX10-LABEL: name: ashr_s32_vs ; GFX10: liveins: $sgpr0, $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX10-NEXT: [[V_ASHRREV_I32_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I32_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX10-NEXT: [[V_ASHRREV_I32_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I32_e64 
[[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_ASHRREV_I32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:sgpr(s32) = COPY $sgpr0 @@ -164,37 +164,37 @@ ; GFX6-LABEL: name: ashr_s32_vv ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6-NEXT: [[V_ASHRREV_I32_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I32_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX6-NEXT: [[V_ASHRREV_I32_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I32_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_ASHRREV_I32_e64_]] ; GFX7-LABEL: name: ashr_s32_vv ; GFX7: liveins: $vgpr0, $vgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX7-NEXT: [[V_ASHRREV_I32_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I32_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX7-NEXT: [[V_ASHRREV_I32_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I32_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX7-NEXT: S_ENDPGM 0, implicit [[V_ASHRREV_I32_e64_]] ; GFX8-LABEL: name: ashr_s32_vv ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX8-NEXT: [[V_ASHRREV_I32_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I32_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX8-NEXT: [[V_ASHRREV_I32_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I32_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_ASHRREV_I32_e64_]] ; GFX9-LABEL: name: ashr_s32_vv ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: [[V_ASHRREV_I32_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I32_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[V_ASHRREV_I32_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I32_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_ASHRREV_I32_e64_]] ; GFX10-LABEL: name: ashr_s32_vv ; GFX10: liveins: $vgpr0, $vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX10-NEXT: [[V_ASHRREV_I32_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I32_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[V_ASHRREV_I32_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I32_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_ASHRREV_I32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -213,37 +213,37 @@ ; GFX6-LABEL: name: ashr_s64_ss ; GFX6: liveins: $sgpr0_sgpr1, $sgpr2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6-NEXT: 
[[S_ASHR_I64_:%[0-9]+]]:sreg_64 = S_ASHR_I64 [[COPY]], [[COPY1]], implicit-def $scc + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX6-NEXT: [[S_ASHR_I64_:%[0-9]+]]:sreg_64 = S_ASHR_I64 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc ; GFX6-NEXT: S_ENDPGM 0, implicit [[S_ASHR_I64_]] ; GFX7-LABEL: name: ashr_s64_ss ; GFX7: liveins: $sgpr0_sgpr1, $sgpr2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7-NEXT: [[S_ASHR_I64_:%[0-9]+]]:sreg_64 = S_ASHR_I64 [[COPY]], [[COPY1]], implicit-def $scc + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX7-NEXT: [[S_ASHR_I64_:%[0-9]+]]:sreg_64 = S_ASHR_I64 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc ; GFX7-NEXT: S_ENDPGM 0, implicit [[S_ASHR_I64_]] ; GFX8-LABEL: name: ashr_s64_ss ; GFX8: liveins: $sgpr0_sgpr1, $sgpr2 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8-NEXT: [[S_ASHR_I64_:%[0-9]+]]:sreg_64 = S_ASHR_I64 [[COPY]], [[COPY1]], implicit-def $scc + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX8-NEXT: [[S_ASHR_I64_:%[0-9]+]]:sreg_64 = S_ASHR_I64 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc ; GFX8-NEXT: S_ENDPGM 0, implicit [[S_ASHR_I64_]] ; GFX9-LABEL: name: ashr_s64_ss ; GFX9: liveins: $sgpr0_sgpr1, $sgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX9-NEXT: [[S_ASHR_I64_:%[0-9]+]]:sreg_64 = S_ASHR_I64 [[COPY]], [[COPY1]], implicit-def $scc + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[S_ASHR_I64_:%[0-9]+]]:sreg_64 = S_ASHR_I64 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc ; GFX9-NEXT: S_ENDPGM 0, implicit [[S_ASHR_I64_]] ; GFX10-LABEL: name: ashr_s64_ss ; GFX10: liveins: $sgpr0_sgpr1, $sgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX10-NEXT: [[S_ASHR_I64_:%[0-9]+]]:sreg_64 = S_ASHR_I64 [[COPY]], [[COPY1]], implicit-def $scc + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX10-NEXT: [[S_ASHR_I64_:%[0-9]+]]:sreg_64 = S_ASHR_I64 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc ; GFX10-NEXT: S_ENDPGM 0, implicit [[S_ASHR_I64_]] %0:sgpr(s64) = COPY $sgpr0_sgpr1 %1:sgpr(s32) = COPY $sgpr2 @@ -262,37 +262,37 @@ ; GFX6-LABEL: name: ashr_s64_sv ; GFX6: liveins: $sgpr0_sgpr1, $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[V_ASHR_I64_e64_:%[0-9]+]]:vreg_64 = V_ASHR_I64_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[V_ASHR_I64_e64_:%[0-9]+]]:vreg_64 = V_ASHR_I64_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_ASHR_I64_e64_]] ; GFX7-LABEL: name: ashr_s64_sv ; GFX7: liveins: $sgpr0_sgpr1, $vgpr0 ; 
GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7-NEXT: [[V_ASHR_I64_e64_:%[0-9]+]]:vreg_64 = V_ASHR_I64_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX7-NEXT: [[V_ASHR_I64_e64_:%[0-9]+]]:vreg_64 = V_ASHR_I64_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; GFX7-NEXT: S_ENDPGM 0, implicit [[V_ASHR_I64_e64_]] ; GFX8-LABEL: name: ashr_s64_sv ; GFX8: liveins: $sgpr0_sgpr1, $vgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: [[V_ASHRREV_I64_e64_:%[0-9]+]]:vreg_64 = V_ASHRREV_I64_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[V_ASHRREV_I64_e64_:%[0-9]+]]:vreg_64 = V_ASHRREV_I64_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_ASHRREV_I64_e64_]] ; GFX9-LABEL: name: ashr_s64_sv ; GFX9: liveins: $sgpr0_sgpr1, $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[V_ASHRREV_I64_e64_:%[0-9]+]]:vreg_64 = V_ASHRREV_I64_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[V_ASHRREV_I64_e64_:%[0-9]+]]:vreg_64 = V_ASHRREV_I64_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_ASHRREV_I64_e64_]] ; GFX10-LABEL: name: ashr_s64_sv ; GFX10: liveins: $sgpr0_sgpr1, $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[V_ASHRREV_I64_e64_:%[0-9]+]]:vreg_64 = V_ASHRREV_I64_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[V_ASHRREV_I64_e64_:%[0-9]+]]:vreg_64 = V_ASHRREV_I64_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_ASHRREV_I64_e64_]] %0:sgpr(s64) = COPY $sgpr0_sgpr1 %1:vgpr(s32) = COPY $vgpr0 @@ -311,37 +311,37 @@ ; GFX6-LABEL: name: ashr_s64_vs ; GFX6: liveins: $sgpr0, $vgpr0_vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX6-NEXT: [[V_ASHR_I64_e64_:%[0-9]+]]:vreg_64 = V_ASHR_I64_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX6-NEXT: [[V_ASHR_I64_e64_:%[0-9]+]]:vreg_64 = V_ASHR_I64_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_ASHR_I64_e64_]] ; GFX7-LABEL: name: ashr_s64_vs ; GFX7: liveins: $sgpr0, $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX7-NEXT: [[V_ASHR_I64_e64_:%[0-9]+]]:vreg_64 = V_ASHR_I64_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = 
PRED_COPY $sgpr0 + ; GFX7-NEXT: [[V_ASHR_I64_e64_:%[0-9]+]]:vreg_64 = V_ASHR_I64_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; GFX7-NEXT: S_ENDPGM 0, implicit [[V_ASHR_I64_e64_]] ; GFX8-LABEL: name: ashr_s64_vs ; GFX8: liveins: $sgpr0, $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX8-NEXT: [[V_ASHRREV_I64_e64_:%[0-9]+]]:vreg_64 = V_ASHRREV_I64_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX8-NEXT: [[V_ASHRREV_I64_e64_:%[0-9]+]]:vreg_64 = V_ASHRREV_I64_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_ASHRREV_I64_e64_]] ; GFX9-LABEL: name: ashr_s64_vs ; GFX9: liveins: $sgpr0, $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX9-NEXT: [[V_ASHRREV_I64_e64_:%[0-9]+]]:vreg_64 = V_ASHRREV_I64_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX9-NEXT: [[V_ASHRREV_I64_e64_:%[0-9]+]]:vreg_64 = V_ASHRREV_I64_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_ASHRREV_I64_e64_]] ; GFX10-LABEL: name: ashr_s64_vs ; GFX10: liveins: $sgpr0, $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX10-NEXT: [[V_ASHRREV_I64_e64_:%[0-9]+]]:vreg_64 = V_ASHRREV_I64_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX10-NEXT: [[V_ASHRREV_I64_e64_:%[0-9]+]]:vreg_64 = V_ASHRREV_I64_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_ASHRREV_I64_e64_]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:sgpr(s32) = COPY $sgpr0 @@ -360,37 +360,37 @@ ; GFX6-LABEL: name: ashr_s64_vv ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX6-NEXT: [[V_ASHR_I64_e64_:%[0-9]+]]:vreg_64 = V_ASHR_I64_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX6-NEXT: [[V_ASHR_I64_e64_:%[0-9]+]]:vreg_64 = V_ASHR_I64_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_ASHR_I64_e64_]] ; GFX7-LABEL: name: ashr_s64_vv ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7-NEXT: [[V_ASHR_I64_e64_:%[0-9]+]]:vreg_64 = V_ASHR_I64_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX7-NEXT: [[V_ASHR_I64_e64_:%[0-9]+]]:vreg_64 = V_ASHR_I64_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; GFX7-NEXT: S_ENDPGM 0, implicit [[V_ASHR_I64_e64_]] ; GFX8-LABEL: name: ashr_s64_vv ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: 
[[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8-NEXT: [[V_ASHRREV_I64_e64_:%[0-9]+]]:vreg_64 = V_ASHRREV_I64_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX8-NEXT: [[V_ASHRREV_I64_e64_:%[0-9]+]]:vreg_64 = V_ASHRREV_I64_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_ASHRREV_I64_e64_]] ; GFX9-LABEL: name: ashr_s64_vv ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: [[V_ASHRREV_I64_e64_:%[0-9]+]]:vreg_64 = V_ASHRREV_I64_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[V_ASHRREV_I64_e64_:%[0-9]+]]:vreg_64 = V_ASHRREV_I64_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_ASHRREV_I64_e64_]] ; GFX10-LABEL: name: ashr_s64_vv ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10-NEXT: [[V_ASHRREV_I64_e64_:%[0-9]+]]:vreg_64 = V_ASHRREV_I64_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[V_ASHRREV_I64_e64_:%[0-9]+]]:vreg_64 = V_ASHRREV_I64_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_ASHRREV_I64_e64_]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ashr.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ashr.s16.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ashr.s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ashr.s16.mir @@ -85,30 +85,30 @@ ; GFX8-LABEL: name: ashr_s16_s16_vs ; GFX8: liveins: $sgpr0, $vgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX8-NEXT: [[V_ASHRREV_I16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX8-NEXT: [[V_ASHRREV_I16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_ASHRREV_I16_e64_]] ; GFX9-LABEL: name: ashr_s16_s16_vs ; GFX9: liveins: $sgpr0, $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX9-NEXT: [[V_ASHRREV_I16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX9-NEXT: [[V_ASHRREV_I16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_ASHRREV_I16_e64_]] ; GFX10-LABEL: name: ashr_s16_s16_vs ; GFX10: liveins: $sgpr0, $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX10-NEXT: 
[[V_ASHRREV_I16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX10-NEXT: [[V_ASHRREV_I16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_ASHRREV_I16_e64_]] ; GFX11-LABEL: name: ashr_s16_s16_vs ; GFX11: liveins: $sgpr0, $vgpr0 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX11-NEXT: [[V_ASHRREV_I16_t16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_t16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX11-NEXT: [[V_ASHRREV_I16_t16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_t16_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_ASHRREV_I16_t16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:sgpr(s32) = COPY $sgpr0 @@ -178,30 +178,30 @@ ; GFX8-LABEL: name: ashr_s16_s16_vv ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX8-NEXT: [[V_ASHRREV_I16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX8-NEXT: [[V_ASHRREV_I16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_ASHRREV_I16_e64_]] ; GFX9-LABEL: name: ashr_s16_s16_vv ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: [[V_ASHRREV_I16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[V_ASHRREV_I16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_ASHRREV_I16_e64_]] ; GFX10-LABEL: name: ashr_s16_s16_vv ; GFX10: liveins: $vgpr0, $vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX10-NEXT: [[V_ASHRREV_I16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[V_ASHRREV_I16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_ASHRREV_I16_e64_]] ; GFX11-LABEL: name: ashr_s16_s16_vv ; GFX11: liveins: $vgpr0, $vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: [[V_ASHRREV_I16_t16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_t16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[V_ASHRREV_I16_t16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_t16_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec 
; GFX11-NEXT: S_ENDPGM 0, implicit [[V_ASHRREV_I16_t16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -223,31 +223,31 @@ ; GFX8-LABEL: name: ashr_s16_s16_vv_zext_to_s32 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX8-NEXT: [[V_ASHRREV_I16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX8-NEXT: [[V_ASHRREV_I16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_ASHRREV_I16_e64_]] ; GFX9-LABEL: name: ashr_s16_s16_vv_zext_to_s32 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: [[V_ASHRREV_I16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[V_ASHRREV_I16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_ASHRREV_I16_e64_]] ; GFX10-LABEL: name: ashr_s16_s16_vv_zext_to_s32 ; GFX10: liveins: $vgpr0, $vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX10-NEXT: [[V_ASHRREV_I16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[V_ASHRREV_I16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX10-NEXT: [[V_BFE_U32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_U32_e64 [[V_ASHRREV_I16_e64_]], 0, 16, implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_BFE_U32_e64_]] ; GFX11-LABEL: name: ashr_s16_s16_vv_zext_to_s32 ; GFX11: liveins: $vgpr0, $vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: [[V_ASHRREV_I16_t16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_t16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[V_ASHRREV_I16_t16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_t16_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX11-NEXT: [[V_BFE_U32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_U32_e64 [[V_ASHRREV_I16_t16_e64_]], 0, 16, implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_BFE_U32_e64_]] %0:vgpr(s32) = COPY $vgpr0 @@ -423,30 +423,30 @@ ; GFX8-LABEL: name: ashr_s16_s16_sv ; GFX8: liveins: $sgpr0, $vgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: [[V_ASHRREV_I16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[V_ASHRREV_I16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_ASHRREV_I16_e64_]] 
; GFX9-LABEL: name: ashr_s16_s16_sv ; GFX9: liveins: $sgpr0, $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[V_ASHRREV_I16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[V_ASHRREV_I16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_ASHRREV_I16_e64_]] ; GFX10-LABEL: name: ashr_s16_s16_sv ; GFX10: liveins: $sgpr0, $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[V_ASHRREV_I16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[V_ASHRREV_I16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_ASHRREV_I16_e64_]] ; GFX11-LABEL: name: ashr_s16_s16_sv ; GFX11: liveins: $sgpr0, $vgpr0 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[V_ASHRREV_I16_t16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_t16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[V_ASHRREV_I16_t16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_t16_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_ASHRREV_I16_t16_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = COPY $vgpr0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ashr.v2s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ashr.v2s16.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ashr.v2s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ashr.v2s16.mir @@ -79,16 +79,16 @@ ; GFX9-LABEL: name: ashr_v2s16_sv ; GFX9: liveins: $sgpr0, $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[V_PK_ASHRREV_I16_:%[0-9]+]]:vgpr_32 = V_PK_ASHRREV_I16 8, [[COPY1]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $exec + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[V_PK_ASHRREV_I16_:%[0-9]+]]:vgpr_32 = V_PK_ASHRREV_I16 8, [[PRED_COPY1]], 8, [[PRED_COPY]], 0, 0, 0, 0, 0, implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_PK_ASHRREV_I16_]] ; GFX10-LABEL: name: ashr_v2s16_sv ; GFX10: liveins: $sgpr0, $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[V_PK_ASHRREV_I16_:%[0-9]+]]:vgpr_32 = V_PK_ASHRREV_I16 8, [[COPY1]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $exec + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[V_PK_ASHRREV_I16_:%[0-9]+]]:vgpr_32 = V_PK_ASHRREV_I16 8, [[PRED_COPY1]], 8, [[PRED_COPY]], 0, 0, 0, 0, 0, implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_PK_ASHRREV_I16_]] %0:sgpr(<2 x 
s16>) = COPY $sgpr0 %1:vgpr(<2 x s16>) = COPY $vgpr0 @@ -122,16 +122,16 @@ ; GFX9-LABEL: name: ashr_v2s16_vs ; GFX9: liveins: $sgpr0, $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX9-NEXT: [[V_PK_ASHRREV_I16_:%[0-9]+]]:vgpr_32 = V_PK_ASHRREV_I16 8, [[COPY1]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $exec + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX9-NEXT: [[V_PK_ASHRREV_I16_:%[0-9]+]]:vgpr_32 = V_PK_ASHRREV_I16 8, [[PRED_COPY1]], 8, [[PRED_COPY]], 0, 0, 0, 0, 0, implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_PK_ASHRREV_I16_]] ; GFX10-LABEL: name: ashr_v2s16_vs ; GFX10: liveins: $sgpr0, $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX10-NEXT: [[V_PK_ASHRREV_I16_:%[0-9]+]]:vgpr_32 = V_PK_ASHRREV_I16 8, [[COPY1]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $exec + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX10-NEXT: [[V_PK_ASHRREV_I16_:%[0-9]+]]:vgpr_32 = V_PK_ASHRREV_I16 8, [[PRED_COPY1]], 8, [[PRED_COPY]], 0, 0, 0, 0, 0, implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_PK_ASHRREV_I16_]] %0:vgpr(<2 x s16>) = COPY $vgpr0 %1:sgpr(<2 x s16>) = COPY $sgpr0 @@ -165,16 +165,16 @@ ; GFX9-LABEL: name: ashr_v2s16_vv ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: [[V_PK_ASHRREV_I16_:%[0-9]+]]:vgpr_32 = V_PK_ASHRREV_I16 8, [[COPY1]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $exec + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[V_PK_ASHRREV_I16_:%[0-9]+]]:vgpr_32 = V_PK_ASHRREV_I16 8, [[PRED_COPY1]], 8, [[PRED_COPY]], 0, 0, 0, 0, 0, implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_PK_ASHRREV_I16_]] ; GFX10-LABEL: name: ashr_v2s16_vv ; GFX10: liveins: $vgpr0, $vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX10-NEXT: [[V_PK_ASHRREV_I16_:%[0-9]+]]:vgpr_32 = V_PK_ASHRREV_I16 8, [[COPY1]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $exec + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[V_PK_ASHRREV_I16_:%[0-9]+]]:vgpr_32 = V_PK_ASHRREV_I16 8, [[PRED_COPY1]], 8, [[PRED_COPY]], 0, 0, 0, 0, 0, implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_PK_ASHRREV_I16_]] %0:vgpr(<2 x s16>) = COPY $vgpr0 %1:vgpr(<2 x s16>) = COPY $vgpr1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomic-cmpxchg-local.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomic-cmpxchg-local.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomic-cmpxchg-local.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomic-cmpxchg-local.mir @@ -19,37 +19,37 @@ ; GFX6-LABEL: name: atomic_cmpxchg_s32_local ; GFX6: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: 
[[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX6-NEXT: $m0 = S_MOV_B32 -1 - ; GFX6-NEXT: [[DS_CMPST_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_CMPST_RTN_B32 [[COPY]], [[COPY1]], [[COPY2]], 0, 0, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 3) - ; GFX6-NEXT: $vgpr0 = COPY [[DS_CMPST_RTN_B32_]] + ; GFX6-NEXT: [[DS_CMPST_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_CMPST_RTN_B32 [[PRED_COPY]], [[PRED_COPY1]], [[PRED_COPY2]], 0, 0, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 3) + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[DS_CMPST_RTN_B32_]] ; GFX7-LABEL: name: atomic_cmpxchg_s32_local ; GFX7: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 - ; GFX7-NEXT: [[DS_CMPST_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_CMPST_RTN_B32 [[COPY]], [[COPY1]], [[COPY2]], 0, 0, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 3) - ; GFX7-NEXT: $vgpr0 = COPY [[DS_CMPST_RTN_B32_]] + ; GFX7-NEXT: [[DS_CMPST_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_CMPST_RTN_B32 [[PRED_COPY]], [[PRED_COPY1]], [[PRED_COPY2]], 0, 0, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 3) + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[DS_CMPST_RTN_B32_]] ; GFX9-LABEL: name: atomic_cmpxchg_s32_local ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: [[DS_CMPST_RTN_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_CMPST_RTN_B32_gfx9 [[COPY]], [[COPY1]], [[COPY2]], 0, 0, implicit $exec :: (load store seq_cst (s32), addrspace 3) - ; GFX9-NEXT: $vgpr0 = COPY [[DS_CMPST_RTN_B32_gfx9_]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[DS_CMPST_RTN_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_CMPST_RTN_B32_gfx9 [[PRED_COPY]], [[PRED_COPY1]], [[PRED_COPY2]], 0, 0, implicit $exec :: (load store seq_cst (s32), addrspace 3) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[DS_CMPST_RTN_B32_gfx9_]] ; GFX11-LABEL: name: atomic_cmpxchg_s32_local ; GFX11: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX11-NEXT: [[DS_CMPSTORE_RTN_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_CMPSTORE_RTN_B32_gfx9 [[COPY]], [[COPY2]], [[COPY1]], 0, 0, implicit $exec :: (load store seq_cst (s32), addrspace 3) - ; GFX11-NEXT: $vgpr0 = COPY [[DS_CMPSTORE_RTN_B32_gfx9_]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX11-NEXT: [[DS_CMPSTORE_RTN_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_CMPSTORE_RTN_B32_gfx9 [[PRED_COPY]], [[PRED_COPY2]], [[PRED_COPY1]], 0, 0, implicit $exec :: (load store seq_cst (s32), addrspace 3) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[DS_CMPSTORE_RTN_B32_gfx9_]] 
%0:vgpr(p3) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s32) = COPY $vgpr2 @@ -70,39 +70,39 @@ ; GFX6-LABEL: name: atomic_cmpxchg_s32_local_gep4 ; GFX6: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX6-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4, implicit $exec - ; GFX6-NEXT: %4:vgpr_32, dead %6:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX6-NEXT: $m0 = S_MOV_B32 -1 - ; GFX6-NEXT: [[DS_CMPST_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_CMPST_RTN_B32 %4, [[COPY1]], [[COPY2]], 0, 0, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 3) - ; GFX6-NEXT: $vgpr0 = COPY [[DS_CMPST_RTN_B32_]] + ; GFX6-NEXT: [[DS_CMPST_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_CMPST_RTN_B32 [[V_ADD_CO_U32_e64_]], [[PRED_COPY1]], [[PRED_COPY2]], 0, 0, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 3) + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[DS_CMPST_RTN_B32_]] ; GFX7-LABEL: name: atomic_cmpxchg_s32_local_gep4 ; GFX7: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 - ; GFX7-NEXT: [[DS_CMPST_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_CMPST_RTN_B32 [[COPY]], [[COPY1]], [[COPY2]], 4, 0, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 3) - ; GFX7-NEXT: $vgpr0 = COPY [[DS_CMPST_RTN_B32_]] + ; GFX7-NEXT: [[DS_CMPST_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_CMPST_RTN_B32 [[PRED_COPY]], [[PRED_COPY1]], [[PRED_COPY2]], 4, 0, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 3) + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[DS_CMPST_RTN_B32_]] ; GFX9-LABEL: name: atomic_cmpxchg_s32_local_gep4 ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: [[DS_CMPST_RTN_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_CMPST_RTN_B32_gfx9 [[COPY]], [[COPY1]], [[COPY2]], 4, 0, implicit $exec :: (load store seq_cst (s32), addrspace 3) - ; GFX9-NEXT: $vgpr0 = COPY [[DS_CMPST_RTN_B32_gfx9_]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[DS_CMPST_RTN_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_CMPST_RTN_B32_gfx9 [[PRED_COPY]], [[PRED_COPY1]], [[PRED_COPY2]], 4, 0, implicit $exec :: (load store seq_cst (s32), addrspace 3) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[DS_CMPST_RTN_B32_gfx9_]] ; GFX11-LABEL: name: atomic_cmpxchg_s32_local_gep4 ; GFX11: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: 
[[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX11-NEXT: [[DS_CMPSTORE_RTN_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_CMPSTORE_RTN_B32_gfx9 [[COPY]], [[COPY2]], [[COPY1]], 4, 0, implicit $exec :: (load store seq_cst (s32), addrspace 3) - ; GFX11-NEXT: $vgpr0 = COPY [[DS_CMPSTORE_RTN_B32_gfx9_]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX11-NEXT: [[DS_CMPSTORE_RTN_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_CMPSTORE_RTN_B32_gfx9 [[PRED_COPY]], [[PRED_COPY2]], [[PRED_COPY1]], 4, 0, implicit $exec :: (load store seq_cst (s32), addrspace 3) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[DS_CMPSTORE_RTN_B32_gfx9_]] %0:vgpr(p3) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s32) = COPY $vgpr2 @@ -125,37 +125,37 @@ ; GFX6-LABEL: name: atomic_cmpxchg_s64_local ; GFX6: liveins: $vgpr0, $vgpr1_vgpr2, $vgpr3_vgpr4 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr1_vgpr2 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr1_vgpr2 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr3_vgpr4 ; GFX6-NEXT: $m0 = S_MOV_B32 -1 - ; GFX6-NEXT: [[DS_CMPST_RTN_B64_:%[0-9]+]]:vreg_64 = DS_CMPST_RTN_B64 [[COPY]], [[COPY1]], [[COPY2]], 0, 0, implicit $m0, implicit $exec :: (load store seq_cst (s64), addrspace 3) - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[DS_CMPST_RTN_B64_]] + ; GFX6-NEXT: [[DS_CMPST_RTN_B64_:%[0-9]+]]:vreg_64 = DS_CMPST_RTN_B64 [[PRED_COPY]], [[PRED_COPY1]], [[PRED_COPY2]], 0, 0, implicit $m0, implicit $exec :: (load store seq_cst (s64), addrspace 3) + ; GFX6-NEXT: $vgpr0_vgpr1 = PRED_COPY [[DS_CMPST_RTN_B64_]] ; GFX7-LABEL: name: atomic_cmpxchg_s64_local ; GFX7: liveins: $vgpr0, $vgpr1_vgpr2, $vgpr3_vgpr4 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr1_vgpr2 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr1_vgpr2 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr3_vgpr4 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 - ; GFX7-NEXT: [[DS_CMPST_RTN_B64_:%[0-9]+]]:vreg_64 = DS_CMPST_RTN_B64 [[COPY]], [[COPY1]], [[COPY2]], 0, 0, implicit $m0, implicit $exec :: (load store seq_cst (s64), addrspace 3) - ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[DS_CMPST_RTN_B64_]] + ; GFX7-NEXT: [[DS_CMPST_RTN_B64_:%[0-9]+]]:vreg_64 = DS_CMPST_RTN_B64 [[PRED_COPY]], [[PRED_COPY1]], [[PRED_COPY2]], 0, 0, implicit $m0, implicit $exec :: (load store seq_cst (s64), addrspace 3) + ; GFX7-NEXT: $vgpr0_vgpr1 = PRED_COPY [[DS_CMPST_RTN_B64_]] ; GFX9-LABEL: name: atomic_cmpxchg_s64_local ; GFX9: liveins: $vgpr0, $vgpr1_vgpr2, $vgpr3_vgpr4 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr1_vgpr2 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4 - ; GFX9-NEXT: [[DS_CMPST_RTN_B64_gfx9_:%[0-9]+]]:vreg_64 = DS_CMPST_RTN_B64_gfx9 [[COPY]], [[COPY1]], [[COPY2]], 0, 0, implicit $exec :: (load store seq_cst (s64), addrspace 3) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[DS_CMPST_RTN_B64_gfx9_]] + ; 
GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr1_vgpr2 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr3_vgpr4 + ; GFX9-NEXT: [[DS_CMPST_RTN_B64_gfx9_:%[0-9]+]]:vreg_64 = DS_CMPST_RTN_B64_gfx9 [[PRED_COPY]], [[PRED_COPY1]], [[PRED_COPY2]], 0, 0, implicit $exec :: (load store seq_cst (s64), addrspace 3) + ; GFX9-NEXT: $vgpr0_vgpr1 = PRED_COPY [[DS_CMPST_RTN_B64_gfx9_]] ; GFX11-LABEL: name: atomic_cmpxchg_s64_local ; GFX11: liveins: $vgpr0, $vgpr1_vgpr2, $vgpr3_vgpr4 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr1_vgpr2 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4 - ; GFX11-NEXT: [[DS_CMPSTORE_RTN_B64_gfx9_:%[0-9]+]]:vreg_64 = DS_CMPSTORE_RTN_B64_gfx9 [[COPY]], [[COPY2]], [[COPY1]], 0, 0, implicit $exec :: (load store seq_cst (s64), addrspace 3) - ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[DS_CMPSTORE_RTN_B64_gfx9_]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr1_vgpr2 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr3_vgpr4 + ; GFX11-NEXT: [[DS_CMPSTORE_RTN_B64_gfx9_:%[0-9]+]]:vreg_64 = DS_CMPSTORE_RTN_B64_gfx9 [[PRED_COPY]], [[PRED_COPY2]], [[PRED_COPY1]], 0, 0, implicit $exec :: (load store seq_cst (s64), addrspace 3) + ; GFX11-NEXT: $vgpr0_vgpr1 = PRED_COPY [[DS_CMPSTORE_RTN_B64_gfx9_]] %0:vgpr(p3) = COPY $vgpr0 %1:vgpr(s64) = COPY $vgpr1_vgpr2 %2:vgpr(s64) = COPY $vgpr3_vgpr4 @@ -176,37 +176,37 @@ ; GFX6-LABEL: name: atomic_cmpxchg_s64_local_gep4 ; GFX6: liveins: $vgpr0, $vgpr1_vgpr2, $vgpr3_vgpr4 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr1_vgpr2 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr1_vgpr2 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr3_vgpr4 ; GFX6-NEXT: $m0 = S_MOV_B32 -1 - ; GFX6-NEXT: [[DS_CMPST_RTN_B64_:%[0-9]+]]:vreg_64 = DS_CMPST_RTN_B64 [[COPY]], [[COPY1]], [[COPY2]], 0, 0, implicit $m0, implicit $exec :: (load store seq_cst (s64), addrspace 3) - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[DS_CMPST_RTN_B64_]] + ; GFX6-NEXT: [[DS_CMPST_RTN_B64_:%[0-9]+]]:vreg_64 = DS_CMPST_RTN_B64 [[PRED_COPY]], [[PRED_COPY1]], [[PRED_COPY2]], 0, 0, implicit $m0, implicit $exec :: (load store seq_cst (s64), addrspace 3) + ; GFX6-NEXT: $vgpr0_vgpr1 = PRED_COPY [[DS_CMPST_RTN_B64_]] ; GFX7-LABEL: name: atomic_cmpxchg_s64_local_gep4 ; GFX7: liveins: $vgpr0, $vgpr1_vgpr2, $vgpr3_vgpr4 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr1_vgpr2 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr1_vgpr2 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr3_vgpr4 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 - ; GFX7-NEXT: [[DS_CMPST_RTN_B64_:%[0-9]+]]:vreg_64 = DS_CMPST_RTN_B64 [[COPY]], [[COPY1]], [[COPY2]], 0, 0, implicit $m0, implicit $exec :: (load store seq_cst (s64), addrspace 3) - ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[DS_CMPST_RTN_B64_]] + ; GFX7-NEXT: [[DS_CMPST_RTN_B64_:%[0-9]+]]:vreg_64 = DS_CMPST_RTN_B64 [[PRED_COPY]], [[PRED_COPY1]], [[PRED_COPY2]], 0, 0, implicit $m0, 
implicit $exec :: (load store seq_cst (s64), addrspace 3) + ; GFX7-NEXT: $vgpr0_vgpr1 = PRED_COPY [[DS_CMPST_RTN_B64_]] ; GFX9-LABEL: name: atomic_cmpxchg_s64_local_gep4 ; GFX9: liveins: $vgpr0, $vgpr1_vgpr2, $vgpr3_vgpr4 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr1_vgpr2 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4 - ; GFX9-NEXT: [[DS_CMPST_RTN_B64_gfx9_:%[0-9]+]]:vreg_64 = DS_CMPST_RTN_B64_gfx9 [[COPY]], [[COPY1]], [[COPY2]], 0, 0, implicit $exec :: (load store seq_cst (s64), addrspace 3) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[DS_CMPST_RTN_B64_gfx9_]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr1_vgpr2 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr3_vgpr4 + ; GFX9-NEXT: [[DS_CMPST_RTN_B64_gfx9_:%[0-9]+]]:vreg_64 = DS_CMPST_RTN_B64_gfx9 [[PRED_COPY]], [[PRED_COPY1]], [[PRED_COPY2]], 0, 0, implicit $exec :: (load store seq_cst (s64), addrspace 3) + ; GFX9-NEXT: $vgpr0_vgpr1 = PRED_COPY [[DS_CMPST_RTN_B64_gfx9_]] ; GFX11-LABEL: name: atomic_cmpxchg_s64_local_gep4 ; GFX11: liveins: $vgpr0, $vgpr1_vgpr2, $vgpr3_vgpr4 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr1_vgpr2 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4 - ; GFX11-NEXT: [[DS_CMPSTORE_RTN_B64_gfx9_:%[0-9]+]]:vreg_64 = DS_CMPSTORE_RTN_B64_gfx9 [[COPY]], [[COPY2]], [[COPY1]], 0, 0, implicit $exec :: (load store seq_cst (s64), addrspace 3) - ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[DS_CMPSTORE_RTN_B64_gfx9_]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr1_vgpr2 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr3_vgpr4 + ; GFX11-NEXT: [[DS_CMPSTORE_RTN_B64_gfx9_:%[0-9]+]]:vreg_64 = DS_CMPSTORE_RTN_B64_gfx9 [[PRED_COPY]], [[PRED_COPY2]], [[PRED_COPY1]], 0, 0, implicit $exec :: (load store seq_cst (s64), addrspace 3) + ; GFX11-NEXT: $vgpr0_vgpr1 = PRED_COPY [[DS_CMPSTORE_RTN_B64_gfx9_]] %0:vgpr(p3) = COPY $vgpr0 %1:vgpr(s64) = COPY $vgpr1_vgpr2 %2:vgpr(s64) = COPY $vgpr3_vgpr4 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomic-cmpxchg-region.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomic-cmpxchg-region.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomic-cmpxchg-region.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomic-cmpxchg-region.mir @@ -19,37 +19,37 @@ ; GFX6-LABEL: name: atomic_cmpxchg_s32_region ; GFX6: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX6-NEXT: $m0 = S_MOV_B32 -1 - ; GFX6-NEXT: [[DS_CMPST_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_CMPST_RTN_B32 [[COPY]], [[COPY1]], [[COPY2]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 2) - ; GFX6-NEXT: $vgpr0 = COPY [[DS_CMPST_RTN_B32_]] + ; GFX6-NEXT: [[DS_CMPST_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_CMPST_RTN_B32 [[PRED_COPY]], [[PRED_COPY1]], [[PRED_COPY2]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 2) + ; GFX6-NEXT: $vgpr0 = PRED_COPY 
[[DS_CMPST_RTN_B32_]] ; GFX7-LABEL: name: atomic_cmpxchg_s32_region ; GFX7: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 - ; GFX7-NEXT: [[DS_CMPST_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_CMPST_RTN_B32 [[COPY]], [[COPY1]], [[COPY2]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 2) - ; GFX7-NEXT: $vgpr0 = COPY [[DS_CMPST_RTN_B32_]] + ; GFX7-NEXT: [[DS_CMPST_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_CMPST_RTN_B32 [[PRED_COPY]], [[PRED_COPY1]], [[PRED_COPY2]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 2) + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[DS_CMPST_RTN_B32_]] ; GFX9-LABEL: name: atomic_cmpxchg_s32_region ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: [[DS_CMPST_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_CMPST_RTN_B32 [[COPY]], [[COPY1]], [[COPY2]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 2) - ; GFX9-NEXT: $vgpr0 = COPY [[DS_CMPST_RTN_B32_]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[DS_CMPST_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_CMPST_RTN_B32 [[PRED_COPY]], [[PRED_COPY1]], [[PRED_COPY2]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 2) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[DS_CMPST_RTN_B32_]] ; GFX11-LABEL: name: atomic_cmpxchg_s32_region ; GFX11: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX11-NEXT: [[DS_CMPSTORE_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_CMPSTORE_RTN_B32 [[COPY]], [[COPY2]], [[COPY1]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 2) - ; GFX11-NEXT: $vgpr0 = COPY [[DS_CMPSTORE_RTN_B32_]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX11-NEXT: [[DS_CMPSTORE_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_CMPSTORE_RTN_B32 [[PRED_COPY]], [[PRED_COPY2]], [[PRED_COPY1]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 2) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[DS_CMPSTORE_RTN_B32_]] %0:vgpr(p2) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s32) = COPY $vgpr2 @@ -70,39 +70,39 @@ ; GFX6-LABEL: name: atomic_cmpxchg_s32_region_gep4 ; GFX6: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX6-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = 
V_MOV_B32_e32 4, implicit $exec - ; GFX6-NEXT: %4:vgpr_32, dead %6:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX6-NEXT: $m0 = S_MOV_B32 -1 - ; GFX6-NEXT: [[DS_CMPST_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_CMPST_RTN_B32 %4, [[COPY1]], [[COPY2]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 2) - ; GFX6-NEXT: $vgpr0 = COPY [[DS_CMPST_RTN_B32_]] + ; GFX6-NEXT: [[DS_CMPST_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_CMPST_RTN_B32 [[V_ADD_CO_U32_e64_]], [[PRED_COPY1]], [[PRED_COPY2]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 2) + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[DS_CMPST_RTN_B32_]] ; GFX7-LABEL: name: atomic_cmpxchg_s32_region_gep4 ; GFX7: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 - ; GFX7-NEXT: [[DS_CMPST_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_CMPST_RTN_B32 [[COPY]], [[COPY1]], [[COPY2]], 4, 1, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 2) - ; GFX7-NEXT: $vgpr0 = COPY [[DS_CMPST_RTN_B32_]] + ; GFX7-NEXT: [[DS_CMPST_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_CMPST_RTN_B32 [[PRED_COPY]], [[PRED_COPY1]], [[PRED_COPY2]], 4, 1, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 2) + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[DS_CMPST_RTN_B32_]] ; GFX9-LABEL: name: atomic_cmpxchg_s32_region_gep4 ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: [[DS_CMPST_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_CMPST_RTN_B32 [[COPY]], [[COPY1]], [[COPY2]], 4, 1, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 2) - ; GFX9-NEXT: $vgpr0 = COPY [[DS_CMPST_RTN_B32_]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[DS_CMPST_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_CMPST_RTN_B32 [[PRED_COPY]], [[PRED_COPY1]], [[PRED_COPY2]], 4, 1, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 2) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[DS_CMPST_RTN_B32_]] ; GFX11-LABEL: name: atomic_cmpxchg_s32_region_gep4 ; GFX11: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX11-NEXT: [[DS_CMPSTORE_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_CMPSTORE_RTN_B32 [[COPY]], [[COPY2]], [[COPY1]], 4, 1, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 2) - ; GFX11-NEXT: $vgpr0 = COPY [[DS_CMPSTORE_RTN_B32_]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX11-NEXT: 
[[DS_CMPSTORE_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_CMPSTORE_RTN_B32 [[PRED_COPY]], [[PRED_COPY2]], [[PRED_COPY1]], 4, 1, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 2) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[DS_CMPSTORE_RTN_B32_]] %0:vgpr(p2) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s32) = COPY $vgpr2 @@ -125,37 +125,37 @@ ; GFX6-LABEL: name: atomic_cmpxchg_s64_region ; GFX6: liveins: $vgpr0, $vgpr1_vgpr2, $vgpr3_vgpr4 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr1_vgpr2 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr1_vgpr2 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr3_vgpr4 ; GFX6-NEXT: $m0 = S_MOV_B32 -1 - ; GFX6-NEXT: [[DS_CMPST_RTN_B64_:%[0-9]+]]:vreg_64 = DS_CMPST_RTN_B64 [[COPY]], [[COPY1]], [[COPY2]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst (s64), addrspace 2) - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[DS_CMPST_RTN_B64_]] + ; GFX6-NEXT: [[DS_CMPST_RTN_B64_:%[0-9]+]]:vreg_64 = DS_CMPST_RTN_B64 [[PRED_COPY]], [[PRED_COPY1]], [[PRED_COPY2]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst (s64), addrspace 2) + ; GFX6-NEXT: $vgpr0_vgpr1 = PRED_COPY [[DS_CMPST_RTN_B64_]] ; GFX7-LABEL: name: atomic_cmpxchg_s64_region ; GFX7: liveins: $vgpr0, $vgpr1_vgpr2, $vgpr3_vgpr4 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr1_vgpr2 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr1_vgpr2 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr3_vgpr4 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 - ; GFX7-NEXT: [[DS_CMPST_RTN_B64_:%[0-9]+]]:vreg_64 = DS_CMPST_RTN_B64 [[COPY]], [[COPY1]], [[COPY2]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst (s64), addrspace 2) - ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[DS_CMPST_RTN_B64_]] + ; GFX7-NEXT: [[DS_CMPST_RTN_B64_:%[0-9]+]]:vreg_64 = DS_CMPST_RTN_B64 [[PRED_COPY]], [[PRED_COPY1]], [[PRED_COPY2]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst (s64), addrspace 2) + ; GFX7-NEXT: $vgpr0_vgpr1 = PRED_COPY [[DS_CMPST_RTN_B64_]] ; GFX9-LABEL: name: atomic_cmpxchg_s64_region ; GFX9: liveins: $vgpr0, $vgpr1_vgpr2, $vgpr3_vgpr4 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr1_vgpr2 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4 - ; GFX9-NEXT: [[DS_CMPST_RTN_B64_:%[0-9]+]]:vreg_64 = DS_CMPST_RTN_B64 [[COPY]], [[COPY1]], [[COPY2]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst (s64), addrspace 2) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[DS_CMPST_RTN_B64_]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr1_vgpr2 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr3_vgpr4 + ; GFX9-NEXT: [[DS_CMPST_RTN_B64_:%[0-9]+]]:vreg_64 = DS_CMPST_RTN_B64 [[PRED_COPY]], [[PRED_COPY1]], [[PRED_COPY2]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst (s64), addrspace 2) + ; GFX9-NEXT: $vgpr0_vgpr1 = PRED_COPY [[DS_CMPST_RTN_B64_]] ; GFX11-LABEL: name: atomic_cmpxchg_s64_region ; GFX11: liveins: $vgpr0, $vgpr1_vgpr2, $vgpr3_vgpr4 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: 
[[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr1_vgpr2 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4 - ; GFX11-NEXT: [[DS_CMPSTORE_RTN_B64_:%[0-9]+]]:vreg_64 = DS_CMPSTORE_RTN_B64 [[COPY]], [[COPY2]], [[COPY1]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst (s64), addrspace 2) - ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[DS_CMPSTORE_RTN_B64_]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr1_vgpr2 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr3_vgpr4 + ; GFX11-NEXT: [[DS_CMPSTORE_RTN_B64_:%[0-9]+]]:vreg_64 = DS_CMPSTORE_RTN_B64 [[PRED_COPY]], [[PRED_COPY2]], [[PRED_COPY1]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst (s64), addrspace 2) + ; GFX11-NEXT: $vgpr0_vgpr1 = PRED_COPY [[DS_CMPSTORE_RTN_B64_]] %0:vgpr(p2) = COPY $vgpr0 %1:vgpr(s64) = COPY $vgpr1_vgpr2 %2:vgpr(s64) = COPY $vgpr3_vgpr4 @@ -176,37 +176,37 @@ ; GFX6-LABEL: name: atomic_cmpxchg_s64_region_gep4 ; GFX6: liveins: $vgpr0, $vgpr1_vgpr2, $vgpr3_vgpr4 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr1_vgpr2 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr1_vgpr2 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr3_vgpr4 ; GFX6-NEXT: $m0 = S_MOV_B32 -1 - ; GFX6-NEXT: [[DS_CMPST_RTN_B64_:%[0-9]+]]:vreg_64 = DS_CMPST_RTN_B64 [[COPY]], [[COPY1]], [[COPY2]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst (s64), addrspace 2) - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[DS_CMPST_RTN_B64_]] + ; GFX6-NEXT: [[DS_CMPST_RTN_B64_:%[0-9]+]]:vreg_64 = DS_CMPST_RTN_B64 [[PRED_COPY]], [[PRED_COPY1]], [[PRED_COPY2]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst (s64), addrspace 2) + ; GFX6-NEXT: $vgpr0_vgpr1 = PRED_COPY [[DS_CMPST_RTN_B64_]] ; GFX7-LABEL: name: atomic_cmpxchg_s64_region_gep4 ; GFX7: liveins: $vgpr0, $vgpr1_vgpr2, $vgpr3_vgpr4 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr1_vgpr2 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr1_vgpr2 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr3_vgpr4 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 - ; GFX7-NEXT: [[DS_CMPST_RTN_B64_:%[0-9]+]]:vreg_64 = DS_CMPST_RTN_B64 [[COPY]], [[COPY1]], [[COPY2]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst (s64), addrspace 2) - ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[DS_CMPST_RTN_B64_]] + ; GFX7-NEXT: [[DS_CMPST_RTN_B64_:%[0-9]+]]:vreg_64 = DS_CMPST_RTN_B64 [[PRED_COPY]], [[PRED_COPY1]], [[PRED_COPY2]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst (s64), addrspace 2) + ; GFX7-NEXT: $vgpr0_vgpr1 = PRED_COPY [[DS_CMPST_RTN_B64_]] ; GFX9-LABEL: name: atomic_cmpxchg_s64_region_gep4 ; GFX9: liveins: $vgpr0, $vgpr1_vgpr2, $vgpr3_vgpr4 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr1_vgpr2 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4 - ; GFX9-NEXT: [[DS_CMPST_RTN_B64_:%[0-9]+]]:vreg_64 = DS_CMPST_RTN_B64 [[COPY]], [[COPY1]], [[COPY2]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst (s64), addrspace 
2) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[DS_CMPST_RTN_B64_]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr1_vgpr2 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr3_vgpr4 + ; GFX9-NEXT: [[DS_CMPST_RTN_B64_:%[0-9]+]]:vreg_64 = DS_CMPST_RTN_B64 [[PRED_COPY]], [[PRED_COPY1]], [[PRED_COPY2]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst (s64), addrspace 2) + ; GFX9-NEXT: $vgpr0_vgpr1 = PRED_COPY [[DS_CMPST_RTN_B64_]] ; GFX11-LABEL: name: atomic_cmpxchg_s64_region_gep4 ; GFX11: liveins: $vgpr0, $vgpr1_vgpr2, $vgpr3_vgpr4 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr1_vgpr2 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4 - ; GFX11-NEXT: [[DS_CMPSTORE_RTN_B64_:%[0-9]+]]:vreg_64 = DS_CMPSTORE_RTN_B64 [[COPY]], [[COPY2]], [[COPY1]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst (s64), addrspace 2) - ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[DS_CMPSTORE_RTN_B64_]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr1_vgpr2 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr3_vgpr4 + ; GFX11-NEXT: [[DS_CMPSTORE_RTN_B64_:%[0-9]+]]:vreg_64 = DS_CMPSTORE_RTN_B64 [[PRED_COPY]], [[PRED_COPY2]], [[PRED_COPY1]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst (s64), addrspace 2) + ; GFX11-NEXT: $vgpr0_vgpr1 = PRED_COPY [[DS_CMPSTORE_RTN_B64_]] %0:vgpr(p2) = COPY $vgpr0 %1:vgpr(s64) = COPY $vgpr1_vgpr2 %2:vgpr(s64) = COPY $vgpr3_vgpr4 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-add-flat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-add-flat.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-add-flat.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-add-flat.mir @@ -17,31 +17,31 @@ ; GFX7-LABEL: name: flat_atomicrmw_add_s32 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) - ; GFX7-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]] + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX7-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[PRED_COPY]], [[PRED_COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[FLAT_ATOMIC_ADD_RTN]] ; GFX9-LABEL: name: flat_atomicrmw_add_s32 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) - ; GFX9-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[PRED_COPY]], [[PRED_COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; 
GFX9-NEXT: $vgpr0 = PRED_COPY [[FLAT_ATOMIC_ADD_RTN]] ; GFX10-LABEL: name: flat_atomicrmw_add_s32 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) - ; GFX10-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[PRED_COPY]], [[PRED_COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[FLAT_ATOMIC_ADD_RTN]] ; GFX11-LABEL: name: flat_atomicrmw_add_s32 ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX11-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) - ; GFX11-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX11-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[PRED_COPY]], [[PRED_COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[FLAT_ATOMIC_ADD_RTN]] %0:vgpr(p0) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 %2:vgpr(s32) = G_ATOMICRMW_ADD %0, %1 :: (load store seq_cst (s32), addrspace 0) @@ -61,27 +61,27 @@ ; GFX7-LABEL: name: flat_atomicrmw_add_s32_nortn ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7-NEXT: FLAT_ATOMIC_ADD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX7-NEXT: FLAT_ATOMIC_ADD [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) ; GFX9-LABEL: name: flat_atomicrmw_add_s32_nortn ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: FLAT_ATOMIC_ADD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX9-NEXT: FLAT_ATOMIC_ADD [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) ; GFX10-LABEL: name: flat_atomicrmw_add_s32_nortn ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10-NEXT: FLAT_ATOMIC_ADD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = 
PRED_COPY $vgpr2 + ; GFX10-NEXT: FLAT_ATOMIC_ADD [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) ; GFX11-LABEL: name: flat_atomicrmw_add_s32_nortn ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX11-NEXT: FLAT_ATOMIC_ADD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX11-NEXT: FLAT_ATOMIC_ADD [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) %0:vgpr(p0) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 %2:vgpr(s32) = G_ATOMICRMW_ADD %0, %1 :: (load store seq_cst (s32), addrspace 0) @@ -100,51 +100,51 @@ ; GFX7-LABEL: name: flat_atomicrmw_add_s32_offset2047 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX7-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2047, implicit $exec ; GFX7-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX7-NEXT: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 - ; GFX7-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) - ; GFX7-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]] + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX7-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY2]], [[PRED_COPY3]], 0, implicit $exec + ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY4]], [[PRED_COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX7-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[PRED_COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX7-NEXT: $vgpr0 = PRED_COPY 
[[FLAT_ATOMIC_ADD_RTN]] ; GFX9-LABEL: name: flat_atomicrmw_add_s32_offset2047 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 2047, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) - ; GFX9-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[PRED_COPY]], [[PRED_COPY1]], 2047, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[FLAT_ATOMIC_ADD_RTN]] ; GFX10-LABEL: name: flat_atomicrmw_add_s32_offset2047 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2047, implicit $exec ; GFX10-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX10-NEXT: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 - ; GFX10-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) - ; GFX10-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]] + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[PRED_COPY2]], [[PRED_COPY3]], 0, implicit $exec + ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[PRED_COPY4]], [[PRED_COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX10-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[PRED_COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[FLAT_ATOMIC_ADD_RTN]] ; GFX11-LABEL: name: 
flat_atomicrmw_add_s32_offset2047 ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX11-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 2047, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) - ; GFX11-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX11-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[PRED_COPY]], [[PRED_COPY1]], 2047, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[FLAT_ATOMIC_ADD_RTN]] %0:vgpr(p0) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 %2:vgpr(s64) = G_CONSTANT i64 2047 @@ -166,47 +166,47 @@ ; GFX7-LABEL: name: flat_atomicrmw_add_s32_offset2047_nortn ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX7-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2047, implicit $exec ; GFX7-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX7-NEXT: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 - ; GFX7-NEXT: FLAT_ATOMIC_ADD [[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX7-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY2]], [[PRED_COPY3]], 0, implicit $exec + ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY4]], [[PRED_COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX7-NEXT: FLAT_ATOMIC_ADD [[REG_SEQUENCE1]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) ; GFX9-LABEL: name: flat_atomicrmw_add_s32_offset2047_nortn ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: 
[[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: FLAT_ATOMIC_ADD [[COPY]], [[COPY1]], 2047, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX9-NEXT: FLAT_ATOMIC_ADD [[PRED_COPY]], [[PRED_COPY1]], 2047, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) ; GFX10-LABEL: name: flat_atomicrmw_add_s32_offset2047_nortn ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2047, implicit $exec ; GFX10-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX10-NEXT: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 - ; GFX10-NEXT: FLAT_ATOMIC_ADD [[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[PRED_COPY2]], [[PRED_COPY3]], 0, implicit $exec + ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[PRED_COPY4]], [[PRED_COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX10-NEXT: FLAT_ATOMIC_ADD [[REG_SEQUENCE1]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) ; GFX11-LABEL: name: flat_atomicrmw_add_s32_offset2047_nortn ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX11-NEXT: FLAT_ATOMIC_ADD [[COPY]], [[COPY1]], 2047, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX11-NEXT: FLAT_ATOMIC_ADD [[PRED_COPY]], [[PRED_COPY1]], 2047, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) 
%0:vgpr(p0) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 %2:vgpr(s64) = G_CONSTANT i64 2047 @@ -227,51 +227,51 @@ ; GFX7-LABEL: name: flat_atomicrmw_add_s32_offset2048 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX7-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2048, implicit $exec ; GFX7-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX7-NEXT: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 - ; GFX7-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) - ; GFX7-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]] + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX7-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY2]], [[PRED_COPY3]], 0, implicit $exec + ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY4]], [[PRED_COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX7-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[PRED_COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[FLAT_ATOMIC_ADD_RTN]] ; GFX9-LABEL: name: flat_atomicrmw_add_s32_offset2048 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 2048, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) - ; GFX9-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[PRED_COPY]], [[PRED_COPY1]], 2048, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX9-NEXT: $vgpr0 = PRED_COPY 
[[FLAT_ATOMIC_ADD_RTN]] ; GFX10-LABEL: name: flat_atomicrmw_add_s32_offset2048 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2048, implicit $exec ; GFX10-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX10-NEXT: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 - ; GFX10-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) - ; GFX10-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]] + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[PRED_COPY2]], [[PRED_COPY3]], 0, implicit $exec + ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[PRED_COPY4]], [[PRED_COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX10-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[PRED_COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[FLAT_ATOMIC_ADD_RTN]] ; GFX11-LABEL: name: flat_atomicrmw_add_s32_offset2048 ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX11-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 2048, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) - ; GFX11-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX11-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[PRED_COPY]], [[PRED_COPY1]], 2048, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[FLAT_ATOMIC_ADD_RTN]] %0:vgpr(p0) = COPY 
$vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 %2:vgpr(s64) = G_CONSTANT i64 2048 @@ -293,47 +293,47 @@ ; GFX7-LABEL: name: flat_atomicrmw_add_s32_offset2048_nortn ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX7-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2048, implicit $exec ; GFX7-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX7-NEXT: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 - ; GFX7-NEXT: FLAT_ATOMIC_ADD [[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX7-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY2]], [[PRED_COPY3]], 0, implicit $exec + ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY4]], [[PRED_COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX7-NEXT: FLAT_ATOMIC_ADD [[REG_SEQUENCE1]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) ; GFX9-LABEL: name: flat_atomicrmw_add_s32_offset2048_nortn ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: FLAT_ATOMIC_ADD [[COPY]], [[COPY1]], 2048, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX9-NEXT: FLAT_ATOMIC_ADD [[PRED_COPY]], [[PRED_COPY1]], 2048, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) ; GFX10-LABEL: name: flat_atomicrmw_add_s32_offset2048_nortn ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX10-NEXT: 
[[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2048, implicit $exec ; GFX10-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX10-NEXT: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 - ; GFX10-NEXT: FLAT_ATOMIC_ADD [[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[PRED_COPY2]], [[PRED_COPY3]], 0, implicit $exec + ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[PRED_COPY4]], [[PRED_COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX10-NEXT: FLAT_ATOMIC_ADD [[REG_SEQUENCE1]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) ; GFX11-LABEL: name: flat_atomicrmw_add_s32_offset2048_nortn ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX11-NEXT: FLAT_ATOMIC_ADD [[COPY]], [[COPY1]], 2048, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX11-NEXT: FLAT_ATOMIC_ADD [[PRED_COPY]], [[PRED_COPY1]], 2048, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) %0:vgpr(p0) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 %2:vgpr(s64) = G_CONSTANT i64 2048 @@ -354,51 +354,51 @@ ; GFX7-LABEL: name: flat_atomicrmw_add_s32_offset4095 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX7-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec ; GFX7-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY 
[[COPY]].sub0 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX7-NEXT: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 - ; GFX7-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) - ; GFX7-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]] + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX7-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY2]], [[PRED_COPY3]], 0, implicit $exec + ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY4]], [[PRED_COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX7-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[PRED_COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[FLAT_ATOMIC_ADD_RTN]] ; GFX9-LABEL: name: flat_atomicrmw_add_s32_offset4095 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 4095, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) - ; GFX9-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[PRED_COPY]], [[PRED_COPY1]], 4095, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[FLAT_ATOMIC_ADD_RTN]] ; GFX10-LABEL: name: flat_atomicrmw_add_s32_offset4095 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec ; GFX10-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; 
GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX10-NEXT: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 - ; GFX10-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) - ; GFX10-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]] + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[PRED_COPY2]], [[PRED_COPY3]], 0, implicit $exec + ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[PRED_COPY4]], [[PRED_COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX10-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[PRED_COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[FLAT_ATOMIC_ADD_RTN]] ; GFX11-LABEL: name: flat_atomicrmw_add_s32_offset4095 ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX11-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 4095, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) - ; GFX11-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX11-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[PRED_COPY]], [[PRED_COPY1]], 4095, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[FLAT_ATOMIC_ADD_RTN]] %0:vgpr(p0) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 %2:vgpr(s64) = G_CONSTANT i64 4095 @@ -420,47 +420,47 @@ ; GFX7-LABEL: name: flat_atomicrmw_add_s32_offset4095_nortn ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX7-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec ; GFX7-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; 
GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX7-NEXT: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 - ; GFX7-NEXT: FLAT_ATOMIC_ADD [[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX7-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY2]], [[PRED_COPY3]], 0, implicit $exec + ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY4]], [[PRED_COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX7-NEXT: FLAT_ATOMIC_ADD [[REG_SEQUENCE1]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) ; GFX9-LABEL: name: flat_atomicrmw_add_s32_offset4095_nortn ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: FLAT_ATOMIC_ADD [[COPY]], [[COPY1]], 4095, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX9-NEXT: FLAT_ATOMIC_ADD [[PRED_COPY]], [[PRED_COPY1]], 4095, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) ; GFX10-LABEL: name: flat_atomicrmw_add_s32_offset4095_nortn ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec ; GFX10-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX10-NEXT: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed 
[[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 - ; GFX10-NEXT: FLAT_ATOMIC_ADD [[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[PRED_COPY2]], [[PRED_COPY3]], 0, implicit $exec + ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[PRED_COPY4]], [[PRED_COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX10-NEXT: FLAT_ATOMIC_ADD [[REG_SEQUENCE1]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) ; GFX11-LABEL: name: flat_atomicrmw_add_s32_offset4095_nortn ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX11-NEXT: FLAT_ATOMIC_ADD [[COPY]], [[COPY1]], 4095, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX11-NEXT: FLAT_ATOMIC_ADD [[PRED_COPY]], [[PRED_COPY1]], 4095, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) %0:vgpr(p0) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 %2:vgpr(s64) = G_CONSTANT i64 4095 @@ -481,71 +481,71 @@ ; GFX7-LABEL: name: flat_atomicrmw_add_s32_offset4097 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX7-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4097, implicit $exec ; GFX7-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX7-NEXT: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 - ; GFX7-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) - ; GFX7-NEXT: 
$vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]] + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX7-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY2]], [[PRED_COPY3]], 0, implicit $exec + ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY4]], [[PRED_COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX7-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[PRED_COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[FLAT_ATOMIC_ADD_RTN]] ; GFX9-LABEL: name: flat_atomicrmw_add_s32_offset4097 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4097, implicit $exec ; GFX9-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX9-NEXT: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX9-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 - ; GFX9-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) - ; GFX9-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]] + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY2]], [[PRED_COPY3]], 0, implicit $exec + ; GFX9-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY4]], [[PRED_COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX9-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX9-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = 
FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[PRED_COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[FLAT_ATOMIC_ADD_RTN]] ; GFX10-LABEL: name: flat_atomicrmw_add_s32_offset4097 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4097, implicit $exec ; GFX10-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX10-NEXT: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 - ; GFX10-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) - ; GFX10-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]] + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[PRED_COPY2]], [[PRED_COPY3]], 0, implicit $exec + ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[PRED_COPY4]], [[PRED_COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX10-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[PRED_COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[FLAT_ATOMIC_ADD_RTN]] ; GFX11-LABEL: name: flat_atomicrmw_add_s32_offset4097 ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4097, implicit $exec ; GFX11-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX11-NEXT: 
[[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX11-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX11-NEXT: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 - ; GFX11-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) - ; GFX11-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]] + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX11-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX11-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX11-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[PRED_COPY2]], [[PRED_COPY3]], 0, implicit $exec + ; GFX11-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[PRED_COPY4]], [[PRED_COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX11-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[PRED_COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[FLAT_ATOMIC_ADD_RTN]] %0:vgpr(p0) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 %2:vgpr(s64) = G_CONSTANT i64 4097 @@ -567,67 +567,67 @@ ; GFX7-LABEL: name: flat_atomicrmw_add_s32_offset4097_nortn ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX7-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4097, implicit $exec ; GFX7-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX7-NEXT: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 - ; GFX7-NEXT: FLAT_ATOMIC_ADD [[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: 
(load store seq_cst (s32)) + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX7-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY2]], [[PRED_COPY3]], 0, implicit $exec + ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY4]], [[PRED_COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX7-NEXT: FLAT_ATOMIC_ADD [[REG_SEQUENCE1]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) ; GFX9-LABEL: name: flat_atomicrmw_add_s32_offset4097_nortn ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4097, implicit $exec ; GFX9-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX9-NEXT: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX9-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 - ; GFX9-NEXT: FLAT_ATOMIC_ADD [[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY2]], [[PRED_COPY3]], 0, implicit $exec + ; GFX9-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY4]], [[PRED_COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX9-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX9-NEXT: FLAT_ATOMIC_ADD [[REG_SEQUENCE1]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) ; GFX10-LABEL: name: flat_atomicrmw_add_s32_offset4097_nortn ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: 
[[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4097, implicit $exec ; GFX10-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX10-NEXT: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 - ; GFX10-NEXT: FLAT_ATOMIC_ADD [[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[PRED_COPY2]], [[PRED_COPY3]], 0, implicit $exec + ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[PRED_COPY4]], [[PRED_COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX10-NEXT: FLAT_ATOMIC_ADD [[REG_SEQUENCE1]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) ; GFX11-LABEL: name: flat_atomicrmw_add_s32_offset4097_nortn ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4097, implicit $exec ; GFX11-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX11-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX11-NEXT: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed 
[[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 - ; GFX11-NEXT: FLAT_ATOMIC_ADD [[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX11-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX11-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX11-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[PRED_COPY2]], [[PRED_COPY3]], 0, implicit $exec + ; GFX11-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[PRED_COPY4]], [[PRED_COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX11-NEXT: FLAT_ATOMIC_ADD [[REG_SEQUENCE1]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) %0:vgpr(p0) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 %2:vgpr(s64) = G_CONSTANT i64 4097 @@ -648,31 +648,31 @@ ; GFX7-LABEL: name: flat_atomicrmw_add_s64 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX7-NEXT: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) - ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_ADD_X2_RTN]] + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX7-NEXT: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[PRED_COPY]], [[PRED_COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) + ; GFX7-NEXT: $vgpr0_vgpr1 = PRED_COPY [[FLAT_ATOMIC_ADD_X2_RTN]] ; GFX9-LABEL: name: flat_atomicrmw_add_s64 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_ADD_X2_RTN]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[PRED_COPY]], [[PRED_COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) + ; GFX9-NEXT: $vgpr0_vgpr1 = PRED_COPY [[FLAT_ATOMIC_ADD_X2_RTN]] ; GFX10-LABEL: name: flat_atomicrmw_add_s64 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX10-NEXT: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY 
[[FLAT_ATOMIC_ADD_X2_RTN]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX10-NEXT: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[PRED_COPY]], [[PRED_COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) + ; GFX10-NEXT: $vgpr0_vgpr1 = PRED_COPY [[FLAT_ATOMIC_ADD_X2_RTN]] ; GFX11-LABEL: name: flat_atomicrmw_add_s64 ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX11-NEXT: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) - ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_ADD_X2_RTN]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX11-NEXT: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[PRED_COPY]], [[PRED_COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) + ; GFX11-NEXT: $vgpr0_vgpr1 = PRED_COPY [[FLAT_ATOMIC_ADD_X2_RTN]] %0:vgpr(p0) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = COPY $vgpr2_vgpr3 %2:vgpr(s64) = G_ATOMICRMW_ADD %0, %1 :: (load store seq_cst (s64), addrspace 0) @@ -692,27 +692,27 @@ ; GFX7-LABEL: name: flat_atomicrmw_add_s64_nortn ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX7-NEXT: FLAT_ATOMIC_ADD_X2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX7-NEXT: FLAT_ATOMIC_ADD_X2 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) ; GFX9-LABEL: name: flat_atomicrmw_add_s64_nortn ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: FLAT_ATOMIC_ADD_X2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX9-NEXT: FLAT_ATOMIC_ADD_X2 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) ; GFX10-LABEL: name: flat_atomicrmw_add_s64_nortn ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX10-NEXT: FLAT_ATOMIC_ADD_X2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX10-NEXT: FLAT_ATOMIC_ADD_X2 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) ; GFX11-LABEL: name: flat_atomicrmw_add_s64_nortn ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY 
$vgpr0_vgpr1 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX11-NEXT: FLAT_ATOMIC_ADD_X2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX11-NEXT: FLAT_ATOMIC_ADD_X2 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) %0:vgpr(p0) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = COPY $vgpr2_vgpr3 %2:vgpr(s64) = G_ATOMICRMW_ADD %0, %1 :: (load store seq_cst (s64), addrspace 0) @@ -731,51 +731,51 @@ ; GFX7-LABEL: name: flat_atomicrmw_add_s64_offset4095 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 ; GFX7-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec ; GFX7-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX7-NEXT: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 - ; GFX7-NEXT: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) - ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_ADD_X2_RTN]] + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX7-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY2]], [[PRED_COPY3]], 0, implicit $exec + ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY4]], [[PRED_COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX7-NEXT: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[REG_SEQUENCE1]], [[PRED_COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) + ; GFX7-NEXT: $vgpr0_vgpr1 = PRED_COPY [[FLAT_ATOMIC_ADD_X2_RTN]] ; GFX9-LABEL: name: flat_atomicrmw_add_s64_offset4095 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY 
$vgpr2_vgpr3 - ; GFX9-NEXT: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 4095, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_ADD_X2_RTN]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[PRED_COPY]], [[PRED_COPY1]], 4095, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) + ; GFX9-NEXT: $vgpr0_vgpr1 = PRED_COPY [[FLAT_ATOMIC_ADD_X2_RTN]] ; GFX10-LABEL: name: flat_atomicrmw_add_s64_offset4095 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec ; GFX10-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX10-NEXT: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 - ; GFX10-NEXT: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_ADD_X2_RTN]] + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[PRED_COPY2]], [[PRED_COPY3]], 0, implicit $exec + ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[PRED_COPY4]], [[PRED_COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX10-NEXT: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[REG_SEQUENCE1]], [[PRED_COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) + ; GFX10-NEXT: $vgpr0_vgpr1 = PRED_COPY [[FLAT_ATOMIC_ADD_X2_RTN]] ; GFX11-LABEL: name: flat_atomicrmw_add_s64_offset4095 ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; 
GFX11-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX11-NEXT: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 4095, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) - ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_ADD_X2_RTN]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX11-NEXT: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[PRED_COPY]], [[PRED_COPY1]], 4095, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) + ; GFX11-NEXT: $vgpr0_vgpr1 = PRED_COPY [[FLAT_ATOMIC_ADD_X2_RTN]] %0:vgpr(p0) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = COPY $vgpr2_vgpr3 %2:vgpr(s64) = G_CONSTANT i64 4095 @@ -797,47 +797,47 @@ ; GFX7-LABEL: name: flat_atomicrmw_add_s64_offset4095_nortn ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 ; GFX7-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec ; GFX7-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX7-NEXT: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 - ; GFX7-NEXT: FLAT_ATOMIC_ADD_X2 [[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX7-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY2]], [[PRED_COPY3]], 0, implicit $exec + ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY4]], [[PRED_COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX7-NEXT: FLAT_ATOMIC_ADD_X2 [[REG_SEQUENCE1]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) ; GFX9-LABEL: name: flat_atomicrmw_add_s64_offset4095_nortn ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: FLAT_ATOMIC_ADD_X2 
[[COPY]], [[COPY1]], 4095, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX9-NEXT: FLAT_ATOMIC_ADD_X2 [[PRED_COPY]], [[PRED_COPY1]], 4095, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) ; GFX10-LABEL: name: flat_atomicrmw_add_s64_offset4095_nortn ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec ; GFX10-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX10-NEXT: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 - ; GFX10-NEXT: FLAT_ATOMIC_ADD_X2 [[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[PRED_COPY2]], [[PRED_COPY3]], 0, implicit $exec + ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[PRED_COPY4]], [[PRED_COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX10-NEXT: FLAT_ATOMIC_ADD_X2 [[REG_SEQUENCE1]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) ; GFX11-LABEL: name: flat_atomicrmw_add_s64_offset4095_nortn ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX11-NEXT: FLAT_ATOMIC_ADD_X2 [[COPY]], [[COPY1]], 4095, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX11-NEXT: FLAT_ATOMIC_ADD_X2 [[PRED_COPY]], [[PRED_COPY1]], 4095, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) %0:vgpr(p0) = COPY 
$vgpr0_vgpr1 %1:vgpr(s64) = COPY $vgpr2_vgpr3 %2:vgpr(s64) = G_CONSTANT i64 4095 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-add-global.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-add-global.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-add-global.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-add-global.mir @@ -18,43 +18,43 @@ ; GFX6-LABEL: name: global_atomicrmw_add_s32 ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX6-NEXT: [[BUFFER_ATOMIC_ADD_ADDR64_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_ADDR64_RTN [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) - ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_ADDR64_RTN]] + ; GFX6-NEXT: [[BUFFER_ATOMIC_ADD_ADDR64_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_ADDR64_RTN [[PRED_COPY1]], [[PRED_COPY]], [[REG_SEQUENCE1]], 0, 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[BUFFER_ATOMIC_ADD_ADDR64_RTN]] ; GFX7-LABEL: name: global_atomicrmw_add_s32 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1) - ; GFX7-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]] + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX7-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[PRED_COPY]], [[PRED_COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1) + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[FLAT_ATOMIC_ADD_RTN]] ; GFX9-LABEL: name: global_atomicrmw_add_s32 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) - ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_RTN]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[PRED_COPY]], [[PRED_COPY1]], 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_ATOMIC_ADD_RTN]] ; GFX10-LABEL: name: global_atomicrmw_add_s32 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY 
$vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10-NEXT: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) - ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_RTN]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[PRED_COPY]], [[PRED_COPY1]], 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_ATOMIC_ADD_RTN]] ; GFX11-LABEL: name: global_atomicrmw_add_s32 ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX11-NEXT: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) - ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_RTN]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX11-NEXT: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[PRED_COPY]], [[PRED_COPY1]], 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_ATOMIC_ADD_RTN]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 %2:vgpr(s32) = G_ATOMICRMW_ADD %0, %1 :: (load store seq_cst (s32), addrspace 1) @@ -74,38 +74,38 @@ ; GFX6-LABEL: name: global_atomicrmw_add_s32_nortn ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX6-NEXT: BUFFER_ATOMIC_ADD_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX6-NEXT: BUFFER_ATOMIC_ADD_ADDR64 [[PRED_COPY1]], [[PRED_COPY]], [[REG_SEQUENCE1]], 0, 0, 0, implicit $exec :: (load store seq_cst (s32), addrspace 1) ; GFX7-LABEL: name: global_atomicrmw_add_s32_nortn ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7-NEXT: FLAT_ATOMIC_ADD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1) + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX7-NEXT: FLAT_ATOMIC_ADD [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1) ; GFX9-LABEL: name: global_atomicrmw_add_s32_nortn ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 
- ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: GLOBAL_ATOMIC_ADD [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX9-NEXT: GLOBAL_ATOMIC_ADD [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec :: (load store seq_cst (s32), addrspace 1) ; GFX10-LABEL: name: global_atomicrmw_add_s32_nortn ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10-NEXT: GLOBAL_ATOMIC_ADD [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX10-NEXT: GLOBAL_ATOMIC_ADD [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec :: (load store seq_cst (s32), addrspace 1) ; GFX11-LABEL: name: global_atomicrmw_add_s32_nortn ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX11-NEXT: GLOBAL_ATOMIC_ADD [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX11-NEXT: GLOBAL_ATOMIC_ADD [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec :: (load store seq_cst (s32), addrspace 1) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 %2:vgpr(s32) = G_ATOMICRMW_ADD %0, %1 :: (load store seq_cst (s32), addrspace 1) @@ -124,53 +124,53 @@ ; GFX6-LABEL: name: global_atomicrmw_add_s32_offset2047 ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX6-NEXT: [[BUFFER_ATOMIC_ADD_ADDR64_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_ADDR64_RTN [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 2047, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) - ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_ADDR64_RTN]] + ; GFX6-NEXT: [[BUFFER_ATOMIC_ADD_ADDR64_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_ADDR64_RTN [[PRED_COPY1]], [[PRED_COPY]], [[REG_SEQUENCE1]], 0, 2047, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[BUFFER_ATOMIC_ADD_ADDR64_RTN]] ; GFX7-LABEL: name: global_atomicrmw_add_s32_offset2047 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX7-NEXT: 
[[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2047, implicit $exec ; GFX7-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX7-NEXT: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 - ; GFX7-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1) - ; GFX7-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]] + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX7-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY2]], [[PRED_COPY3]], 0, implicit $exec + ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY4]], [[PRED_COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX7-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[PRED_COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1) + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[FLAT_ATOMIC_ADD_RTN]] ; GFX9-LABEL: name: global_atomicrmw_add_s32_offset2047 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 2047, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) - ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_RTN]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[PRED_COPY]], [[PRED_COPY1]], 2047, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_ATOMIC_ADD_RTN]] ; GFX10-LABEL: name: global_atomicrmw_add_s32_offset2047 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10-NEXT: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 2047, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) - ; GFX10-NEXT: $vgpr0 = COPY 
[[GLOBAL_ATOMIC_ADD_RTN]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[PRED_COPY]], [[PRED_COPY1]], 2047, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_ATOMIC_ADD_RTN]] ; GFX11-LABEL: name: global_atomicrmw_add_s32_offset2047 ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX11-NEXT: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 2047, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) - ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_RTN]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX11-NEXT: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[PRED_COPY]], [[PRED_COPY1]], 2047, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_ATOMIC_ADD_RTN]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 %2:vgpr(s64) = G_CONSTANT i64 2047 @@ -192,48 +192,48 @@ ; GFX6-LABEL: name: global_atomicrmw_add_s32_offset2047_nortn ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX6-NEXT: BUFFER_ATOMIC_ADD_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 2047, 0, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX6-NEXT: BUFFER_ATOMIC_ADD_ADDR64 [[PRED_COPY1]], [[PRED_COPY]], [[REG_SEQUENCE1]], 0, 2047, 0, implicit $exec :: (load store seq_cst (s32), addrspace 1) ; GFX7-LABEL: name: global_atomicrmw_add_s32_offset2047_nortn ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX7-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2047, implicit $exec ; GFX7-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit 
$exec - ; GFX7-NEXT: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 - ; GFX7-NEXT: FLAT_ATOMIC_ADD [[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1) + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX7-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY2]], [[PRED_COPY3]], 0, implicit $exec + ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY4]], [[PRED_COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX7-NEXT: FLAT_ATOMIC_ADD [[REG_SEQUENCE1]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1) ; GFX9-LABEL: name: global_atomicrmw_add_s32_offset2047_nortn ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: GLOBAL_ATOMIC_ADD [[COPY]], [[COPY1]], 2047, 0, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX9-NEXT: GLOBAL_ATOMIC_ADD [[PRED_COPY]], [[PRED_COPY1]], 2047, 0, implicit $exec :: (load store seq_cst (s32), addrspace 1) ; GFX10-LABEL: name: global_atomicrmw_add_s32_offset2047_nortn ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10-NEXT: GLOBAL_ATOMIC_ADD [[COPY]], [[COPY1]], 2047, 0, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX10-NEXT: GLOBAL_ATOMIC_ADD [[PRED_COPY]], [[PRED_COPY1]], 2047, 0, implicit $exec :: (load store seq_cst (s32), addrspace 1) ; GFX11-LABEL: name: global_atomicrmw_add_s32_offset2047_nortn ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX11-NEXT: GLOBAL_ATOMIC_ADD [[COPY]], [[COPY1]], 2047, 0, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX11-NEXT: GLOBAL_ATOMIC_ADD [[PRED_COPY]], [[PRED_COPY1]], 2047, 0, implicit $exec :: (load store seq_cst (s32), addrspace 1) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 %2:vgpr(s64) = G_CONSTANT i64 2047 @@ -254,63 +254,63 @@ ; GFX6-LABEL: name: global_atomicrmw_add_s32_offset2048 ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; 
GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX6-NEXT: [[BUFFER_ATOMIC_ADD_ADDR64_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_ADDR64_RTN [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 2048, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) - ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_ADDR64_RTN]] + ; GFX6-NEXT: [[BUFFER_ATOMIC_ADD_ADDR64_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_ADDR64_RTN [[PRED_COPY1]], [[PRED_COPY]], [[REG_SEQUENCE1]], 0, 2048, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[BUFFER_ATOMIC_ADD_ADDR64_RTN]] ; GFX7-LABEL: name: global_atomicrmw_add_s32_offset2048 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX7-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2048, implicit $exec ; GFX7-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX7-NEXT: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 - ; GFX7-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1) - ; GFX7-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]] + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX7-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY2]], [[PRED_COPY3]], 0, implicit $exec + ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY4]], [[PRED_COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, 
[[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX7-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[PRED_COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1) + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[FLAT_ATOMIC_ADD_RTN]] ; GFX9-LABEL: name: global_atomicrmw_add_s32_offset2048 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 2048, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) - ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_RTN]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[PRED_COPY]], [[PRED_COPY1]], 2048, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_ATOMIC_ADD_RTN]] ; GFX10-LABEL: name: global_atomicrmw_add_s32_offset2048 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2048, implicit $exec ; GFX10-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX10-NEXT: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 - ; GFX10-NEXT: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) - ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_RTN]] + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[PRED_COPY2]], [[PRED_COPY3]], 0, implicit $exec + ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[PRED_COPY4]], [[PRED_COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; 
GFX10-NEXT: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[PRED_COPY1]], 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_ATOMIC_ADD_RTN]] ; GFX11-LABEL: name: global_atomicrmw_add_s32_offset2048 ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX11-NEXT: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 2048, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) - ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_RTN]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX11-NEXT: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[PRED_COPY]], [[PRED_COPY1]], 2048, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_ATOMIC_ADD_RTN]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 %2:vgpr(s64) = G_CONSTANT i64 2048 @@ -332,58 +332,58 @@ ; GFX6-LABEL: name: global_atomicrmw_add_s32_offset2048_nortn ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX6-NEXT: BUFFER_ATOMIC_ADD_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 2048, 0, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX6-NEXT: BUFFER_ATOMIC_ADD_ADDR64 [[PRED_COPY1]], [[PRED_COPY]], [[REG_SEQUENCE1]], 0, 2048, 0, implicit $exec :: (load store seq_cst (s32), addrspace 1) ; GFX7-LABEL: name: global_atomicrmw_add_s32_offset2048_nortn ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX7-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2048, implicit $exec ; GFX7-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX7-NEXT: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX7-NEXT: 
[[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 - ; GFX7-NEXT: FLAT_ATOMIC_ADD [[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1) + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX7-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY2]], [[PRED_COPY3]], 0, implicit $exec + ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY4]], [[PRED_COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX7-NEXT: FLAT_ATOMIC_ADD [[REG_SEQUENCE1]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1) ; GFX9-LABEL: name: global_atomicrmw_add_s32_offset2048_nortn ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: GLOBAL_ATOMIC_ADD [[COPY]], [[COPY1]], 2048, 0, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX9-NEXT: GLOBAL_ATOMIC_ADD [[PRED_COPY]], [[PRED_COPY1]], 2048, 0, implicit $exec :: (load store seq_cst (s32), addrspace 1) ; GFX10-LABEL: name: global_atomicrmw_add_s32_offset2048_nortn ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2048, implicit $exec ; GFX10-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX10-NEXT: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 - ; GFX10-NEXT: GLOBAL_ATOMIC_ADD [[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY 
[[PRED_COPY]].sub1 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[PRED_COPY2]], [[PRED_COPY3]], 0, implicit $exec + ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[PRED_COPY4]], [[PRED_COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX10-NEXT: GLOBAL_ATOMIC_ADD [[REG_SEQUENCE1]], [[PRED_COPY1]], 0, 0, implicit $exec :: (load store seq_cst (s32), addrspace 1) ; GFX11-LABEL: name: global_atomicrmw_add_s32_offset2048_nortn ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX11-NEXT: GLOBAL_ATOMIC_ADD [[COPY]], [[COPY1]], 2048, 0, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX11-NEXT: GLOBAL_ATOMIC_ADD [[PRED_COPY]], [[PRED_COPY1]], 2048, 0, implicit $exec :: (load store seq_cst (s32), addrspace 1) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 %2:vgpr(s64) = G_CONSTANT i64 2048 @@ -404,63 +404,63 @@ ; GFX6-LABEL: name: global_atomicrmw_add_s32_offset4095 ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX6-NEXT: [[BUFFER_ATOMIC_ADD_ADDR64_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_ADDR64_RTN [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 4095, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) - ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_ADDR64_RTN]] + ; GFX6-NEXT: [[BUFFER_ATOMIC_ADD_ADDR64_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_ADDR64_RTN [[PRED_COPY1]], [[PRED_COPY]], [[REG_SEQUENCE1]], 0, 4095, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[BUFFER_ATOMIC_ADD_ADDR64_RTN]] ; GFX7-LABEL: name: global_atomicrmw_add_s32_offset4095 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX7-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec ; GFX7-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY 
[[COPY]].sub0 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX7-NEXT: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 - ; GFX7-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1) - ; GFX7-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]] + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX7-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY2]], [[PRED_COPY3]], 0, implicit $exec + ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY4]], [[PRED_COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX7-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[PRED_COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1) + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[FLAT_ATOMIC_ADD_RTN]] ; GFX9-LABEL: name: global_atomicrmw_add_s32_offset4095 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 4095, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) - ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_RTN]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[PRED_COPY]], [[PRED_COPY1]], 4095, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_ATOMIC_ADD_RTN]] ; GFX10-LABEL: name: global_atomicrmw_add_s32_offset4095 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec ; GFX10-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY 
[[REG_SEQUENCE]].sub0 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX10-NEXT: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 - ; GFX10-NEXT: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) - ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_RTN]] + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[PRED_COPY2]], [[PRED_COPY3]], 0, implicit $exec + ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[PRED_COPY4]], [[PRED_COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX10-NEXT: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[PRED_COPY1]], 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_ATOMIC_ADD_RTN]] ; GFX11-LABEL: name: global_atomicrmw_add_s32_offset4095 ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX11-NEXT: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 4095, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) - ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_RTN]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX11-NEXT: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[PRED_COPY]], [[PRED_COPY1]], 4095, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_ATOMIC_ADD_RTN]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 %2:vgpr(s64) = G_CONSTANT i64 4095 @@ -482,58 +482,58 @@ ; GFX6-LABEL: name: global_atomicrmw_add_s32_offset4095_nortn ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: 
[[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX6-NEXT: BUFFER_ATOMIC_ADD_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 4095, 0, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX6-NEXT: BUFFER_ATOMIC_ADD_ADDR64 [[PRED_COPY1]], [[PRED_COPY]], [[REG_SEQUENCE1]], 0, 4095, 0, implicit $exec :: (load store seq_cst (s32), addrspace 1) ; GFX7-LABEL: name: global_atomicrmw_add_s32_offset4095_nortn ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX7-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec ; GFX7-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX7-NEXT: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 - ; GFX7-NEXT: FLAT_ATOMIC_ADD [[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1) + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX7-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY2]], [[PRED_COPY3]], 0, implicit $exec + ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY4]], [[PRED_COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX7-NEXT: FLAT_ATOMIC_ADD [[REG_SEQUENCE1]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1) ; GFX9-LABEL: name: global_atomicrmw_add_s32_offset4095_nortn ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: GLOBAL_ATOMIC_ADD [[COPY]], [[COPY1]], 4095, 0, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX9-NEXT: GLOBAL_ATOMIC_ADD [[PRED_COPY]], [[PRED_COPY1]], 4095, 0, implicit $exec :: (load store seq_cst (s32), addrspace 1) ; GFX10-LABEL: name: 
global_atomicrmw_add_s32_offset4095_nortn ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec ; GFX10-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX10-NEXT: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 - ; GFX10-NEXT: GLOBAL_ATOMIC_ADD [[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[PRED_COPY2]], [[PRED_COPY3]], 0, implicit $exec + ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[PRED_COPY4]], [[PRED_COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX10-NEXT: GLOBAL_ATOMIC_ADD [[REG_SEQUENCE1]], [[PRED_COPY1]], 0, 0, implicit $exec :: (load store seq_cst (s32), addrspace 1) ; GFX11-LABEL: name: global_atomicrmw_add_s32_offset4095_nortn ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX11-NEXT: GLOBAL_ATOMIC_ADD [[COPY]], [[COPY1]], 4095, 0, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX11-NEXT: GLOBAL_ATOMIC_ADD [[PRED_COPY]], [[PRED_COPY1]], 4095, 0, implicit $exec :: (load store seq_cst (s32), addrspace 1) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 %2:vgpr(s64) = G_CONSTANT i64 4095 @@ -554,84 +554,84 @@ ; GFX6-LABEL: name: global_atomicrmw_add_s32_offset4097 ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; 
GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 ; GFX6-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 4097 - ; GFX6-NEXT: [[BUFFER_ATOMIC_ADD_ADDR64_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_ADDR64_RTN [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) - ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_ADDR64_RTN]] + ; GFX6-NEXT: [[BUFFER_ATOMIC_ADD_ADDR64_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_ADDR64_RTN [[PRED_COPY1]], [[PRED_COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[BUFFER_ATOMIC_ADD_ADDR64_RTN]] ; GFX7-LABEL: name: global_atomicrmw_add_s32_offset4097 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX7-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4097, implicit $exec ; GFX7-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX7-NEXT: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 - ; GFX7-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1) - ; GFX7-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]] + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX7-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY2]], [[PRED_COPY3]], 0, implicit $exec + ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY4]], [[PRED_COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX7-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN 
[[REG_SEQUENCE1]], [[PRED_COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1) + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[FLAT_ATOMIC_ADD_RTN]] ; GFX9-LABEL: name: global_atomicrmw_add_s32_offset4097 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4097, implicit $exec ; GFX9-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX9-NEXT: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX9-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 - ; GFX9-NEXT: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) - ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_RTN]] + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY2]], [[PRED_COPY3]], 0, implicit $exec + ; GFX9-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY4]], [[PRED_COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX9-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX9-NEXT: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[PRED_COPY1]], 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_ATOMIC_ADD_RTN]] ; GFX10-LABEL: name: global_atomicrmw_add_s32_offset4097 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4097, implicit $exec ; GFX10-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX10-NEXT: 
[[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX10-NEXT: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 - ; GFX10-NEXT: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) - ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_RTN]] + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[PRED_COPY2]], [[PRED_COPY3]], 0, implicit $exec + ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[PRED_COPY4]], [[PRED_COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX10-NEXT: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[PRED_COPY1]], 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_ATOMIC_ADD_RTN]] ; GFX11-LABEL: name: global_atomicrmw_add_s32_offset4097 ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4097, implicit $exec ; GFX11-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX11-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX11-NEXT: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 - ; GFX11-NEXT: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) - ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_RTN]] + ; GFX11-NEXT: 
[[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX11-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX11-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX11-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[PRED_COPY2]], [[PRED_COPY3]], 0, implicit $exec + ; GFX11-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[PRED_COPY4]], [[PRED_COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX11-NEXT: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[PRED_COPY1]], 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_ATOMIC_ADD_RTN]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 %2:vgpr(s64) = G_CONSTANT i64 4097 @@ -653,79 +653,79 @@ ; GFX6-LABEL: name: global_atomicrmw_add_s32_offset4097_nortn ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 ; GFX6-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 4097 - ; GFX6-NEXT: BUFFER_ATOMIC_ADD_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 0, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX6-NEXT: BUFFER_ATOMIC_ADD_ADDR64 [[PRED_COPY1]], [[PRED_COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 0, implicit $exec :: (load store seq_cst (s32), addrspace 1) ; GFX7-LABEL: name: global_atomicrmw_add_s32_offset4097_nortn ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX7-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4097, implicit $exec ; GFX7-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX7-NEXT: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], 
killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 - ; GFX7-NEXT: FLAT_ATOMIC_ADD [[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1) + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX7-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY2]], [[PRED_COPY3]], 0, implicit $exec + ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY4]], [[PRED_COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX7-NEXT: FLAT_ATOMIC_ADD [[REG_SEQUENCE1]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1) ; GFX9-LABEL: name: global_atomicrmw_add_s32_offset4097_nortn ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4097, implicit $exec ; GFX9-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX9-NEXT: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX9-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 - ; GFX9-NEXT: GLOBAL_ATOMIC_ADD [[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY2]], [[PRED_COPY3]], 0, implicit $exec + ; GFX9-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY4]], [[PRED_COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX9-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], 
%subreg.sub1 + ; GFX9-NEXT: GLOBAL_ATOMIC_ADD [[REG_SEQUENCE1]], [[PRED_COPY1]], 0, 0, implicit $exec :: (load store seq_cst (s32), addrspace 1) ; GFX10-LABEL: name: global_atomicrmw_add_s32_offset4097_nortn ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4097, implicit $exec ; GFX10-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX10-NEXT: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 - ; GFX10-NEXT: GLOBAL_ATOMIC_ADD [[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[PRED_COPY2]], [[PRED_COPY3]], 0, implicit $exec + ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[PRED_COPY4]], [[PRED_COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX10-NEXT: GLOBAL_ATOMIC_ADD [[REG_SEQUENCE1]], [[PRED_COPY1]], 0, 0, implicit $exec :: (load store seq_cst (s32), addrspace 1) ; GFX11-LABEL: name: global_atomicrmw_add_s32_offset4097_nortn ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4097, implicit $exec ; GFX11-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX11-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - 
; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX11-NEXT: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 - ; GFX11-NEXT: GLOBAL_ATOMIC_ADD [[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX11-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX11-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX11-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[PRED_COPY2]], [[PRED_COPY3]], 0, implicit $exec + ; GFX11-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[PRED_COPY4]], [[PRED_COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX11-NEXT: GLOBAL_ATOMIC_ADD [[REG_SEQUENCE1]], [[PRED_COPY1]], 0, 0, implicit $exec :: (load store seq_cst (s32), addrspace 1) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 %2:vgpr(s64) = G_CONSTANT i64 4097 @@ -746,43 +746,43 @@ ; GFX6-LABEL: name: global_atomicrmw_add_s64 ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX6-NEXT: [[BUFFER_ATOMIC_ADD_X2_ADDR64_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_ADD_X2_ADDR64_RTN [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 1, implicit $exec :: (load store seq_cst (s64), addrspace 1) - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[BUFFER_ATOMIC_ADD_X2_ADDR64_RTN]] + ; GFX6-NEXT: [[BUFFER_ATOMIC_ADD_X2_ADDR64_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_ADD_X2_ADDR64_RTN [[PRED_COPY1]], [[PRED_COPY]], [[REG_SEQUENCE1]], 0, 0, 1, implicit $exec :: (load store seq_cst (s64), addrspace 1) + ; GFX6-NEXT: $vgpr0_vgpr1 = PRED_COPY [[BUFFER_ATOMIC_ADD_X2_ADDR64_RTN]] ; GFX7-LABEL: name: global_atomicrmw_add_s64 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX7-NEXT: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64), addrspace 1) - ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_ADD_X2_RTN]] + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY 
$vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX7-NEXT: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[PRED_COPY]], [[PRED_COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64), addrspace 1) + ; GFX7-NEXT: $vgpr0_vgpr1 = PRED_COPY [[FLAT_ATOMIC_ADD_X2_RTN]] ; GFX9-LABEL: name: global_atomicrmw_add_s64 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: [[GLOBAL_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst (s64), addrspace 1) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_ATOMIC_ADD_X2_RTN]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[GLOBAL_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_ADD_X2_RTN [[PRED_COPY]], [[PRED_COPY1]], 0, 1, implicit $exec :: (load store seq_cst (s64), addrspace 1) + ; GFX9-NEXT: $vgpr0_vgpr1 = PRED_COPY [[GLOBAL_ATOMIC_ADD_X2_RTN]] ; GFX10-LABEL: name: global_atomicrmw_add_s64 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX10-NEXT: [[GLOBAL_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst (s64), addrspace 1) - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_ATOMIC_ADD_X2_RTN]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX10-NEXT: [[GLOBAL_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_ADD_X2_RTN [[PRED_COPY]], [[PRED_COPY1]], 0, 1, implicit $exec :: (load store seq_cst (s64), addrspace 1) + ; GFX10-NEXT: $vgpr0_vgpr1 = PRED_COPY [[GLOBAL_ATOMIC_ADD_X2_RTN]] ; GFX11-LABEL: name: global_atomicrmw_add_s64 ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX11-NEXT: [[GLOBAL_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst (s64), addrspace 1) - ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_ATOMIC_ADD_X2_RTN]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX11-NEXT: [[GLOBAL_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_ADD_X2_RTN [[PRED_COPY]], [[PRED_COPY1]], 0, 1, implicit $exec :: (load store seq_cst (s64), addrspace 1) + ; GFX11-NEXT: $vgpr0_vgpr1 = PRED_COPY [[GLOBAL_ATOMIC_ADD_X2_RTN]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = COPY $vgpr2_vgpr3 %2:vgpr(s64) = G_ATOMICRMW_ADD %0, %1 :: (load store seq_cst (s64), addrspace 1) @@ -802,38 +802,38 @@ ; GFX6-LABEL: name: global_atomicrmw_add_s64_nortn ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; 
GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX6-NEXT: BUFFER_ATOMIC_ADD_X2_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, implicit $exec :: (load store seq_cst (s64), addrspace 1) + ; GFX6-NEXT: BUFFER_ATOMIC_ADD_X2_ADDR64 [[PRED_COPY1]], [[PRED_COPY]], [[REG_SEQUENCE1]], 0, 0, 0, implicit $exec :: (load store seq_cst (s64), addrspace 1) ; GFX7-LABEL: name: global_atomicrmw_add_s64_nortn ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX7-NEXT: FLAT_ATOMIC_ADD_X2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64), addrspace 1) + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX7-NEXT: FLAT_ATOMIC_ADD_X2 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64), addrspace 1) ; GFX9-LABEL: name: global_atomicrmw_add_s64_nortn ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: GLOBAL_ATOMIC_ADD_X2 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load store seq_cst (s64), addrspace 1) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX9-NEXT: GLOBAL_ATOMIC_ADD_X2 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec :: (load store seq_cst (s64), addrspace 1) ; GFX10-LABEL: name: global_atomicrmw_add_s64_nortn ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX10-NEXT: GLOBAL_ATOMIC_ADD_X2 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load store seq_cst (s64), addrspace 1) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX10-NEXT: GLOBAL_ATOMIC_ADD_X2 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec :: (load store seq_cst (s64), addrspace 1) ; GFX11-LABEL: name: global_atomicrmw_add_s64_nortn ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX11-NEXT: GLOBAL_ATOMIC_ADD_X2 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load store seq_cst (s64), addrspace 1) + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX11-NEXT: GLOBAL_ATOMIC_ADD_X2 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec :: (load store seq_cst (s64), addrspace 1) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = COPY $vgpr2_vgpr3 %2:vgpr(s64) = G_ATOMICRMW_ADD %0, %1 :: (load store seq_cst (s64), addrspace 1) @@ -852,63 +852,63 @@ ; GFX6-LABEL: name: global_atomicrmw_add_s64_offset4095 ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX6-NEXT: {{ $}} - ; 
GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX6-NEXT: [[BUFFER_ATOMIC_ADD_X2_ADDR64_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_ADD_X2_ADDR64_RTN [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 4095, 1, implicit $exec :: (load store seq_cst (s64), addrspace 1) - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[BUFFER_ATOMIC_ADD_X2_ADDR64_RTN]] + ; GFX6-NEXT: [[BUFFER_ATOMIC_ADD_X2_ADDR64_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_ADD_X2_ADDR64_RTN [[PRED_COPY1]], [[PRED_COPY]], [[REG_SEQUENCE1]], 0, 4095, 1, implicit $exec :: (load store seq_cst (s64), addrspace 1) + ; GFX6-NEXT: $vgpr0_vgpr1 = PRED_COPY [[BUFFER_ATOMIC_ADD_X2_ADDR64_RTN]] ; GFX7-LABEL: name: global_atomicrmw_add_s64_offset4095 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 ; GFX7-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec ; GFX7-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX7-NEXT: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 - ; GFX7-NEXT: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64), addrspace 1) - ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_ADD_X2_RTN]] + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX7-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY2]], [[PRED_COPY3]], 0, implicit $exec + ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY4]], [[PRED_COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, 
implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX7-NEXT: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[REG_SEQUENCE1]], [[PRED_COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64), addrspace 1) + ; GFX7-NEXT: $vgpr0_vgpr1 = PRED_COPY [[FLAT_ATOMIC_ADD_X2_RTN]] ; GFX9-LABEL: name: global_atomicrmw_add_s64_offset4095 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: [[GLOBAL_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 4095, 1, implicit $exec :: (load store seq_cst (s64), addrspace 1) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_ATOMIC_ADD_X2_RTN]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[GLOBAL_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_ADD_X2_RTN [[PRED_COPY]], [[PRED_COPY1]], 4095, 1, implicit $exec :: (load store seq_cst (s64), addrspace 1) + ; GFX9-NEXT: $vgpr0_vgpr1 = PRED_COPY [[GLOBAL_ATOMIC_ADD_X2_RTN]] ; GFX10-LABEL: name: global_atomicrmw_add_s64_offset4095 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec ; GFX10-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX10-NEXT: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 - ; GFX10-NEXT: [[GLOBAL_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_ADD_X2_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst (s64), addrspace 1) - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_ATOMIC_ADD_X2_RTN]] + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[PRED_COPY2]], [[PRED_COPY3]], 0, implicit $exec + ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[PRED_COPY4]], 
[[PRED_COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX10-NEXT: [[GLOBAL_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_ADD_X2_RTN [[REG_SEQUENCE1]], [[PRED_COPY1]], 0, 1, implicit $exec :: (load store seq_cst (s64), addrspace 1) + ; GFX10-NEXT: $vgpr0_vgpr1 = PRED_COPY [[GLOBAL_ATOMIC_ADD_X2_RTN]] ; GFX11-LABEL: name: global_atomicrmw_add_s64_offset4095 ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX11-NEXT: [[GLOBAL_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 4095, 1, implicit $exec :: (load store seq_cst (s64), addrspace 1) - ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_ATOMIC_ADD_X2_RTN]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX11-NEXT: [[GLOBAL_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_ADD_X2_RTN [[PRED_COPY]], [[PRED_COPY1]], 4095, 1, implicit $exec :: (load store seq_cst (s64), addrspace 1) + ; GFX11-NEXT: $vgpr0_vgpr1 = PRED_COPY [[GLOBAL_ATOMIC_ADD_X2_RTN]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = COPY $vgpr2_vgpr3 %2:vgpr(s64) = G_CONSTANT i64 4095 @@ -930,58 +930,58 @@ ; GFX6-LABEL: name: global_atomicrmw_add_s64_offset4095_nortn ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX6-NEXT: BUFFER_ATOMIC_ADD_X2_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 4095, 0, implicit $exec :: (load store seq_cst (s64), addrspace 1) + ; GFX6-NEXT: BUFFER_ATOMIC_ADD_X2_ADDR64 [[PRED_COPY1]], [[PRED_COPY]], [[REG_SEQUENCE1]], 0, 4095, 0, implicit $exec :: (load store seq_cst (s64), addrspace 1) ; GFX7-LABEL: name: global_atomicrmw_add_s64_offset4095_nortn ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 ; GFX7-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec ; GFX7-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - 
; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX7-NEXT: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 - ; GFX7-NEXT: FLAT_ATOMIC_ADD_X2 [[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64), addrspace 1) + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX7-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY2]], [[PRED_COPY3]], 0, implicit $exec + ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY4]], [[PRED_COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX7-NEXT: FLAT_ATOMIC_ADD_X2 [[REG_SEQUENCE1]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64), addrspace 1) ; GFX9-LABEL: name: global_atomicrmw_add_s64_offset4095_nortn ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: GLOBAL_ATOMIC_ADD_X2 [[COPY]], [[COPY1]], 4095, 0, implicit $exec :: (load store seq_cst (s64), addrspace 1) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX9-NEXT: GLOBAL_ATOMIC_ADD_X2 [[PRED_COPY]], [[PRED_COPY1]], 4095, 0, implicit $exec :: (load store seq_cst (s64), addrspace 1) ; GFX10-LABEL: name: global_atomicrmw_add_s64_offset4095_nortn ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec ; GFX10-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX10-NEXT: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, 
%subreg.sub1 - ; GFX10-NEXT: GLOBAL_ATOMIC_ADD_X2 [[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec :: (load store seq_cst (s64), addrspace 1) + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[PRED_COPY2]], [[PRED_COPY3]], 0, implicit $exec + ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[PRED_COPY4]], [[PRED_COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX10-NEXT: GLOBAL_ATOMIC_ADD_X2 [[REG_SEQUENCE1]], [[PRED_COPY1]], 0, 0, implicit $exec :: (load store seq_cst (s64), addrspace 1) ; GFX11-LABEL: name: global_atomicrmw_add_s64_offset4095_nortn ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX11-NEXT: GLOBAL_ATOMIC_ADD_X2 [[COPY]], [[COPY1]], 4095, 0, implicit $exec :: (load store seq_cst (s64), addrspace 1) + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX11-NEXT: GLOBAL_ATOMIC_ADD_X2 [[PRED_COPY]], [[PRED_COPY1]], 4095, 0, implicit $exec :: (load store seq_cst (s64), addrspace 1) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = COPY $vgpr2_vgpr3 %2:vgpr(s64) = G_CONSTANT i64 4095 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-fadd-local.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-fadd-local.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-fadd-local.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-fadd-local.mir @@ -20,18 +20,18 @@ ; GFX8-LABEL: name: atomicrmw_fadd_s32_local ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 ; GFX8-NEXT: $m0 = S_MOV_B32 -1 - ; GFX8-NEXT: [[DS_ADD_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_ADD_RTN_F32 [[COPY]], [[COPY1]], 0, 0, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 3) - ; GFX8-NEXT: $vgpr0 = COPY [[DS_ADD_RTN_F32_]] + ; GFX8-NEXT: [[DS_ADD_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_ADD_RTN_F32 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 3) + ; GFX8-NEXT: $vgpr0 = PRED_COPY [[DS_ADD_RTN_F32_]] ; GFX9-LABEL: name: atomicrmw_fadd_s32_local ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: [[DS_ADD_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_ADD_RTN_F32_gfx9 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load store seq_cst (s32), addrspace 3) - ; GFX9-NEXT: $vgpr0 = COPY [[DS_ADD_RTN_F32_gfx9_]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = 
PRED_COPY $vgpr1 + ; GFX9-NEXT: [[DS_ADD_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_ADD_RTN_F32_gfx9 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec :: (load store seq_cst (s32), addrspace 3) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[DS_ADD_RTN_F32_gfx9_]] ; GFX6-LABEL: name: atomicrmw_fadd_s32_local ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6-NEXT: {{ $}} @@ -39,7 +39,7 @@ ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 ; GFX6-NEXT: $m0 = S_MOV_B32 -1 ; GFX6-NEXT: [[ATOMICRMW_FADD:%[0-9]+]]:vgpr_32(s32) = G_ATOMICRMW_FADD [[COPY]](p3), [[COPY1]] :: (load store seq_cst (s32), addrspace 3) - ; GFX6-NEXT: $vgpr0 = COPY [[ATOMICRMW_FADD]](s32) + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[ATOMICRMW_FADD]](s32) %0:vgpr(p3) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s32) = G_ATOMICRMW_FADD %0(p3), %1 :: (load store seq_cst (s32), addrspace 3) @@ -59,16 +59,16 @@ ; GFX8-LABEL: name: atomicrmw_fadd_s32_local_noret ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 ; GFX8-NEXT: $m0 = S_MOV_B32 -1 - ; GFX8-NEXT: DS_ADD_F32 [[COPY]], [[COPY1]], 0, 0, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 3) + ; GFX8-NEXT: DS_ADD_F32 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 3) ; GFX9-LABEL: name: atomicrmw_fadd_s32_local_noret ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: DS_ADD_F32_gfx9 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load store seq_cst (s32), addrspace 3) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX9-NEXT: DS_ADD_F32_gfx9 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec :: (load store seq_cst (s32), addrspace 3) ; GFX6-LABEL: name: atomicrmw_fadd_s32_local_noret ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6-NEXT: {{ $}} @@ -94,18 +94,18 @@ ; GFX8-LABEL: name: atomicrmw_fadd_s32_local_gep4 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 ; GFX8-NEXT: $m0 = S_MOV_B32 -1 - ; GFX8-NEXT: [[DS_ADD_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_ADD_RTN_F32 [[COPY]], [[COPY1]], 4, 0, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 3) - ; GFX8-NEXT: $vgpr0 = COPY [[DS_ADD_RTN_F32_]] + ; GFX8-NEXT: [[DS_ADD_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_ADD_RTN_F32 [[PRED_COPY]], [[PRED_COPY1]], 4, 0, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 3) + ; GFX8-NEXT: $vgpr0 = PRED_COPY [[DS_ADD_RTN_F32_]] ; GFX9-LABEL: name: atomicrmw_fadd_s32_local_gep4 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: [[DS_ADD_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_ADD_RTN_F32_gfx9 [[COPY]], [[COPY1]], 4, 0, implicit $exec :: (load store seq_cst (s32), addrspace 3) - ; GFX9-NEXT: $vgpr0 = COPY [[DS_ADD_RTN_F32_gfx9_]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: 
[[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[DS_ADD_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_ADD_RTN_F32_gfx9 [[PRED_COPY]], [[PRED_COPY1]], 4, 0, implicit $exec :: (load store seq_cst (s32), addrspace 3) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[DS_ADD_RTN_F32_gfx9_]] ; GFX6-LABEL: name: atomicrmw_fadd_s32_local_gep4 ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6-NEXT: {{ $}} @@ -115,7 +115,7 @@ ; GFX6-NEXT: [[PTR_ADD:%[0-9]+]]:vgpr(p3) = G_PTR_ADD [[COPY]], [[C]](s32) ; GFX6-NEXT: $m0 = S_MOV_B32 -1 ; GFX6-NEXT: [[ATOMICRMW_FADD:%[0-9]+]]:vgpr_32(s32) = G_ATOMICRMW_FADD [[PTR_ADD]](p3), [[COPY1]] :: (load store seq_cst (s32), addrspace 3) - ; GFX6-NEXT: $vgpr0 = COPY [[ATOMICRMW_FADD]](s32) + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[ATOMICRMW_FADD]](s32) %0:vgpr(p3) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s32) = G_CONSTANT i32 4 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-fadd-region.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-fadd-region.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-fadd-region.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-fadd-region.mir @@ -20,18 +20,18 @@ ; GFX8-LABEL: name: atomicrmw_fadd_s32_region ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 ; GFX8-NEXT: $m0 = S_MOV_B32 -1 - ; GFX8-NEXT: [[DS_ADD_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_ADD_RTN_F32 [[COPY]], [[COPY1]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 2) - ; GFX8-NEXT: $vgpr0 = COPY [[DS_ADD_RTN_F32_]] + ; GFX8-NEXT: [[DS_ADD_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_ADD_RTN_F32 [[PRED_COPY]], [[PRED_COPY1]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 2) + ; GFX8-NEXT: $vgpr0 = PRED_COPY [[DS_ADD_RTN_F32_]] ; GFX9-LABEL: name: atomicrmw_fadd_s32_region ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: [[DS_ADD_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_ADD_RTN_F32 [[COPY]], [[COPY1]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 2) - ; GFX9-NEXT: $vgpr0 = COPY [[DS_ADD_RTN_F32_]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[DS_ADD_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_ADD_RTN_F32 [[PRED_COPY]], [[PRED_COPY1]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 2) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[DS_ADD_RTN_F32_]] ; GFX6-LABEL: name: atomicrmw_fadd_s32_region ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6-NEXT: {{ $}} @@ -39,7 +39,7 @@ ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 ; GFX6-NEXT: $m0 = S_MOV_B32 -1 ; GFX6-NEXT: [[ATOMICRMW_FADD:%[0-9]+]]:vgpr_32(s32) = G_ATOMICRMW_FADD [[COPY]](p2), [[COPY1]] :: (load store seq_cst (s32), addrspace 2) - ; GFX6-NEXT: $vgpr0 = COPY [[ATOMICRMW_FADD]](s32) + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[ATOMICRMW_FADD]](s32) %0:vgpr(p2) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s32) = G_ATOMICRMW_FADD %0(p2), %1 :: (load store seq_cst (s32), addrspace 2) @@ -59,16 +59,16 @@ ; GFX8-LABEL: name: atomicrmw_fadd_s32_region_noret ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY 
$vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 ; GFX8-NEXT: $m0 = S_MOV_B32 -1 - ; GFX8-NEXT: DS_ADD_F32 [[COPY]], [[COPY1]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 2) + ; GFX8-NEXT: DS_ADD_F32 [[PRED_COPY]], [[PRED_COPY1]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 2) ; GFX9-LABEL: name: atomicrmw_fadd_s32_region_noret ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: DS_ADD_F32 [[COPY]], [[COPY1]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 2) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX9-NEXT: DS_ADD_F32 [[PRED_COPY]], [[PRED_COPY1]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 2) ; GFX6-LABEL: name: atomicrmw_fadd_s32_region_noret ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6-NEXT: {{ $}} @@ -94,18 +94,18 @@ ; GFX8-LABEL: name: atomicrmw_fadd_s32_region_gep4 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 ; GFX8-NEXT: $m0 = S_MOV_B32 -1 - ; GFX8-NEXT: [[DS_ADD_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_ADD_RTN_F32 [[COPY]], [[COPY1]], 4, 1, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 2) - ; GFX8-NEXT: $vgpr0 = COPY [[DS_ADD_RTN_F32_]] + ; GFX8-NEXT: [[DS_ADD_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_ADD_RTN_F32 [[PRED_COPY]], [[PRED_COPY1]], 4, 1, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 2) + ; GFX8-NEXT: $vgpr0 = PRED_COPY [[DS_ADD_RTN_F32_]] ; GFX9-LABEL: name: atomicrmw_fadd_s32_region_gep4 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: [[DS_ADD_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_ADD_RTN_F32 [[COPY]], [[COPY1]], 4, 1, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 2) - ; GFX9-NEXT: $vgpr0 = COPY [[DS_ADD_RTN_F32_]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[DS_ADD_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_ADD_RTN_F32 [[PRED_COPY]], [[PRED_COPY1]], 4, 1, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 2) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[DS_ADD_RTN_F32_]] ; GFX6-LABEL: name: atomicrmw_fadd_s32_region_gep4 ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6-NEXT: {{ $}} @@ -115,7 +115,7 @@ ; GFX6-NEXT: [[PTR_ADD:%[0-9]+]]:vgpr(p2) = G_PTR_ADD [[COPY]], [[C]](s32) ; GFX6-NEXT: $m0 = S_MOV_B32 -1 ; GFX6-NEXT: [[ATOMICRMW_FADD:%[0-9]+]]:vgpr_32(s32) = G_ATOMICRMW_FADD [[PTR_ADD]](p2), [[COPY1]] :: (load store seq_cst (s32), addrspace 2) - ; GFX6-NEXT: $vgpr0 = COPY [[ATOMICRMW_FADD]](s32) + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[ATOMICRMW_FADD]](s32) %0:vgpr(p2) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s32) = G_CONSTANT i32 4 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-xchg-local.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-xchg-local.mir --- 
a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-xchg-local.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-xchg-local.mir @@ -19,26 +19,26 @@ ; GFX6-LABEL: name: atomicrmw_xchg_s32_local ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 ; GFX6-NEXT: $m0 = S_MOV_B32 -1 - ; GFX6-NEXT: [[DS_WRXCHG_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_WRXCHG_RTN_B32 [[COPY]], [[COPY1]], 0, 0, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 3) - ; GFX6-NEXT: $vgpr0 = COPY [[DS_WRXCHG_RTN_B32_]] + ; GFX6-NEXT: [[DS_WRXCHG_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_WRXCHG_RTN_B32 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 3) + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[DS_WRXCHG_RTN_B32_]] ; GFX7-LABEL: name: atomicrmw_xchg_s32_local ; GFX7: liveins: $vgpr0, $vgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 - ; GFX7-NEXT: [[DS_WRXCHG_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_WRXCHG_RTN_B32 [[COPY]], [[COPY1]], 0, 0, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 3) - ; GFX7-NEXT: $vgpr0 = COPY [[DS_WRXCHG_RTN_B32_]] + ; GFX7-NEXT: [[DS_WRXCHG_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_WRXCHG_RTN_B32 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 3) + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[DS_WRXCHG_RTN_B32_]] ; GFX9-LABEL: name: atomicrmw_xchg_s32_local ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: [[DS_WRXCHG_RTN_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_WRXCHG_RTN_B32_gfx9 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load store seq_cst (s32), addrspace 3) - ; GFX9-NEXT: $vgpr0 = COPY [[DS_WRXCHG_RTN_B32_gfx9_]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[DS_WRXCHG_RTN_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_WRXCHG_RTN_B32_gfx9 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec :: (load store seq_cst (s32), addrspace 3) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[DS_WRXCHG_RTN_B32_gfx9_]] %0:vgpr(p3) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s32) = G_ATOMICRMW_XCHG %0(p3), %1 :: (load store seq_cst (s32), addrspace 3) @@ -58,28 +58,28 @@ ; GFX6-LABEL: name: atomicrmw_xchg_s32_local_gep4 ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 ; GFX6-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4, implicit $exec - ; GFX6-NEXT: %3:vgpr_32, dead %5:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX6-NEXT: $m0 = S_MOV_B32 -1 - ; 
GFX6-NEXT: [[DS_WRXCHG_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_WRXCHG_RTN_B32 %3, [[COPY1]], 0, 0, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 3) - ; GFX6-NEXT: $vgpr0 = COPY [[DS_WRXCHG_RTN_B32_]] + ; GFX6-NEXT: [[DS_WRXCHG_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_WRXCHG_RTN_B32 [[V_ADD_CO_U32_e64_]], [[PRED_COPY1]], 0, 0, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 3) + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[DS_WRXCHG_RTN_B32_]] ; GFX7-LABEL: name: atomicrmw_xchg_s32_local_gep4 ; GFX7: liveins: $vgpr0, $vgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 - ; GFX7-NEXT: [[DS_WRXCHG_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_WRXCHG_RTN_B32 [[COPY]], [[COPY1]], 4, 0, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 3) - ; GFX7-NEXT: $vgpr0 = COPY [[DS_WRXCHG_RTN_B32_]] + ; GFX7-NEXT: [[DS_WRXCHG_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_WRXCHG_RTN_B32 [[PRED_COPY]], [[PRED_COPY1]], 4, 0, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 3) + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[DS_WRXCHG_RTN_B32_]] ; GFX9-LABEL: name: atomicrmw_xchg_s32_local_gep4 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: [[DS_WRXCHG_RTN_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_WRXCHG_RTN_B32_gfx9 [[COPY]], [[COPY1]], 4, 0, implicit $exec :: (load store seq_cst (s32), addrspace 3) - ; GFX9-NEXT: $vgpr0 = COPY [[DS_WRXCHG_RTN_B32_gfx9_]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[DS_WRXCHG_RTN_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_WRXCHG_RTN_B32_gfx9 [[PRED_COPY]], [[PRED_COPY1]], 4, 0, implicit $exec :: (load store seq_cst (s32), addrspace 3) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[DS_WRXCHG_RTN_B32_gfx9_]] %0:vgpr(p3) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s32) = G_CONSTANT i32 4 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-xchg-region.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-xchg-region.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-xchg-region.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-xchg-region.mir @@ -19,26 +19,26 @@ ; GFX6-LABEL: name: atomicrmw_xchg_s32_region ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 ; GFX6-NEXT: $m0 = S_MOV_B32 -1 - ; GFX6-NEXT: [[DS_WRXCHG_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_WRXCHG_RTN_B32 [[COPY]], [[COPY1]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 2) - ; GFX6-NEXT: $vgpr0 = COPY [[DS_WRXCHG_RTN_B32_]] + ; GFX6-NEXT: [[DS_WRXCHG_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_WRXCHG_RTN_B32 [[PRED_COPY]], [[PRED_COPY1]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 2) + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[DS_WRXCHG_RTN_B32_]] ; GFX7-LABEL: name: atomicrmw_xchg_s32_region ; GFX7: liveins: $vgpr0, $vgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7-NEXT: 
[[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 - ; GFX7-NEXT: [[DS_WRXCHG_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_WRXCHG_RTN_B32 [[COPY]], [[COPY1]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 2) - ; GFX7-NEXT: $vgpr0 = COPY [[DS_WRXCHG_RTN_B32_]] + ; GFX7-NEXT: [[DS_WRXCHG_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_WRXCHG_RTN_B32 [[PRED_COPY]], [[PRED_COPY1]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 2) + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[DS_WRXCHG_RTN_B32_]] ; GFX9-LABEL: name: atomicrmw_xchg_s32_region ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: [[DS_WRXCHG_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_WRXCHG_RTN_B32 [[COPY]], [[COPY1]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 2) - ; GFX9-NEXT: $vgpr0 = COPY [[DS_WRXCHG_RTN_B32_]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[DS_WRXCHG_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_WRXCHG_RTN_B32 [[PRED_COPY]], [[PRED_COPY1]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 2) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[DS_WRXCHG_RTN_B32_]] %0:vgpr(p2) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s32) = G_ATOMICRMW_XCHG %0(p2), %1 :: (load store seq_cst (s32), addrspace 2) @@ -58,28 +58,28 @@ ; GFX6-LABEL: name: atomicrmw_xchg_s32_region_gep4 ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 ; GFX6-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4, implicit $exec - ; GFX6-NEXT: %3:vgpr_32, dead %5:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX6-NEXT: $m0 = S_MOV_B32 -1 - ; GFX6-NEXT: [[DS_WRXCHG_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_WRXCHG_RTN_B32 %3, [[COPY1]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 2) - ; GFX6-NEXT: $vgpr0 = COPY [[DS_WRXCHG_RTN_B32_]] + ; GFX6-NEXT: [[DS_WRXCHG_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_WRXCHG_RTN_B32 [[V_ADD_CO_U32_e64_]], [[PRED_COPY1]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 2) + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[DS_WRXCHG_RTN_B32_]] ; GFX7-LABEL: name: atomicrmw_xchg_s32_region_gep4 ; GFX7: liveins: $vgpr0, $vgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 - ; GFX7-NEXT: [[DS_WRXCHG_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_WRXCHG_RTN_B32 [[COPY]], [[COPY1]], 4, 1, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 2) - ; GFX7-NEXT: $vgpr0 = COPY [[DS_WRXCHG_RTN_B32_]] + ; GFX7-NEXT: [[DS_WRXCHG_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_WRXCHG_RTN_B32 [[PRED_COPY]], [[PRED_COPY1]], 4, 1, 
implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 2) + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[DS_WRXCHG_RTN_B32_]] ; GFX9-LABEL: name: atomicrmw_xchg_s32_region_gep4 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: [[DS_WRXCHG_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_WRXCHG_RTN_B32 [[COPY]], [[COPY1]], 4, 1, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 2) - ; GFX9-NEXT: $vgpr0 = COPY [[DS_WRXCHG_RTN_B32_]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[DS_WRXCHG_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_WRXCHG_RTN_B32 [[PRED_COPY]], [[PRED_COPY1]], 4, 1, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 2) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[DS_WRXCHG_RTN_B32_]] %0:vgpr(p2) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s32) = G_CONSTANT i32 4 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-bitcast.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-bitcast.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-bitcast.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-bitcast.mir @@ -1,3 +1,4 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -march=amdgcn -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck %s -check-prefixes=GCN --- @@ -7,13 +8,15 @@ regBankSelected: true tracksRegLiveness: true -# GCN-LABEL: name: bitcast -# GCN: [[A:%[0-9]+]]:vgpr_32 = COPY $vgpr0 -# GCN: S_ENDPGM 0, implicit [[A]] body: | bb.0: liveins: $vgpr0 + ; GCN-LABEL: name: bitcast + ; GCN: liveins: $vgpr0 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: S_ENDPGM 0, implicit [[PRED_COPY]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(<2 x s16>) = G_BITCAST %0 %2:vgpr(s32) = G_BITCAST %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-bitreverse.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-bitreverse.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-bitreverse.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-bitreverse.mir @@ -12,8 +12,8 @@ ; CHECK-LABEL: name: bitreverse_i32_ss ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; CHECK-NEXT: [[S_BREV_B32_:%[0-9]+]]:sreg_32 = S_BREV_B32 [[COPY]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; CHECK-NEXT: [[S_BREV_B32_:%[0-9]+]]:sreg_32 = S_BREV_B32 [[PRED_COPY]] ; CHECK-NEXT: S_ENDPGM 0, implicit [[S_BREV_B32_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = G_BITREVERSE %0 @@ -31,8 +31,8 @@ ; CHECK-LABEL: name: bitreverse_i32_vv ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[V_BFREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_BFREV_B32_e64 [[COPY]], implicit $exec + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[V_BFREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_BFREV_B32_e64 [[PRED_COPY]], implicit $exec ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_BFREV_B32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = G_BITREVERSE %0 @@ -50,8 +50,8 @@ ; CHECK-LABEL: name: bitreverse_i32_vs ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; CHECK-NEXT: [[V_BFREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_BFREV_B32_e64 [[COPY]], implicit $exec + ; CHECK-NEXT: 
[[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; CHECK-NEXT: [[V_BFREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_BFREV_B32_e64 [[PRED_COPY]], implicit $exec ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_BFREV_B32_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = G_BITREVERSE %0 @@ -69,8 +69,8 @@ ; CHECK-LABEL: name: bitreverse_i64_ss ; CHECK: liveins: $sgpr0_sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[S_BREV_B64_:%[0-9]+]]:sreg_64 = S_BREV_B64 [[COPY]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[S_BREV_B64_:%[0-9]+]]:sreg_64 = S_BREV_B64 [[PRED_COPY]] ; CHECK-NEXT: S_ENDPGM 0, implicit [[S_BREV_B64_]] %0:sgpr(s64) = COPY $sgpr0_sgpr1 %1:sgpr(s64) = G_BITREVERSE %0 @@ -88,11 +88,11 @@ ; CHECK-LABEL: name: bitreverse_i64_vv ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; CHECK-NEXT: [[V_BFREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_BFREV_B32_e64 [[COPY2]], implicit $exec - ; CHECK-NEXT: [[V_BFREV_B32_e64_1:%[0-9]+]]:vgpr_32 = V_BFREV_B32_e64 [[COPY1]], implicit $exec + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; CHECK-NEXT: [[V_BFREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_BFREV_B32_e64 [[PRED_COPY2]], implicit $exec + ; CHECK-NEXT: [[V_BFREV_B32_e64_1:%[0-9]+]]:vgpr_32 = V_BFREV_B32_e64 [[PRED_COPY1]], implicit $exec ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_BFREV_B32_e64_]], %subreg.sub0, [[V_BFREV_B32_e64_1]], %subreg.sub1 ; CHECK-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 @@ -114,11 +114,11 @@ ; CHECK-LABEL: name: bitreverse_i64_vs ; CHECK: liveins: $sgpr0_sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 - ; CHECK-NEXT: [[V_BFREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_BFREV_B32_e64 [[COPY2]], implicit $exec - ; CHECK-NEXT: [[V_BFREV_B32_e64_1:%[0-9]+]]:vgpr_32 = V_BFREV_B32_e64 [[COPY1]], implicit $exec + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub0 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub1 + ; CHECK-NEXT: [[V_BFREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_BFREV_B32_e64 [[PRED_COPY2]], implicit $exec + ; CHECK-NEXT: [[V_BFREV_B32_e64_1:%[0-9]+]]:vgpr_32 = V_BFREV_B32_e64 [[PRED_COPY1]], implicit $exec ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_BFREV_B32_e64_]], %subreg.sub0, [[V_BFREV_B32_e64_1]], %subreg.sub1 ; CHECK-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] %0:sgpr(s64) = COPY $sgpr0_sgpr1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-brcond.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-brcond.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-brcond.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-brcond.mir @@ -19,11 +19,11 @@ ; GCN-NEXT: successors: %bb.1(0x80000000) ; GCN-NEXT: liveins: $sgpr0, $sgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; 
GCN-NEXT: S_CMP_EQ_U32 [[COPY]], [[COPY1]], implicit-def $scc - ; GCN-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $scc - ; GCN-NEXT: $scc = COPY [[COPY2]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GCN-NEXT: S_CMP_EQ_U32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $scc + ; GCN-NEXT: $scc = PRED_COPY [[PRED_COPY2]] ; GCN-NEXT: S_CBRANCH_SCC1 %bb.1, implicit $scc ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.1: @@ -52,7 +52,7 @@ ; GCN-NEXT: liveins: $sgpr0, $sgpr1 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF - ; GCN-NEXT: $scc = COPY [[DEF]] + ; GCN-NEXT: $scc = PRED_COPY [[DEF]] ; GCN-NEXT: S_CBRANCH_SCC1 %bb.1, implicit $scc ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.1: @@ -78,11 +78,11 @@ ; GCN-NEXT: successors: %bb.1(0x80000000) ; GCN-NEXT: liveins: $sgpr0, $sgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GCN-NEXT: S_CMP_EQ_U32 [[COPY]], [[COPY1]], implicit-def $scc - ; GCN-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $scc - ; GCN-NEXT: $scc = COPY [[COPY2]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GCN-NEXT: S_CMP_EQ_U32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $scc + ; GCN-NEXT: $scc = PRED_COPY [[PRED_COPY2]] ; GCN-NEXT: S_CBRANCH_SCC1 %bb.1, implicit $scc ; GCN-NEXT: S_BRANCH %bb.1 ; GCN-NEXT: {{ $}} @@ -118,10 +118,10 @@ ; GCN-NEXT: successors: %bb.1(0x80000000) ; GCN-NEXT: liveins: $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GCN-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 [[COPY]], [[COPY1]], implicit $exec - ; GCN-NEXT: $vcc = COPY [[V_CMP_EQ_U32_e64_]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GCN-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec + ; GCN-NEXT: $vcc = PRED_COPY [[V_CMP_EQ_U32_e64_]] ; GCN-NEXT: S_CBRANCH_VCCNZ %bb.1, implicit $vcc ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.1: @@ -207,10 +207,10 @@ ; GCN-NEXT: successors: %bb.1(0x80000000) ; GCN-NEXT: liveins: $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GCN-NEXT: [[V_CMP_CLASS_F32_e64_:%[0-9]+]]:sreg_64 = V_CMP_CLASS_F32_e64 0, [[COPY]], [[COPY1]], implicit $exec - ; GCN-NEXT: $vcc = COPY [[V_CMP_CLASS_F32_e64_]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GCN-NEXT: [[V_CMP_CLASS_F32_e64_:%[0-9]+]]:sreg_64 = V_CMP_CLASS_F32_e64 0, [[PRED_COPY]], [[PRED_COPY1]], implicit $exec + ; GCN-NEXT: $vcc = PRED_COPY [[V_CMP_CLASS_F32_e64_]] ; GCN-NEXT: S_CBRANCH_VCCNZ %bb.1, implicit $vcc ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.1: @@ -238,14 +238,14 @@ ; GCN-NEXT: successors: %bb.1(0x80000000) ; GCN-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GCN-NEXT: 
[[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[COPY]], [[COPY1]], implicit $exec - ; GCN-NEXT: %5:sreg_64_xexec = nofpexcept V_CMP_EQ_F32_e64 0, [[COPY2]], 0, [[COPY3]], 0, implicit $mode, implicit $exec - ; GCN-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[V_CMP_EQ_U32_e64_]], %5, implicit-def dead $scc - ; GCN-NEXT: $vcc = COPY [[S_AND_B64_]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GCN-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec + ; GCN-NEXT: [[V_CMP_EQ_F32_e64_:%[0-9]+]]:sreg_64_xexec = nofpexcept V_CMP_EQ_F32_e64 0, [[PRED_COPY2]], 0, [[PRED_COPY3]], 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[V_CMP_EQ_F32_e64_]], implicit-def dead $scc + ; GCN-NEXT: $vcc = PRED_COPY [[S_AND_B64_]] ; GCN-NEXT: S_CBRANCH_VCCNZ %bb.1, implicit $vcc ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.1: @@ -277,15 +277,15 @@ ; GCN-NEXT: successors: %bb.1(0x80000000) ; GCN-NEXT: liveins: $sgpr0, $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, [[COPY2]], implicit-def $scc + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, [[PRED_COPY2]], implicit-def $scc ; GCN-NEXT: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 0, [[S_AND_B32_]], implicit $exec - ; GCN-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[COPY]], [[COPY1]], implicit $exec + ; GCN-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; GCN-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[V_CMP_NE_U32_e64_]], implicit-def dead $scc ; GCN-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 [[S_AND_B64_]], $exec, implicit-def $scc - ; GCN-NEXT: $vcc = COPY [[S_AND_B64_1]] + ; GCN-NEXT: $vcc = PRED_COPY [[S_AND_B64_1]] ; GCN-NEXT: S_CBRANCH_VCCNZ %bb.1, implicit $vcc ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.1: @@ -317,13 +317,13 @@ ; GCN-NEXT: successors: %bb.1(0x80000000) ; GCN-NEXT: liveins: $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GCN-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[COPY]], [[COPY1]], implicit $exec + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GCN-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; GCN-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 -1 ; GCN-NEXT: [[S_XOR_B64_:%[0-9]+]]:sreg_64_xexec = S_XOR_B64 [[V_CMP_EQ_U32_e64_]], [[S_MOV_B64_]], implicit-def dead $scc ; GCN-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[S_XOR_B64_]], $exec, implicit-def $scc - ; GCN-NEXT: $vcc = COPY [[S_AND_B64_]] + ; GCN-NEXT: $vcc = PRED_COPY [[S_AND_B64_]] ; GCN-NEXT: 
S_CBRANCH_VCCNZ %bb.1, implicit $vcc ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.1: diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-bswap.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-bswap.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-bswap.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-bswap.mir @@ -13,18 +13,18 @@ ; GFX7-LABEL: name: bswap_i32_vv ; GFX7: liveins: $vgpr0 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7-NEXT: [[V_ALIGNBIT_B32_e64_:%[0-9]+]]:vgpr_32 = V_ALIGNBIT_B32_e64 [[COPY]], [[COPY]], 8, implicit $exec - ; GFX7-NEXT: [[V_ALIGNBIT_B32_e64_1:%[0-9]+]]:vgpr_32 = V_ALIGNBIT_B32_e64 [[COPY]], [[COPY]], 24, implicit $exec + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX7-NEXT: [[V_ALIGNBIT_B32_e64_:%[0-9]+]]:vgpr_32 = V_ALIGNBIT_B32_e64 [[PRED_COPY]], [[PRED_COPY]], 8, implicit $exec + ; GFX7-NEXT: [[V_ALIGNBIT_B32_e64_1:%[0-9]+]]:vgpr_32 = V_ALIGNBIT_B32_e64 [[PRED_COPY]], [[PRED_COPY]], 24, implicit $exec ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 16711935 ; GFX7-NEXT: [[V_BFI_B32_e64_:%[0-9]+]]:vgpr_32 = V_BFI_B32_e64 [[S_MOV_B32_]], [[V_ALIGNBIT_B32_e64_1]], [[V_ALIGNBIT_B32_e64_]], implicit $exec ; GFX7-NEXT: S_ENDPGM 0, implicit [[V_BFI_B32_e64_]] ; GFX8-LABEL: name: bswap_i32_vv ; GFX8: liveins: $vgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 66051 - ; GFX8-NEXT: [[V_PERM_B32_e64_:%[0-9]+]]:vgpr_32 = V_PERM_B32_e64 0, [[COPY]], [[S_MOV_B32_]], implicit $exec + ; GFX8-NEXT: [[V_PERM_B32_e64_:%[0-9]+]]:vgpr_32 = V_PERM_B32_e64 0, [[PRED_COPY]], [[S_MOV_B32_]], implicit $exec ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_PERM_B32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = G_BSWAP %0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-build-vector-trunc.v2s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-build-vector-trunc.v2s16.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-build-vector-trunc.v2s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-build-vector-trunc.v2s16.mir @@ -15,9 +15,9 @@ ; GFX9PLUS-LABEL: name: test_build_vector_trunc_s_v2s16_s_s32_s_s32 ; GFX9PLUS: liveins: $sgpr0, $sgpr1 ; GFX9PLUS-NEXT: {{ $}} - ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX9PLUS-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX9PLUS-NEXT: [[S_PACK_LL_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_LL_B32_B16 [[COPY]], [[COPY1]] + ; GFX9PLUS-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX9PLUS-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX9PLUS-NEXT: [[S_PACK_LL_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_LL_B32_B16 [[PRED_COPY]], [[PRED_COPY1]] ; GFX9PLUS-NEXT: S_ENDPGM 0, implicit [[S_PACK_LL_B32_B16_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 @@ -38,9 +38,9 @@ ; GFX9PLUS-LABEL: name: test_build_vector_trunc_s_pack_lh ; GFX9PLUS: liveins: $sgpr0, $sgpr1 ; GFX9PLUS-NEXT: {{ $}} - ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX9PLUS-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX9PLUS-NEXT: [[S_PACK_LH_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_LH_B32_B16 [[COPY]], [[COPY1]] + ; GFX9PLUS-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX9PLUS-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX9PLUS-NEXT: [[S_PACK_LH_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_LH_B32_B16 [[PRED_COPY]], [[PRED_COPY1]] ; GFX9PLUS-NEXT: 
S_ENDPGM 0, implicit [[S_PACK_LH_B32_B16_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 @@ -64,18 +64,18 @@ ; GFX9-LABEL: name: test_build_vector_trunc_s_pack_hl ; GFX9: liveins: $sgpr0, $sgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 16 - ; GFX9-NEXT: [[S_LSHR_B32_:%[0-9]+]]:sreg_32 = S_LSHR_B32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc - ; GFX9-NEXT: [[S_PACK_LL_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_LL_B32_B16 [[S_LSHR_B32_]], [[COPY]] + ; GFX9-NEXT: [[S_LSHR_B32_:%[0-9]+]]:sreg_32 = S_LSHR_B32 [[PRED_COPY1]], [[S_MOV_B32_]], implicit-def $scc + ; GFX9-NEXT: [[S_PACK_LL_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_LL_B32_B16 [[S_LSHR_B32_]], [[PRED_COPY]] ; GFX9-NEXT: S_ENDPGM 0, implicit [[S_PACK_LL_B32_B16_]] ; GFX11-LABEL: name: test_build_vector_trunc_s_pack_hl ; GFX11: liveins: $sgpr0, $sgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX11-NEXT: [[S_PACK_HL_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_HL_B32_B16 [[COPY1]], [[COPY]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX11-NEXT: [[S_PACK_HL_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_HL_B32_B16 [[PRED_COPY1]], [[PRED_COPY]] ; GFX11-NEXT: S_ENDPGM 0, implicit [[S_PACK_HL_B32_B16_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 @@ -98,9 +98,9 @@ ; GFX9PLUS-LABEL: name: test_build_vector_trunc_s_pack_hh ; GFX9PLUS: liveins: $sgpr0, $sgpr1 ; GFX9PLUS-NEXT: {{ $}} - ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX9PLUS-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX9PLUS-NEXT: [[S_PACK_HH_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_HH_B32_B16 [[COPY]], [[COPY1]] + ; GFX9PLUS-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX9PLUS-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX9PLUS-NEXT: [[S_PACK_HH_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_HH_B32_B16 [[PRED_COPY]], [[PRED_COPY1]] ; GFX9PLUS-NEXT: S_ENDPGM 0, implicit [[S_PACK_HH_B32_B16_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 @@ -125,9 +125,9 @@ ; GFX9PLUS-LABEL: name: test_build_vector_trunc_s_v2s16_s_s32_s_0_s32 ; GFX9PLUS: liveins: $sgpr0 ; GFX9PLUS-NEXT: {{ $}} - ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX9PLUS-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 ; GFX9PLUS-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX9PLUS-NEXT: [[S_PACK_LL_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_LL_B32_B16 [[COPY]], [[S_MOV_B32_]] + ; GFX9PLUS-NEXT: [[S_PACK_LL_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_LL_B32_B16 [[PRED_COPY]], [[S_MOV_B32_]] ; GFX9PLUS-NEXT: S_ENDPGM 0, implicit [[S_PACK_LL_B32_B16_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = G_CONSTANT i32 0 @@ -148,9 +148,9 @@ ; GFX9PLUS-LABEL: name: test_build_vector_trunc_s_v2s16_s_0_s32_s_s32 ; GFX9PLUS: liveins: $sgpr0 ; GFX9PLUS-NEXT: {{ $}} - ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX9PLUS-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 ; GFX9PLUS-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX9PLUS-NEXT: [[S_PACK_LL_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_LL_B32_B16 [[S_MOV_B32_]], [[COPY]] + ; GFX9PLUS-NEXT: [[S_PACK_LL_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_LL_B32_B16 
[[S_MOV_B32_]], [[PRED_COPY]] ; GFX9PLUS-NEXT: S_ENDPGM 0, implicit [[S_PACK_LL_B32_B16_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = G_CONSTANT i32 0 @@ -171,8 +171,8 @@ ; GFX9PLUS-LABEL: name: test_build_vector_trunc_s_v2s16_s_s32_s_undef_s32 ; GFX9PLUS: liveins: $sgpr0 ; GFX9PLUS-NEXT: {{ $}} - ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX9PLUS-NEXT: S_ENDPGM 0, implicit [[COPY]] + ; GFX9PLUS-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX9PLUS-NEXT: S_ENDPGM 0, implicit [[PRED_COPY]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = G_IMPLICIT_DEF %2:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC %0, %1 @@ -192,9 +192,9 @@ ; GFX9PLUS-LABEL: name: test_build_vector_trunc_s_v2s16_s_undef_s32_s_s32 ; GFX9PLUS: liveins: $sgpr0 ; GFX9PLUS-NEXT: {{ $}} - ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX9PLUS-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 ; GFX9PLUS-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF - ; GFX9PLUS-NEXT: [[S_PACK_LL_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_LL_B32_B16 [[DEF]], [[COPY]] + ; GFX9PLUS-NEXT: [[S_PACK_LL_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_LL_B32_B16 [[DEF]], [[PRED_COPY]] ; GFX9PLUS-NEXT: S_ENDPGM 0, implicit [[S_PACK_LL_B32_B16_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = G_IMPLICIT_DEF @@ -216,8 +216,8 @@ ; GFX9PLUS: liveins: $sgpr1 ; GFX9PLUS-NEXT: {{ $}} ; GFX9PLUS-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF - ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX9PLUS-NEXT: [[S_PACK_LL_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_LL_B32_B16 [[DEF]], [[COPY]] + ; GFX9PLUS-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX9PLUS-NEXT: [[S_PACK_LL_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_LL_B32_B16 [[DEF]], [[PRED_COPY]] ; GFX9PLUS-NEXT: S_ENDPGM 0, implicit [[S_PACK_LL_B32_B16_]] %0:sgpr(s32) = G_IMPLICIT_DEF %1:sgpr(s32) = COPY $sgpr1 @@ -238,8 +238,8 @@ ; GFX9PLUS-LABEL: name: test_build_vector_trunc_s_v2s16_s_s32_undef ; GFX9PLUS: liveins: $sgpr0 ; GFX9PLUS-NEXT: {{ $}} - ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX9PLUS-NEXT: S_ENDPGM 0, implicit [[COPY]] + ; GFX9PLUS-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX9PLUS-NEXT: S_ENDPGM 0, implicit [[PRED_COPY]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = G_IMPLICIT_DEF %2:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC %0, %1 @@ -260,8 +260,8 @@ ; GFX9PLUS: liveins: $sgpr1 ; GFX9PLUS-NEXT: {{ $}} ; GFX9PLUS-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX9PLUS-NEXT: [[S_PACK_LL_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_LL_B32_B16 [[S_MOV_B32_]], [[COPY]] + ; GFX9PLUS-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX9PLUS-NEXT: [[S_PACK_LL_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_LL_B32_B16 [[S_MOV_B32_]], [[PRED_COPY]] ; GFX9PLUS-NEXT: S_ENDPGM 0, implicit [[S_PACK_LL_B32_B16_]] %0:sgpr(s32) = G_CONSTANT i32 0 %1:sgpr(s32) = COPY $sgpr1 @@ -282,9 +282,9 @@ ; GFX9PLUS-LABEL: name: test_build_vector_trunc_s_v2s16_s_s32_zero ; GFX9PLUS: liveins: $sgpr0 ; GFX9PLUS-NEXT: {{ $}} - ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX9PLUS-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 ; GFX9PLUS-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX9PLUS-NEXT: [[S_PACK_LL_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_LL_B32_B16 [[COPY]], [[S_MOV_B32_]] + ; GFX9PLUS-NEXT: [[S_PACK_LL_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_LL_B32_B16 [[PRED_COPY]], [[S_MOV_B32_]] ; GFX9PLUS-NEXT: S_ENDPGM 0, implicit [[S_PACK_LL_B32_B16_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = 
G_CONSTANT i32 0 @@ -305,8 +305,8 @@ ; GFX9PLUS-LABEL: name: test_build_vector_trunc_lshr16_zero ; GFX9PLUS: liveins: $sgpr0 ; GFX9PLUS-NEXT: {{ $}} - ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX9PLUS-NEXT: [[S_LSHR_B32_:%[0-9]+]]:sreg_32 = S_LSHR_B32 [[COPY]], 16, implicit-def $scc + ; GFX9PLUS-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX9PLUS-NEXT: [[S_LSHR_B32_:%[0-9]+]]:sreg_32 = S_LSHR_B32 [[PRED_COPY]], 16, implicit-def $scc ; GFX9PLUS-NEXT: S_ENDPGM 0, implicit [[S_LSHR_B32_]] %0:sgpr(s32) = G_CONSTANT i32 0 %1:sgpr(s32) = COPY $sgpr0 @@ -330,11 +330,11 @@ ; GFX9PLUS-LABEL: name: test_build_vector_trunc_s_pack_lh_multi_use ; GFX9PLUS: liveins: $sgpr0, $sgpr1 ; GFX9PLUS-NEXT: {{ $}} - ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX9PLUS-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; GFX9PLUS-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX9PLUS-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 ; GFX9PLUS-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 16 - ; GFX9PLUS-NEXT: [[S_LSHR_B32_:%[0-9]+]]:sreg_32 = S_LSHR_B32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc - ; GFX9PLUS-NEXT: [[S_PACK_LL_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_LL_B32_B16 [[COPY]], [[S_LSHR_B32_]] + ; GFX9PLUS-NEXT: [[S_LSHR_B32_:%[0-9]+]]:sreg_32 = S_LSHR_B32 [[PRED_COPY1]], [[S_MOV_B32_]], implicit-def $scc + ; GFX9PLUS-NEXT: [[S_PACK_LL_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_LL_B32_B16 [[PRED_COPY]], [[S_LSHR_B32_]] ; GFX9PLUS-NEXT: S_ENDPGM 0, implicit [[S_PACK_LL_B32_B16_]], implicit [[S_LSHR_B32_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 @@ -357,11 +357,11 @@ ; GFX9PLUS-LABEL: name: test_build_vector_trunc_s_pack_hh_multi_use_lhs ; GFX9PLUS: liveins: $sgpr0, $sgpr1 ; GFX9PLUS-NEXT: {{ $}} - ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX9PLUS-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; GFX9PLUS-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX9PLUS-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 ; GFX9PLUS-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 16 - ; GFX9PLUS-NEXT: [[S_LSHR_B32_:%[0-9]+]]:sreg_32 = S_LSHR_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc - ; GFX9PLUS-NEXT: [[S_PACK_LH_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_LH_B32_B16 [[S_LSHR_B32_]], [[COPY1]] + ; GFX9PLUS-NEXT: [[S_LSHR_B32_:%[0-9]+]]:sreg_32 = S_LSHR_B32 [[PRED_COPY]], [[S_MOV_B32_]], implicit-def $scc + ; GFX9PLUS-NEXT: [[S_PACK_LH_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_LH_B32_B16 [[S_LSHR_B32_]], [[PRED_COPY1]] ; GFX9PLUS-NEXT: S_ENDPGM 0, implicit [[S_PACK_LH_B32_B16_]], implicit [[S_LSHR_B32_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 @@ -385,21 +385,21 @@ ; GFX9-LABEL: name: test_build_vector_trunc_s_pack_hh_multi_use_rhs ; GFX9: liveins: $sgpr0, $sgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 16 - ; GFX9-NEXT: [[S_LSHR_B32_:%[0-9]+]]:sreg_32 = S_LSHR_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc - ; GFX9-NEXT: [[S_LSHR_B32_1:%[0-9]+]]:sreg_32 = S_LSHR_B32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc + ; GFX9-NEXT: [[S_LSHR_B32_:%[0-9]+]]:sreg_32 = S_LSHR_B32 [[PRED_COPY]], [[S_MOV_B32_]], implicit-def $scc + ; GFX9-NEXT: [[S_LSHR_B32_1:%[0-9]+]]:sreg_32 = S_LSHR_B32 [[PRED_COPY1]], [[S_MOV_B32_]], implicit-def $scc ; 
GFX9-NEXT: [[S_PACK_LL_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_LL_B32_B16 [[S_LSHR_B32_]], [[S_LSHR_B32_1]] ; GFX9-NEXT: S_ENDPGM 0, implicit [[S_PACK_LL_B32_B16_]], implicit [[S_LSHR_B32_1]] ; GFX11-LABEL: name: test_build_vector_trunc_s_pack_hh_multi_use_rhs ; GFX11: liveins: $sgpr0, $sgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 ; GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 16 - ; GFX11-NEXT: [[S_LSHR_B32_:%[0-9]+]]:sreg_32 = S_LSHR_B32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc - ; GFX11-NEXT: [[S_PACK_HL_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_HL_B32_B16 [[COPY]], [[S_LSHR_B32_]] + ; GFX11-NEXT: [[S_LSHR_B32_:%[0-9]+]]:sreg_32 = S_LSHR_B32 [[PRED_COPY1]], [[S_MOV_B32_]], implicit-def $scc + ; GFX11-NEXT: [[S_PACK_HL_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_HL_B32_B16 [[PRED_COPY]], [[S_LSHR_B32_]] ; GFX11-NEXT: S_ENDPGM 0, implicit [[S_PACK_HL_B32_B16_]], implicit [[S_LSHR_B32_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 @@ -423,11 +423,11 @@ ; GFX9PLUS-LABEL: name: test_build_vector_trunc_s_pack_lh_wrong_shift_amt ; GFX9PLUS: liveins: $sgpr0, $sgpr1 ; GFX9PLUS-NEXT: {{ $}} - ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX9PLUS-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; GFX9PLUS-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX9PLUS-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 ; GFX9PLUS-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 15 - ; GFX9PLUS-NEXT: [[S_LSHR_B32_:%[0-9]+]]:sreg_32 = S_LSHR_B32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc - ; GFX9PLUS-NEXT: [[S_PACK_LL_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_LL_B32_B16 [[COPY]], [[S_LSHR_B32_]] + ; GFX9PLUS-NEXT: [[S_LSHR_B32_:%[0-9]+]]:sreg_32 = S_LSHR_B32 [[PRED_COPY1]], [[S_MOV_B32_]], implicit-def $scc + ; GFX9PLUS-NEXT: [[S_PACK_LL_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_LL_B32_B16 [[PRED_COPY]], [[S_LSHR_B32_]] ; GFX9PLUS-NEXT: S_ENDPGM 0, implicit [[S_PACK_LL_B32_B16_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 @@ -450,11 +450,11 @@ ; GFX9PLUS-LABEL: name: test_build_vector_trunc_s_pack_hh_wrong_shift_amt ; GFX9PLUS: liveins: $sgpr0, $sgpr1 ; GFX9PLUS-NEXT: {{ $}} - ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX9PLUS-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; GFX9PLUS-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX9PLUS-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 ; GFX9PLUS-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 15 - ; GFX9PLUS-NEXT: [[S_LSHR_B32_:%[0-9]+]]:sreg_32 = S_LSHR_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc - ; GFX9PLUS-NEXT: [[S_LSHR_B32_1:%[0-9]+]]:sreg_32 = S_LSHR_B32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc + ; GFX9PLUS-NEXT: [[S_LSHR_B32_:%[0-9]+]]:sreg_32 = S_LSHR_B32 [[PRED_COPY]], [[S_MOV_B32_]], implicit-def $scc + ; GFX9PLUS-NEXT: [[S_LSHR_B32_1:%[0-9]+]]:sreg_32 = S_LSHR_B32 [[PRED_COPY1]], [[S_MOV_B32_]], implicit-def $scc ; GFX9PLUS-NEXT: [[S_PACK_LL_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_LL_B32_B16 [[S_LSHR_B32_]], [[S_LSHR_B32_1]] ; GFX9PLUS-NEXT: S_ENDPGM 0, implicit [[S_PACK_LL_B32_B16_]] %0:sgpr(s32) = COPY $sgpr0 @@ -659,9 +659,9 @@ ; GFX9PLUS-LABEL: name: test_build_vector_trunc_s_v2s16_var_constant ; GFX9PLUS: liveins: $sgpr0 ; GFX9PLUS-NEXT: {{ $}} - ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX9PLUS-NEXT: 
[[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 ; GFX9PLUS-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 456 - ; GFX9PLUS-NEXT: [[S_PACK_LL_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_LL_B32_B16 [[COPY]], [[S_MOV_B32_]] + ; GFX9PLUS-NEXT: [[S_PACK_LL_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_LL_B32_B16 [[PRED_COPY]], [[S_MOV_B32_]] ; GFX9PLUS-NEXT: S_ENDPGM 0, implicit [[S_PACK_LL_B32_B16_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = G_CONSTANT i32 456 @@ -683,8 +683,8 @@ ; GFX9PLUS: liveins: $sgpr0 ; GFX9PLUS-NEXT: {{ $}} ; GFX9PLUS-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 456 - ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX9PLUS-NEXT: [[S_PACK_LL_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_LL_B32_B16 [[S_MOV_B32_]], [[COPY]] + ; GFX9PLUS-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX9PLUS-NEXT: [[S_PACK_LL_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_LL_B32_B16 [[S_MOV_B32_]], [[PRED_COPY]] ; GFX9PLUS-NEXT: S_ENDPGM 0, implicit [[S_PACK_LL_B32_B16_]] %0:sgpr(s32) = G_CONSTANT i32 456 %1:sgpr(s32) = COPY $sgpr0 @@ -705,9 +705,9 @@ ; GFX9PLUS-LABEL: name: test_build_vector_trunc_s_v2s16_var_0 ; GFX9PLUS: liveins: $sgpr0 ; GFX9PLUS-NEXT: {{ $}} - ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX9PLUS-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 ; GFX9PLUS-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX9PLUS-NEXT: [[S_PACK_LL_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_LL_B32_B16 [[COPY]], [[S_MOV_B32_]] + ; GFX9PLUS-NEXT: [[S_PACK_LL_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_LL_B32_B16 [[PRED_COPY]], [[S_MOV_B32_]] ; GFX9PLUS-NEXT: S_ENDPGM 0, implicit [[S_PACK_LL_B32_B16_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = G_CONSTANT i32 0 @@ -729,8 +729,8 @@ ; GFX9PLUS: liveins: $sgpr0 ; GFX9PLUS-NEXT: {{ $}} ; GFX9PLUS-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX9PLUS-NEXT: [[S_PACK_LL_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_LL_B32_B16 [[S_MOV_B32_]], [[COPY]] + ; GFX9PLUS-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX9PLUS-NEXT: [[S_PACK_LL_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_LL_B32_B16 [[S_MOV_B32_]], [[PRED_COPY]] ; GFX9PLUS-NEXT: S_ENDPGM 0, implicit [[S_PACK_LL_B32_B16_]] %0:sgpr(s32) = G_CONSTANT i32 0 %1:sgpr(s32) = COPY $sgpr0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-build-vector.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-build-vector.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-build-vector.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-build-vector.mir @@ -14,9 +14,9 @@ ; GCN-LABEL: name: test_build_vector_v_v2s32_v_s32_v_s32 ; GCN: liveins: $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 ; GCN-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -37,9 +37,9 @@ ; GCN-LABEL: name: test_build_vector_v_v2s32_s_s32_v_s32 ; GCN: liveins: $sgpr0, $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = 
REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 ; GCN-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = COPY $vgpr0 @@ -60,9 +60,9 @@ ; GCN-LABEL: name: test_build_vector_v_v2s32_v_s32_s_s32 ; GCN: liveins: $sgpr0, $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 ; GCN-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] %0:vgpr(s32) = COPY $vgpr0 %1:sgpr(s32) = COPY $sgpr0 @@ -83,9 +83,9 @@ ; GCN-LABEL: name: test_build_vector_s_v2s32_s_s32_s_s32 ; GCN: liveins: $sgpr0, $sgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 ; GCN-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 @@ -106,10 +106,10 @@ ; GCN-LABEL: name: test_build_vector_s_v2s64_s_s64_s_s64 ; GCN: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3 - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1, [[COPY1]], %subreg.sub2_sub3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[REG_SEQUENCE]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr2_sgpr3 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0_sub1, [[PRED_COPY1]], %subreg.sub2_sub3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[REG_SEQUENCE]] %0:sgpr(s64) = COPY $sgpr0_sgpr1 %1:sgpr(s64) = COPY $sgpr2_sgpr3 %4:sgpr(<2 x s64>) = G_BUILD_VECTOR %0, %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-concat-vectors.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-concat-vectors.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-concat-vectors.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-concat-vectors.mir @@ -14,10 +14,10 @@ ; GCN-LABEL: name: test_concat_vectors_v_v4s16_v_v2s16_v_v2s16 ; GCN: liveins: $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GCN-NEXT: $vgpr0_vgpr1 = COPY [[REG_SEQUENCE]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GCN-NEXT: 
[[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GCN-NEXT: $vgpr0_vgpr1 = PRED_COPY [[REG_SEQUENCE]] %0:vgpr(<2 x s16>) = COPY $vgpr0 %1:vgpr(<2 x s16>) = COPY $vgpr1 %2:vgpr(<4 x s16>) = G_CONCAT_VECTORS %0, %1 @@ -36,10 +36,10 @@ ; GCN-LABEL: name: test_concat_vectors_v_v4s16_s_v2s16_v_v2s16 ; GCN: liveins: $sgpr0, $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GCN-NEXT: $vgpr0_vgpr1 = COPY [[REG_SEQUENCE]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GCN-NEXT: $vgpr0_vgpr1 = PRED_COPY [[REG_SEQUENCE]] %0:sgpr(<2 x s16>) = COPY $sgpr0 %1:vgpr(<2 x s16>) = COPY $vgpr1 %2:vgpr(<4 x s16>) = G_CONCAT_VECTORS %0, %1 @@ -58,10 +58,10 @@ ; GCN-LABEL: name: test_concat_vectors_v_v4s16_v_v2s16_s_v2s16 ; GCN: liveins: $sgpr0, $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GCN-NEXT: $vgpr0_vgpr1 = COPY [[REG_SEQUENCE]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GCN-NEXT: $vgpr0_vgpr1 = PRED_COPY [[REG_SEQUENCE]] %0:vgpr(<2 x s16>) = COPY $vgpr0 %1:sgpr(<2 x s16>) = COPY $sgpr0 %2:vgpr(<4 x s16>) = G_CONCAT_VECTORS %0, %1 @@ -80,10 +80,10 @@ ; GCN-LABEL: name: test_concat_vectors_s_v4s16_s_v2s16_s_v2s16 ; GCN: liveins: $sgpr0, $sgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GCN-NEXT: $sgpr0_sgpr1 = COPY [[REG_SEQUENCE]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GCN-NEXT: $sgpr0_sgpr1 = PRED_COPY [[REG_SEQUENCE]] %0:sgpr(<2 x s16>) = COPY $sgpr0 %1:sgpr(<2 x s16>) = COPY $sgpr1 %2:sgpr(<4 x s16>) = G_CONCAT_VECTORS %0, %1 @@ -102,11 +102,11 @@ ; GCN-LABEL: name: test_concat_vectors_s_s96_s_v2s16_s_v2s16_s_v2s16 ; GCN: liveins: $sgpr0, $sgpr1, $sgpr2 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_96 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2 = COPY [[REG_SEQUENCE]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_96 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], 
%subreg.sub2 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2 = PRED_COPY [[REG_SEQUENCE]] %0:sgpr(<2 x s16>) = COPY $sgpr0 %1:sgpr(<2 x s16>) = COPY $sgpr1 %2:sgpr(<2 x s16>) = COPY $sgpr2 @@ -126,11 +126,11 @@ ; GCN-LABEL: name: test_concat_vectors_v_s96_v_v2s16_v_v2s16_v_v2s16 ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2 - ; GCN-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[REG_SEQUENCE]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2 + ; GCN-NEXT: $vgpr0_vgpr1_vgpr2 = PRED_COPY [[REG_SEQUENCE]] %0:vgpr(<2 x s16>) = COPY $vgpr0 %1:vgpr(<2 x s16>) = COPY $vgpr1 %2:vgpr(<2 x s16>) = COPY $vgpr2 @@ -150,12 +150,12 @@ ; GCN-LABEL: name: test_concat_vectors_s_v8s16_s_v2s16_s_v2s16_s_v2s16_s_v2s16 ; GCN: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[REG_SEQUENCE]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[REG_SEQUENCE]] %0:sgpr(<2 x s16>) = COPY $sgpr0 %1:sgpr(<2 x s16>) = COPY $sgpr1 %2:sgpr(<2 x s16>) = COPY $sgpr2 @@ -176,12 +176,12 @@ ; GCN-LABEL: name: test_concat_vectors_v_v8s16_v_v2s16_v_v2s16_v_v2s16 ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[REG_SEQUENCE]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[REG_SEQUENCE]] %0:vgpr(<2 x s16>) = COPY $vgpr0 %1:vgpr(<2 x s16>) = COPY $vgpr1 
%2:vgpr(<2 x s16>) = COPY $vgpr2 @@ -202,10 +202,10 @@ ; GCN-LABEL: name: test_concat_vectors_s_v8s16_s_v4s16_s_v4s16 ; GCN: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3 - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1, [[COPY1]], %subreg.sub2_sub3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[REG_SEQUENCE]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr2_sgpr3 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0_sub1, [[PRED_COPY1]], %subreg.sub2_sub3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[REG_SEQUENCE]] %0:sgpr(<4 x s16>) = COPY $sgpr0_sgpr1 %1:sgpr(<4 x s16>) = COPY $sgpr2_sgpr3 %2:sgpr(<8 x s16>) = G_CONCAT_VECTORS %0, %1 @@ -224,10 +224,10 @@ ; GCN-LABEL: name: test_concat_vectors_v_v8s16_v_v4s16_v_v4s16 ; GCN: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1, [[COPY1]], %subreg.sub2_sub3 - ; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[REG_SEQUENCE]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0_sub1, [[PRED_COPY1]], %subreg.sub2_sub3 + ; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[REG_SEQUENCE]] %0:vgpr(<4 x s16>) = COPY $vgpr0_vgpr1 %1:vgpr(<4 x s16>) = COPY $vgpr2_vgpr3 %2:vgpr(<8 x s16>) = G_CONCAT_VECTORS %0, %1 @@ -246,13 +246,13 @@ ; GCN-LABEL: name: test_concat_vectors_s_s160_s_v2s16_s_v2s16_s_v2s16_s_v2s16_s_v2s16 ; GCN: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_160 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3, [[COPY4]], %subreg.sub4 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4 = COPY [[REG_SEQUENCE]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_160 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3, [[PRED_COPY4]], %subreg.sub4 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4 = PRED_COPY [[REG_SEQUENCE]] %0:sgpr(<2 x s16>) = COPY $sgpr0 %1:sgpr(<2 x s16>) = COPY $sgpr1 %2:sgpr(<2 x s16>) = COPY $sgpr2 @@ -274,13 +274,13 @@ ; GCN-LABEL: name: test_concat_vectors_v_s160_v_v2s16_v_v2s16_v_v2s16_v_v2s16_v_v2s16 ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - 
; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_160 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3, [[COPY4]], %subreg.sub4 - ; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = COPY [[REG_SEQUENCE]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_160 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3, [[PRED_COPY4]], %subreg.sub4 + ; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = PRED_COPY [[REG_SEQUENCE]] %0:vgpr(<2 x s16>) = COPY $vgpr0 %1:vgpr(<2 x s16>) = COPY $vgpr1 %2:vgpr(<2 x s16>) = COPY $vgpr2 @@ -302,10 +302,10 @@ ; GCN-LABEL: name: test_concat_vectors_s_v12s16_s_v4s16_s_v4s16_s_v4s16 ; GCN: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3, $sgpr4_sgpr5 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:sreg_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_192 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1, [[COPY1]], %subreg.sub2_sub3, [[COPY2]], %subreg.sub4_sub5 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr2_sgpr3 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_192 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0_sub1, [[PRED_COPY1]], %subreg.sub2_sub3, [[PRED_COPY2]], %subreg.sub4_sub5 ; GCN-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] %0:sgpr(<4 x s16>) = COPY $sgpr0_sgpr1 %1:sgpr(<4 x s16>) = COPY $sgpr2_sgpr3 @@ -326,10 +326,10 @@ ; GCN-LABEL: name: test_concat_vectors_v_v12s16_v_v4s16_v_v4s16_v_v4s16 ; GCN: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5 - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_192 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1, [[COPY1]], %subreg.sub2_sub3, [[COPY2]], %subreg.sub4_sub5 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr4_vgpr5 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_192 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0_sub1, [[PRED_COPY1]], %subreg.sub2_sub3, [[PRED_COPY2]], %subreg.sub4_sub5 ; GCN-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] %0:vgpr(<4 x s16>) = COPY $vgpr0_vgpr1 %1:vgpr(<4 x s16>) = COPY $vgpr2_vgpr3 @@ -350,12 +350,12 @@ ; GCN-LABEL: name: test_concat_vectors_s_v16s16_s_v4s16_s_v4s16_s_v4s16_s_v4s16 ; GCN: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3, $sgpr4_sgpr5, $sgpr6_sgpr7 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:sreg_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sreg_64 = COPY $sgpr6_sgpr7 
- ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_256 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1, [[COPY1]], %subreg.sub2_sub3, [[COPY2]], %subreg.sub4_sub5, [[COPY3]], %subreg.sub6_sub7 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 = COPY [[REG_SEQUENCE]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr2_sgpr3 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr6_sgpr7 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_256 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0_sub1, [[PRED_COPY1]], %subreg.sub2_sub3, [[PRED_COPY2]], %subreg.sub4_sub5, [[PRED_COPY3]], %subreg.sub6_sub7 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 = PRED_COPY [[REG_SEQUENCE]] %0:sgpr(<4 x s16>) = COPY $sgpr0_sgpr1 %1:sgpr(<4 x s16>) = COPY $sgpr2_sgpr3 %2:sgpr(<4 x s16>) = COPY $sgpr4_sgpr5 @@ -376,10 +376,10 @@ ; GCN-LABEL: name: test_concat_vectors_s_v12s16_s_v8s16_s_v8s16 ; GCN: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4_sgpr5_sgpr6_sgpr7 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr_128 = COPY $sgpr4_sgpr5_sgpr6_sgpr7 - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_256 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1_sub2_sub3, [[COPY1]], %subreg.sub4_sub5_sub6_sub7 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 = COPY [[REG_SEQUENCE]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_128 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_128 = PRED_COPY $sgpr4_sgpr5_sgpr6_sgpr7 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_256 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0_sub1_sub2_sub3, [[PRED_COPY1]], %subreg.sub4_sub5_sub6_sub7 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 = PRED_COPY [[REG_SEQUENCE]] %0:sgpr(<8 x s16>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 %1:sgpr(<8 x s16>) = COPY $sgpr4_sgpr5_sgpr6_sgpr7 %2:sgpr(<16 x s16>) = G_CONCAT_VECTORS %0, %1 @@ -398,10 +398,10 @@ ; GCN-LABEL: name: test_concat_vectors_s_v32s16_s_v12s16_s_v12s16 ; GCN: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr_256 = COPY $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_512 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7, [[COPY1]], %subreg.sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY [[REG_SEQUENCE]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_256 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_256 = PRED_COPY $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_512 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7, [[PRED_COPY1]], %subreg.sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = PRED_COPY [[REG_SEQUENCE]] %0:sgpr(<16 x s16>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, %1:sgpr(<16 x s16>) = COPY $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 %4:sgpr(<32 x s16>) 
= G_CONCAT_VECTORS %0, %1 @@ -420,16 +420,16 @@ ; GCN-LABEL: name: test_concat_vectors_s_v32s16_s_v4s16_s_v4s16_s_v4s16_s_v4s16_s_v4s16_s_v4s16_s_v4s16_s_v4s16 ; GCN: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr14_sgpr15 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:sreg_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sreg_64 = COPY $sgpr6_sgpr7 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sreg_64 = COPY $sgpr8_sgpr9 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sreg_64 = COPY $sgpr10_sgpr11 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sreg_64 = COPY $sgpr12_sgpr13 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sreg_64 = COPY $sgpr14_sgpr15 - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_512 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1, [[COPY1]], %subreg.sub2_sub3, [[COPY2]], %subreg.sub4_sub5, [[COPY3]], %subreg.sub6_sub7, [[COPY4]], %subreg.sub8_sub9, [[COPY5]], %subreg.sub10_sub11, [[COPY6]], %subreg.sub12_sub13, [[COPY7]], %subreg.sub14_sub15 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY [[REG_SEQUENCE]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr2_sgpr3 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr6_sgpr7 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr10_sgpr11 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr12_sgpr13 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr14_sgpr15 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_512 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0_sub1, [[PRED_COPY1]], %subreg.sub2_sub3, [[PRED_COPY2]], %subreg.sub4_sub5, [[PRED_COPY3]], %subreg.sub6_sub7, [[PRED_COPY4]], %subreg.sub8_sub9, [[PRED_COPY5]], %subreg.sub10_sub11, [[PRED_COPY6]], %subreg.sub12_sub13, [[PRED_COPY7]], %subreg.sub14_sub15 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = PRED_COPY [[REG_SEQUENCE]] %0:sgpr(<4 x s16>) = COPY $sgpr0_sgpr1 %1:sgpr(<4 x s16>) = COPY $sgpr2_sgpr3 %2:sgpr(<4 x s16>) = COPY $sgpr4_sgpr5 @@ -454,16 +454,16 @@ ; GCN-LABEL: name: test_concat_vectors_v_v512_v_v64_v_v64_v_v64_v_v64_v_v64_v_v64_v_v64_v_v64 ; GCN: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5, $vgpr6_vgpr7, $vgpr8_vgpr9, $vgpr10_vgpr11, $vgpr12_vgpr13, $vgpr14_vgpr15 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY $vgpr6_vgpr7 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:vreg_64 = COPY $vgpr8_vgpr9 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY $vgpr10_vgpr11 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY $vgpr12_vgpr13 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY $vgpr14_vgpr15 - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1, [[COPY1]], %subreg.sub2_sub3, [[COPY2]], %subreg.sub4_sub5, [[COPY3]], %subreg.sub6_sub7, [[COPY4]], %subreg.sub8_sub9, [[COPY5]], %subreg.sub10_sub11, [[COPY6]], %subreg.sub12_sub13, [[COPY7]], %subreg.sub14_sub15 - ; GCN-NEXT: 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[REG_SEQUENCE]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr4_vgpr5 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr6_vgpr7 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr8_vgpr9 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr10_vgpr11 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr12_vgpr13 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr14_vgpr15 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0_sub1, [[PRED_COPY1]], %subreg.sub2_sub3, [[PRED_COPY2]], %subreg.sub4_sub5, [[PRED_COPY3]], %subreg.sub6_sub7, [[PRED_COPY4]], %subreg.sub8_sub9, [[PRED_COPY5]], %subreg.sub10_sub11, [[PRED_COPY6]], %subreg.sub12_sub13, [[PRED_COPY7]], %subreg.sub14_sub15 + ; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = PRED_COPY [[REG_SEQUENCE]] %0:vgpr(<4 x s16>) = COPY $vgpr0_vgpr1 %1:vgpr(<4 x s16>) = COPY $vgpr2_vgpr3 %2:vgpr(<4 x s16>) = COPY $vgpr4_vgpr5 @@ -493,10 +493,10 @@ ; GCN-LABEL: name: test_concat_vectors_s_v4s32_s_v2s32_s_v2s32 ; GCN: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3 - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1, [[COPY1]], %subreg.sub2_sub3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[REG_SEQUENCE]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr2_sgpr3 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0_sub1, [[PRED_COPY1]], %subreg.sub2_sub3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[REG_SEQUENCE]] %0:sgpr(<2 x s32>) = COPY $sgpr0_sgpr1 %1:sgpr(<2 x s32>) = COPY $sgpr2_sgpr3 %4:sgpr(<4 x s32>) = G_CONCAT_VECTORS %0, %1 @@ -515,10 +515,10 @@ ; GCN-LABEL: name: test_concat_vectors_v_v4s32_v_v2s32_v_v2s32 ; GCN: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1, [[COPY1]], %subreg.sub2_sub3 - ; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[REG_SEQUENCE]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0_sub1, [[PRED_COPY1]], %subreg.sub2_sub3 + ; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[REG_SEQUENCE]] %0:vgpr(<2 x s32>) = COPY $vgpr0_vgpr1 %1:vgpr(<2 x s32>) = COPY $vgpr2_vgpr3 %2:vgpr(<4 x s32>) = G_CONCAT_VECTORS %0, %1 @@ -537,12 +537,12 @@ ; GCN-LABEL: name: test_concat_vectors_s_v8s32_s_v2s32_s_v2s32_s_v2s32_s_v2s32 ; GCN: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3, $sgpr4_sgpr5, $sgpr6_sgpr7 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:sreg_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sreg_64 = COPY $sgpr6_sgpr7 
- ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_256 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1, [[COPY1]], %subreg.sub2_sub3, [[COPY2]], %subreg.sub4_sub5, [[COPY3]], %subreg.sub6_sub7 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 = COPY [[REG_SEQUENCE]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr2_sgpr3 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr6_sgpr7 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_256 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0_sub1, [[PRED_COPY1]], %subreg.sub2_sub3, [[PRED_COPY2]], %subreg.sub4_sub5, [[PRED_COPY3]], %subreg.sub6_sub7 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 = PRED_COPY [[REG_SEQUENCE]] %0:sgpr(<2 x s32>) = COPY $sgpr0_sgpr1 %1:sgpr(<2 x s32>) = COPY $sgpr2_sgpr3 %2:sgpr(<2 x s32>) = COPY $sgpr4_sgpr5 @@ -564,10 +564,10 @@ ; GCN-LABEL: name: test_concat_vectors_s_v8s32_s_v4s32_s_v4s32 ; GCN: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4_sgpr5_sgpr6_sgpr7 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr_128 = COPY $sgpr4_sgpr5_sgpr6_sgpr7 - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_256 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1_sub2_sub3, [[COPY1]], %subreg.sub4_sub5_sub6_sub7 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 = COPY [[REG_SEQUENCE]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_128 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_128 = PRED_COPY $sgpr4_sgpr5_sgpr6_sgpr7 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_256 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0_sub1_sub2_sub3, [[PRED_COPY1]], %subreg.sub4_sub5_sub6_sub7 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 = PRED_COPY [[REG_SEQUENCE]] %0:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 %1:sgpr(<4 x s32>) = COPY $sgpr4_sgpr5_sgpr6_sgpr7 %2:sgpr(<8 x s32>) = G_CONCAT_VECTORS %0, %1 @@ -586,10 +586,10 @@ ; GCN-LABEL: name: test_concat_vectors_s_v16s32_s_v8s32_s_v8s32 ; GCN: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr_256 = COPY $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_512 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7, [[COPY1]], %subreg.sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY [[REG_SEQUENCE]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_256 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_256 = PRED_COPY $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_512 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7, [[PRED_COPY1]], %subreg.sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = PRED_COPY [[REG_SEQUENCE]] %0:sgpr(<8 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, %1:sgpr(<8 x s32>) = COPY $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 %4:sgpr(<16 x s32>) = 
G_CONCAT_VECTORS %0, %1 @@ -608,16 +608,16 @@ ; GCN-LABEL: name: test_concat_vectors_v_v16s32_v_v2s32_v_v2s32_v_v2s32_v_v2s32_v_v2s32_v_v2s32_v_v2s32_v_v2s32 ; GCN: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5, $vgpr6_vgpr7, $vgpr8_vgpr9, $vgpr10_vgpr11, $vgpr12_vgpr13, $vgpr14_vgpr15 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY $vgpr6_vgpr7 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:vreg_64 = COPY $vgpr8_vgpr9 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY $vgpr10_vgpr11 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY $vgpr12_vgpr13 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY $vgpr14_vgpr15 - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1, [[COPY1]], %subreg.sub2_sub3, [[COPY2]], %subreg.sub4_sub5, [[COPY3]], %subreg.sub6_sub7, [[COPY4]], %subreg.sub8_sub9, [[COPY5]], %subreg.sub10_sub11, [[COPY6]], %subreg.sub12_sub13, [[COPY7]], %subreg.sub14_sub15 - ; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[REG_SEQUENCE]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr4_vgpr5 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr6_vgpr7 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr8_vgpr9 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr10_vgpr11 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr12_vgpr13 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr14_vgpr15 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0_sub1, [[PRED_COPY1]], %subreg.sub2_sub3, [[PRED_COPY2]], %subreg.sub4_sub5, [[PRED_COPY3]], %subreg.sub6_sub7, [[PRED_COPY4]], %subreg.sub8_sub9, [[PRED_COPY5]], %subreg.sub10_sub11, [[PRED_COPY6]], %subreg.sub12_sub13, [[PRED_COPY7]], %subreg.sub14_sub15 + ; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = PRED_COPY [[REG_SEQUENCE]] %0:vgpr(<2 x s32>) = COPY $vgpr0_vgpr1 %1:vgpr(<2 x s32>) = COPY $vgpr2_vgpr3 %2:vgpr(<2 x s32>) = COPY $vgpr4_vgpr5 @@ -642,10 +642,10 @@ ; GCN-LABEL: name: test_concat_vectors_s_v32s32_s_v16s32_s_v16s32 ; GCN: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr_512 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr_512 = COPY $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_1024 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7_sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15, [[COPY1]], %subreg.sub16_sub17_sub18_sub19_sub20_sub21_sub22_sub23_sub24_sub25_sub26_sub27_sub28_sub29_sub30_sub31 - ; GCN-NEXT: 
$sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 = COPY [[REG_SEQUENCE]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_512 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_512 = PRED_COPY $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_1024 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7_sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15, [[PRED_COPY1]], %subreg.sub16_sub17_sub18_sub19_sub20_sub21_sub22_sub23_sub24_sub25_sub26_sub27_sub28_sub29_sub30_sub31 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 = PRED_COPY [[REG_SEQUENCE]] %0:sgpr(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 %1:sgpr(<16 x s32>) = COPY $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 %2:sgpr(<32 x s32>) = G_CONCAT_VECTORS %0, %1 @@ -664,10 +664,10 @@ ; GCN-LABEL: name: test_concat_vectors_s_v4s64_s_v2s64_s_v2s64 ; GCN: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4_sgpr5_sgpr6_sgpr7 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr_128 = COPY $sgpr4_sgpr5_sgpr6_sgpr7 - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_256 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1_sub2_sub3, [[COPY1]], %subreg.sub4_sub5_sub6_sub7 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 = COPY [[REG_SEQUENCE]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_128 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_128 = PRED_COPY $sgpr4_sgpr5_sgpr6_sgpr7 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_256 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0_sub1_sub2_sub3, [[PRED_COPY1]], %subreg.sub4_sub5_sub6_sub7 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 = PRED_COPY [[REG_SEQUENCE]] %0:sgpr(<2 x s64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 %1:sgpr(<2 x s64>) = COPY $sgpr4_sgpr5_sgpr6_sgpr7 %3:sgpr(<4 x s64>) = G_CONCAT_VECTORS %0, %1 @@ -708,10 +708,10 @@ ; GCN-LABEL: name: test_concat_vectors_s_v8s64_s_v4s64_s_v4s64 ; GCN: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr_256 = COPY $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_512 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7, [[COPY1]], %subreg.sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY [[REG_SEQUENCE]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_256 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_256 = PRED_COPY $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; GCN-NEXT: 
[[REG_SEQUENCE:%[0-9]+]]:sgpr_512 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7, [[PRED_COPY1]], %subreg.sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = PRED_COPY [[REG_SEQUENCE]] %0:sgpr(<4 x s64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, %1:sgpr(<4 x s64>) = COPY $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 %4:sgpr(<8 x s64>) = G_CONCAT_VECTORS %0, %1 @@ -730,12 +730,12 @@ ; GCN-LABEL: name: test_concat_vectors_s_v8s64_s_v2s64_s_v2s64_s_v2s64_s_v2s64 ; GCN: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr12_sgpr13_sgpr14_sgpr15 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr_128 = COPY $sgpr4_sgpr5_sgpr6_sgpr7 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:sgpr_128 = COPY $sgpr8_sgpr9_sgpr10_sgpr11 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_128 = COPY $sgpr12_sgpr13_sgpr14_sgpr15 - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_512 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1_sub2_sub3, [[COPY1]], %subreg.sub4_sub5_sub6_sub7, [[COPY2]], %subreg.sub8_sub9_sub10_sub11, [[COPY3]], %subreg.sub12_sub13_sub14_sub15 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY [[REG_SEQUENCE]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_128 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_128 = PRED_COPY $sgpr4_sgpr5_sgpr6_sgpr7 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_128 = PRED_COPY $sgpr8_sgpr9_sgpr10_sgpr11 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_128 = PRED_COPY $sgpr12_sgpr13_sgpr14_sgpr15 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_512 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0_sub1_sub2_sub3, [[PRED_COPY1]], %subreg.sub4_sub5_sub6_sub7, [[PRED_COPY2]], %subreg.sub8_sub9_sub10_sub11, [[PRED_COPY3]], %subreg.sub12_sub13_sub14_sub15 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = PRED_COPY [[REG_SEQUENCE]] %0:sgpr(<2 x s64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 %1:sgpr(<2 x s64>) = COPY $sgpr4_sgpr5_sgpr6_sgpr7 %2:sgpr(<2 x s64>) = COPY $sgpr8_sgpr9_sgpr10_sgpr11 @@ -756,10 +756,10 @@ ; GCN-LABEL: name: test_concat_vectors_s_v4p1_s_v2p1_s_v2p1 ; GCN: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4_sgpr5_sgpr6_sgpr7 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr_128 = COPY $sgpr4_sgpr5_sgpr6_sgpr7 - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_256 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1_sub2_sub3, [[COPY1]], %subreg.sub4_sub5_sub6_sub7 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 = COPY [[REG_SEQUENCE]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_128 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_128 = PRED_COPY $sgpr4_sgpr5_sgpr6_sgpr7 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_256 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0_sub1_sub2_sub3, [[PRED_COPY1]], %subreg.sub4_sub5_sub6_sub7 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 = PRED_COPY [[REG_SEQUENCE]] %0:sgpr(<2 x p1>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 %1:sgpr(<2 x p1>) = COPY $sgpr4_sgpr5_sgpr6_sgpr7 %3:sgpr(<4 x p1>) = G_CONCAT_VECTORS %0, %1 @@ -778,10 +778,10 @@ ; GCN-LABEL: name: test_concat_vectors_s_v4p3_s_v2p3_s_v2p3 ; GCN: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 ; 
GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3 - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1, [[COPY1]], %subreg.sub2_sub3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[REG_SEQUENCE]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr2_sgpr3 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0_sub1, [[PRED_COPY1]], %subreg.sub2_sub3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[REG_SEQUENCE]] %0:sgpr(<2 x p3>) = COPY $sgpr0_sgpr1 %1:sgpr(<2 x p3>) = COPY $sgpr2_sgpr3 %2:sgpr(<4 x p3>) = G_CONCAT_VECTORS %0, %1 @@ -800,12 +800,12 @@ ; GCN-LABEL: name: test_concat_vectors_s_v8p3_s_v2p3_s_v2p3_v2p3_s_v2p3 ; GCN: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3, $sgpr4_sgpr5, $sgpr6_sgpr7 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:sreg_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sreg_64 = COPY $sgpr6_sgpr7 - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_256 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1, [[COPY1]], %subreg.sub2_sub3, [[COPY2]], %subreg.sub4_sub5, [[COPY3]], %subreg.sub6_sub7 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 = COPY [[REG_SEQUENCE]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr2_sgpr3 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr6_sgpr7 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_256 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0_sub1, [[PRED_COPY1]], %subreg.sub2_sub3, [[PRED_COPY2]], %subreg.sub4_sub5, [[PRED_COPY3]], %subreg.sub6_sub7 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 = PRED_COPY [[REG_SEQUENCE]] %0:sgpr(<2 x p3>) = COPY $sgpr0_sgpr1 %1:sgpr(<2 x p3>) = COPY $sgpr2_sgpr3 %2:sgpr(<2 x p3>) = COPY $sgpr4_sgpr5 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-constant.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-constant.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-constant.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-constant.mir @@ -597,9 +597,9 @@ ; WAVE64-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; WAVE64-NEXT: {{ $}} ; WAVE64-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 -1 - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B64_]] - ; WAVE64-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], 1, implicit-def $scc - ; WAVE64-NEXT: $scc = COPY [[S_AND_B32_]] + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY [[S_MOV_B64_]] + ; WAVE64-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[PRED_COPY]], 1, implicit-def $scc + ; WAVE64-NEXT: $scc = PRED_COPY [[S_AND_B32_]] ; WAVE64-NEXT: S_CBRANCH_SCC1 %bb.1, implicit $scc ; WAVE64-NEXT: S_BRANCH %bb.2 ; WAVE64-NEXT: {{ $}} @@ -614,7 +614,7 @@ ; WAVE32-NEXT: {{ $}} ; WAVE32-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 ; WAVE32-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[S_MOV_B32_]], 1, implicit-def $scc - ; WAVE32-NEXT: $scc = COPY [[S_AND_B32_]] + ; WAVE32-NEXT: $scc = PRED_COPY [[S_AND_B32_]] ; WAVE32-NEXT: S_CBRANCH_SCC1 %bb.1, implicit $scc ; WAVE32-NEXT: S_BRANCH %bb.2 ; WAVE32-NEXT: {{ $}} diff --git 
a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy.mir @@ -17,17 +17,17 @@ ; WAVE64-LABEL: name: copy ; WAVE64: liveins: $sgpr2_sgpr3 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY [[COPY]] + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr2_sgpr3 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY [[PRED_COPY]] ; WAVE64-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; WAVE64-NEXT: FLAT_STORE_DWORD [[COPY1]], [[DEF]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) + ; WAVE64-NEXT: FLAT_STORE_DWORD [[PRED_COPY1]], [[DEF]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) ; WAVE32-LABEL: name: copy ; WAVE32: liveins: $sgpr2_sgpr3 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3 + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr2_sgpr3 ; WAVE32-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; WAVE32-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; WAVE32-NEXT: GLOBAL_STORE_DWORD_SADDR [[V_MOV_B32_e32_]], [[DEF]], [[COPY]], 0, 0, implicit $exec :: (store (s32), addrspace 1) + ; WAVE32-NEXT: GLOBAL_STORE_DWORD_SADDR [[V_MOV_B32_e32_]], [[DEF]], [[PRED_COPY]], 0, 0, implicit $exec :: (store (s32), addrspace 1) %0:sgpr(p1) = COPY $sgpr2_sgpr3 %1:vgpr(p1) = COPY %0 %2:vgpr(s32) = G_IMPLICIT_DEF @@ -46,25 +46,25 @@ ; WAVE64-LABEL: name: copy_vcc_bank_sgpr_bank ; WAVE64: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3, $scc ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; WAVE64-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; WAVE64-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $scc - ; WAVE64-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, [[COPY3]], implicit-def $scc + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; WAVE64-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; WAVE64-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $scc + ; WAVE64-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, [[PRED_COPY3]], implicit-def $scc ; WAVE64-NEXT: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 0, [[S_AND_B32_]], implicit $exec - ; WAVE64-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY2]], 0, [[COPY1]], [[V_CMP_NE_U32_e64_]], implicit $exec - ; WAVE64-NEXT: FLAT_STORE_DWORD [[COPY]], [[V_CNDMASK_B32_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) + ; WAVE64-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[PRED_COPY2]], 0, [[PRED_COPY1]], [[V_CMP_NE_U32_e64_]], implicit $exec + ; WAVE64-NEXT: FLAT_STORE_DWORD [[PRED_COPY]], [[V_CNDMASK_B32_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) ; WAVE32-LABEL: name: copy_vcc_bank_sgpr_bank ; WAVE32: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3, $scc ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; WAVE32-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; WAVE32-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $scc - ; WAVE32-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = 
S_AND_B32 1, [[COPY3]], implicit-def $scc + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; WAVE32-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; WAVE32-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $scc + ; WAVE32-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, [[PRED_COPY3]], implicit-def $scc ; WAVE32-NEXT: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_NE_U32_e64 0, [[S_AND_B32_]], implicit $exec - ; WAVE32-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY2]], 0, [[COPY1]], [[V_CMP_NE_U32_e64_]], implicit $exec - ; WAVE32-NEXT: GLOBAL_STORE_DWORD [[COPY]], [[V_CNDMASK_B32_e64_]], 0, 0, implicit $exec :: (store (s32), addrspace 1) + ; WAVE32-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[PRED_COPY2]], 0, [[PRED_COPY1]], [[V_CMP_NE_U32_e64_]], implicit $exec + ; WAVE32-NEXT: GLOBAL_STORE_DWORD [[PRED_COPY]], [[V_CNDMASK_B32_e64_]], 0, 0, implicit $exec :: (store (s32), addrspace 1) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 %2:vgpr(s32) = COPY $vgpr3 @@ -86,30 +86,30 @@ ; WAVE64-LABEL: name: copy_vcc_bank_sgpr_bank_2_uses ; WAVE64: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3, $scc ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; WAVE64-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; WAVE64-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $scc - ; WAVE64-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, [[COPY3]], implicit-def $scc + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; WAVE64-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; WAVE64-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $scc + ; WAVE64-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, [[PRED_COPY3]], implicit-def $scc ; WAVE64-NEXT: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 0, [[S_AND_B32_]], implicit $exec - ; WAVE64-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY2]], 0, [[COPY1]], [[V_CMP_NE_U32_e64_]], implicit $exec - ; WAVE64-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32 = S_AND_B32 1, [[COPY3]], implicit-def $scc + ; WAVE64-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[PRED_COPY2]], 0, [[PRED_COPY1]], [[V_CMP_NE_U32_e64_]], implicit $exec + ; WAVE64-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32 = S_AND_B32 1, [[PRED_COPY3]], implicit-def $scc ; WAVE64-NEXT: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 0, [[S_AND_B32_1]], implicit $exec - ; WAVE64-NEXT: [[V_CNDMASK_B32_e64_1:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_CNDMASK_B32_e64_]], 0, [[COPY1]], [[V_CMP_NE_U32_e64_1]], implicit $exec - ; WAVE64-NEXT: FLAT_STORE_DWORD [[COPY]], [[V_CNDMASK_B32_e64_1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) + ; WAVE64-NEXT: [[V_CNDMASK_B32_e64_1:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_CNDMASK_B32_e64_]], 0, [[PRED_COPY1]], [[V_CMP_NE_U32_e64_1]], implicit $exec + ; WAVE64-NEXT: FLAT_STORE_DWORD [[PRED_COPY]], [[V_CNDMASK_B32_e64_1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) ; WAVE32-LABEL: name: copy_vcc_bank_sgpr_bank_2_uses ; WAVE32: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3, $scc ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY 
$vgpr2 - ; WAVE32-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; WAVE32-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $scc - ; WAVE32-NEXT: [[COPY4:%[0-9]+]]:sreg_32_xm0_xexec = COPY [[COPY3]] - ; WAVE32-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY2]], 0, [[COPY1]], [[COPY4]], implicit $exec - ; WAVE32-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, [[COPY3]], implicit-def $scc + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; WAVE32-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; WAVE32-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $scc + ; WAVE32-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32_xm0_xexec = PRED_COPY [[PRED_COPY3]] + ; WAVE32-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[PRED_COPY2]], 0, [[PRED_COPY1]], [[PRED_COPY4]], implicit $exec + ; WAVE32-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, [[PRED_COPY3]], implicit-def $scc ; WAVE32-NEXT: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_NE_U32_e64 0, [[S_AND_B32_]], implicit $exec - ; WAVE32-NEXT: [[V_CNDMASK_B32_e64_1:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_CNDMASK_B32_e64_]], 0, [[COPY1]], [[V_CMP_NE_U32_e64_]], implicit $exec - ; WAVE32-NEXT: GLOBAL_STORE_DWORD [[COPY]], [[V_CNDMASK_B32_e64_1]], 0, 0, implicit $exec :: (store (s32), addrspace 1) + ; WAVE32-NEXT: [[V_CNDMASK_B32_e64_1:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_CNDMASK_B32_e64_]], 0, [[PRED_COPY1]], [[V_CMP_NE_U32_e64_]], implicit $exec + ; WAVE32-NEXT: GLOBAL_STORE_DWORD [[PRED_COPY]], [[V_CNDMASK_B32_e64_1]], 0, 0, implicit $exec :: (store (s32), addrspace 1) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 %2:vgpr(s32) = COPY $vgpr3 @@ -134,21 +134,21 @@ ; WAVE64-LABEL: name: copy_vcc_bank_scc_physreg ; WAVE64: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3, $scc ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; WAVE64-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; WAVE64-NEXT: [[COPY3:%[0-9]+]]:sreg_64_xexec = COPY $scc - ; WAVE64-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY2]], 0, [[COPY1]], [[COPY3]], implicit $exec - ; WAVE64-NEXT: FLAT_STORE_DWORD [[COPY]], [[V_CNDMASK_B32_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; WAVE64-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; WAVE64-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_64_xexec = PRED_COPY $scc + ; WAVE64-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[PRED_COPY2]], 0, [[PRED_COPY1]], [[PRED_COPY3]], implicit $exec + ; WAVE64-NEXT: FLAT_STORE_DWORD [[PRED_COPY]], [[V_CNDMASK_B32_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) ; WAVE32-LABEL: name: copy_vcc_bank_scc_physreg ; WAVE32: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3, $scc ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; WAVE32-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; WAVE32-NEXT: [[COPY3:%[0-9]+]]:sreg_32_xm0_xexec = COPY $scc - ; WAVE32-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY2]], 0, [[COPY1]], [[COPY3]], implicit $exec - ; WAVE32-NEXT: GLOBAL_STORE_DWORD [[COPY]], 
[[V_CNDMASK_B32_e64_]], 0, 0, implicit $exec :: (store (s32), addrspace 1) + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; WAVE32-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; WAVE32-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32_xm0_xexec = PRED_COPY $scc + ; WAVE32-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[PRED_COPY2]], 0, [[PRED_COPY1]], [[PRED_COPY3]], implicit $exec + ; WAVE32-NEXT: GLOBAL_STORE_DWORD [[PRED_COPY]], [[V_CNDMASK_B32_e64_]], 0, 0, implicit $exec :: (store (s32), addrspace 1) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 %2:vgpr(s32) = COPY $vgpr3 @@ -169,13 +169,13 @@ ; WAVE64-LABEL: name: copy_sgpr_no_type ; WAVE64: liveins: $sgpr0 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0 - ; WAVE64-NEXT: S_ENDPGM 0, implicit [[COPY]] + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32_xm0 = PRED_COPY $sgpr0 + ; WAVE64-NEXT: S_ENDPGM 0, implicit [[PRED_COPY]] ; WAVE32-LABEL: name: copy_sgpr_no_type ; WAVE32: liveins: $sgpr0 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0 - ; WAVE32-NEXT: S_ENDPGM 0, implicit [[COPY]] + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32_xm0 = PRED_COPY $sgpr0 + ; WAVE32-NEXT: S_ENDPGM 0, implicit [[PRED_COPY]] %0:sreg_32_xm0 = COPY $sgpr0 %1:sreg_32_xm0 = COPY %0 S_ENDPGM 0, implicit %1 @@ -195,13 +195,13 @@ ; WAVE64-LABEL: name: copy_vgpr_no_type ; WAVE64: liveins: $vgpr0 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE64-NEXT: S_ENDPGM 0, implicit [[COPY]] + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE64-NEXT: S_ENDPGM 0, implicit [[PRED_COPY]] ; WAVE32-LABEL: name: copy_vgpr_no_type ; WAVE32: liveins: $vgpr0 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE32-NEXT: S_ENDPGM 0, implicit [[COPY]] + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE32-NEXT: S_ENDPGM 0, implicit [[PRED_COPY]] %0:vgpr_32 = COPY $vgpr0 %1:vgpr_32 = COPY %0 S_ENDPGM 0, implicit %1 @@ -221,13 +221,13 @@ ; WAVE64-LABEL: name: copy_maybe_vcc ; WAVE64: liveins: $sgpr0_sgpr1 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY $sgpr0_sgpr1 - ; WAVE64-NEXT: S_ENDPGM 0, implicit [[COPY]] + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64_xexec = PRED_COPY $sgpr0_sgpr1 + ; WAVE64-NEXT: S_ENDPGM 0, implicit [[PRED_COPY]] ; WAVE32-LABEL: name: copy_maybe_vcc ; WAVE32: liveins: $sgpr0_sgpr1 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY $sgpr0_sgpr1 - ; WAVE32-NEXT: S_ENDPGM 0, implicit [[COPY]] + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64_xexec = PRED_COPY $sgpr0_sgpr1 + ; WAVE32-NEXT: S_ENDPGM 0, implicit [[PRED_COPY]] %0:sreg_64_xexec = COPY $sgpr0_sgpr1 %1:sreg_64_xexec = COPY %0 S_ENDPGM 0, implicit %1 @@ -249,15 +249,15 @@ ; WAVE64-LABEL: name: copy_s1_vcc_to_vcc ; WAVE64: liveins: $sgpr0_sgpr1 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:sreg_64_xexec = COPY [[COPY]] - ; WAVE64-NEXT: S_ENDPGM 0, implicit [[COPY1]] + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64_xexec = PRED_COPY [[PRED_COPY]] + ; WAVE64-NEXT: S_ENDPGM 0, implicit [[PRED_COPY1]] ; WAVE32-LABEL: name: copy_s1_vcc_to_vcc ; WAVE32: liveins: $sgpr0_sgpr1 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: 
[[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sreg_32_xm0_xexec = COPY [[COPY]] - ; WAVE32-NEXT: S_ENDPGM 0, implicit [[COPY1]] + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32_xm0_xexec = PRED_COPY [[PRED_COPY]] + ; WAVE32-NEXT: S_ENDPGM 0, implicit [[PRED_COPY1]] %0:sgpr(s32) = COPY $sgpr0 %1:vcc(s1) = G_TRUNC %0 %2:vcc(s1) = COPY %1 @@ -278,14 +278,14 @@ ; WAVE64-LABEL: name: copy_s64_to_vcc ; WAVE64: liveins: $sgpr0_sgpr1 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; WAVE64-NEXT: $vcc = COPY [[COPY]] + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; WAVE64-NEXT: $vcc = PRED_COPY [[PRED_COPY]] ; WAVE64-NEXT: S_ENDPGM 0, implicit $vcc ; WAVE32-LABEL: name: copy_s64_to_vcc ; WAVE32: liveins: $sgpr0_sgpr1 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; WAVE32-NEXT: $vcc = COPY [[COPY]] + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; WAVE32-NEXT: $vcc = PRED_COPY [[PRED_COPY]] ; WAVE32-NEXT: S_ENDPGM 0, implicit $vcc_lo %0:sgpr(s64) = COPY $sgpr0_sgpr1 $vcc = COPY %0 @@ -306,14 +306,14 @@ ; WAVE64-LABEL: name: copy_s32_to_vcc_lo ; WAVE64: liveins: $sgpr0 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; WAVE64-NEXT: $vcc_lo = COPY [[COPY]] + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; WAVE64-NEXT: $vcc_lo = PRED_COPY [[PRED_COPY]] ; WAVE64-NEXT: S_ENDPGM 0, implicit $vcc ; WAVE32-LABEL: name: copy_s32_to_vcc_lo ; WAVE32: liveins: $sgpr0 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; WAVE32-NEXT: $vcc_lo = COPY [[COPY]] + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; WAVE32-NEXT: $vcc_lo = PRED_COPY [[PRED_COPY]] ; WAVE32-NEXT: S_ENDPGM 0, implicit $vcc_lo %0:sgpr(s32) = COPY $sgpr0 $vcc_lo = COPY %0 @@ -334,13 +334,13 @@ ; WAVE64-LABEL: name: copy_vcc_to_s64 ; WAVE64: liveins: $vcc ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $vcc - ; WAVE64-NEXT: S_ENDPGM 0, implicit [[COPY]] + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $vcc + ; WAVE64-NEXT: S_ENDPGM 0, implicit [[PRED_COPY]] ; WAVE32-LABEL: name: copy_vcc_to_s64 ; WAVE32: liveins: $vcc ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $vcc - ; WAVE32-NEXT: S_ENDPGM 0, implicit [[COPY]] + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $vcc + ; WAVE32-NEXT: S_ENDPGM 0, implicit [[PRED_COPY]] %0:sgpr(s64) = COPY $vcc S_ENDPGM 0, implicit %0 @@ -359,13 +359,13 @@ ; WAVE64-LABEL: name: copy_vcc_lo_to_s32 ; WAVE64: liveins: $vcc ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $vcc_lo - ; WAVE64-NEXT: S_ENDPGM 0, implicit [[COPY]] + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $vcc_lo + ; WAVE64-NEXT: S_ENDPGM 0, implicit [[PRED_COPY]] ; WAVE32-LABEL: name: copy_vcc_lo_to_s32 ; WAVE32: liveins: $vcc ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $vcc_lo - ; WAVE32-NEXT: S_ENDPGM 0, implicit [[COPY]] + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $vcc_lo + ; WAVE32-NEXT: S_ENDPGM 0, implicit [[PRED_COPY]] %0:sgpr(s32) = COPY $vcc_lo S_ENDPGM 0, implicit %0 @@ -384,17 +384,17 @@ ; WAVE64-LABEL: name: copy_s1_to_vcc ; WAVE64: liveins: $sgpr0_sgpr1 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = 
COPY [[COPY]].sub0 - ; WAVE64-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, [[COPY1]], implicit-def $scc + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub0 + ; WAVE64-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, [[PRED_COPY1]], implicit-def $scc ; WAVE64-NEXT: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_NE_U32_e64 0, [[S_AND_B32_]], implicit $exec ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_NE_U32_e64_]] ; WAVE32-LABEL: name: copy_s1_to_vcc ; WAVE32: liveins: $sgpr0_sgpr1 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 - ; WAVE32-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, [[COPY1]], implicit-def $scc + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub0 + ; WAVE32-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, [[PRED_COPY1]], implicit-def $scc ; WAVE32-NEXT: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_32 = V_CMP_NE_U32_e64 0, [[S_AND_B32_]], implicit $exec ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_NE_U32_e64_]] %0:sgpr(s64) = COPY $sgpr0_sgpr1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ctlz-zero-undef.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ctlz-zero-undef.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ctlz-zero-undef.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ctlz-zero-undef.mir @@ -14,8 +14,8 @@ ; CHECK-LABEL: name: ctlz_zero_undef_s32_ss ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; CHECK-NEXT: [[S_FLBIT_I32_B32_:%[0-9]+]]:sreg_32 = S_FLBIT_I32_B32 [[COPY]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; CHECK-NEXT: [[S_FLBIT_I32_B32_:%[0-9]+]]:sreg_32 = S_FLBIT_I32_B32 [[PRED_COPY]] ; CHECK-NEXT: S_ENDPGM 0, implicit [[S_FLBIT_I32_B32_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = G_CTLZ_ZERO_UNDEF %0 @@ -35,8 +35,8 @@ ; CHECK-LABEL: name: ctlz_zero_undef_s32_vs ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; CHECK-NEXT: [[V_FFBH_U32_e64_:%[0-9]+]]:vgpr_32 = V_FFBH_U32_e64 [[COPY]], implicit $exec + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; CHECK-NEXT: [[V_FFBH_U32_e64_:%[0-9]+]]:vgpr_32 = V_FFBH_U32_e64 [[PRED_COPY]], implicit $exec ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_FFBH_U32_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = G_CTLZ_ZERO_UNDEF %0 @@ -56,8 +56,8 @@ ; CHECK-LABEL: name: ctlz_zero_undef_s32_vv ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[V_FFBH_U32_e64_:%[0-9]+]]:vgpr_32 = V_FFBH_U32_e64 [[COPY]], implicit $exec + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[V_FFBH_U32_e64_:%[0-9]+]]:vgpr_32 = V_FFBH_U32_e64 [[PRED_COPY]], implicit $exec ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_FFBH_U32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = G_CTLZ_ZERO_UNDEF %0 @@ -77,8 +77,8 @@ ; CHECK-LABEL: name: ctlz_zero_undef_s64_ss ; CHECK: liveins: $sgpr0_sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[S_FLBIT_I32_B64_:%[0-9]+]]:sreg_32 = S_FLBIT_I32_B64 [[COPY]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[S_FLBIT_I32_B64_:%[0-9]+]]:sreg_32 = 
S_FLBIT_I32_B64 [[PRED_COPY]] ; CHECK-NEXT: S_ENDPGM 0, implicit [[S_FLBIT_I32_B64_]] %0:sgpr(s64) = COPY $sgpr0_sgpr1 %1:sgpr(s32) = G_CTLZ_ZERO_UNDEF %0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ctpop.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ctpop.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ctpop.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ctpop.mir @@ -14,8 +14,8 @@ ; CHECK-LABEL: name: ctpop_s32_ss ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; CHECK-NEXT: [[S_BCNT1_I32_B32_:%[0-9]+]]:sreg_32 = S_BCNT1_I32_B32 [[COPY]], implicit-def $scc + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; CHECK-NEXT: [[S_BCNT1_I32_B32_:%[0-9]+]]:sreg_32 = S_BCNT1_I32_B32 [[PRED_COPY]], implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0, implicit [[S_BCNT1_I32_B32_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = G_CTPOP %0 @@ -35,8 +35,8 @@ ; CHECK-LABEL: name: ctpop_s32_vs ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; CHECK-NEXT: [[V_BCNT_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_BCNT_U32_B32_e64 [[COPY]], 0, implicit $exec + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; CHECK-NEXT: [[V_BCNT_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_BCNT_U32_B32_e64 [[PRED_COPY]], 0, implicit $exec ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_BCNT_U32_B32_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = G_CTPOP %0 @@ -56,8 +56,8 @@ ; CHECK-LABEL: name: ctpop_s32_vv ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[V_BCNT_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_BCNT_U32_B32_e64 [[COPY]], 0, implicit $exec + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[V_BCNT_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_BCNT_U32_B32_e64 [[PRED_COPY]], 0, implicit $exec ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_BCNT_U32_B32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = G_CTPOP %0 @@ -77,9 +77,9 @@ ; CHECK-LABEL: name: add_ctpop_s32_v_vv_commute0 ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[V_BCNT_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_BCNT_U32_B32_e64 [[COPY]], [[COPY1]], implicit $exec + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[V_BCNT_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_BCNT_U32_B32_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_BCNT_U32_B32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -101,9 +101,9 @@ ; CHECK-LABEL: name: add_ctpop_s32_v_vv_commute1 ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[V_BCNT_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_BCNT_U32_B32_e64 [[COPY]], [[COPY1]], implicit $exec + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[V_BCNT_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_BCNT_U32_B32_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_BCNT_U32_B32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -126,10 +126,10 @@ ; CHECK-LABEL: name: add_ctpop_s32_s_ss_commute0 ; CHECK: liveins: 
$sgpr0, $sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; CHECK-NEXT: [[S_BCNT1_I32_B32_:%[0-9]+]]:sreg_32 = S_BCNT1_I32_B32 [[COPY]], implicit-def $scc - ; CHECK-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BCNT1_I32_B32_]], [[COPY1]], implicit-def $scc + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; CHECK-NEXT: [[S_BCNT1_I32_B32_:%[0-9]+]]:sreg_32 = S_BCNT1_I32_B32 [[PRED_COPY]], implicit-def $scc + ; CHECK-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BCNT1_I32_B32_]], [[PRED_COPY1]], implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0, implicit [[S_ADD_I32_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 @@ -151,9 +151,9 @@ ; CHECK-LABEL: name: add_ctpop_s32_v_vs_commute0 ; CHECK: liveins: $vgpr0, $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; CHECK-NEXT: [[V_BCNT_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_BCNT_U32_B32_e64 [[COPY]], [[COPY1]], implicit $exec + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; CHECK-NEXT: [[V_BCNT_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_BCNT_U32_B32_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_BCNT_U32_B32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:sgpr(s32) = COPY $sgpr0 @@ -176,9 +176,9 @@ ; CHECK-LABEL: name: add_ctpop_s32_v_sv_commute0 ; CHECK: liveins: $vgpr0, $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; CHECK-NEXT: [[V_BCNT_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_BCNT_U32_B32_e64 [[COPY1]], [[COPY]], implicit $exec + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; CHECK-NEXT: [[V_BCNT_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_BCNT_U32_B32_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_BCNT_U32_B32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:sgpr(s32) = COPY $sgpr0 @@ -201,9 +201,9 @@ ; CHECK-LABEL: name: add_ctpop_s32_s_sv_commute0 ; CHECK: liveins: $sgpr0, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[V_BCNT_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_BCNT_U32_B32_e64 [[COPY]], [[COPY1]], implicit $exec + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[V_BCNT_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_BCNT_U32_B32_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_BCNT_U32_B32_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = COPY $vgpr0 @@ -225,8 +225,8 @@ ; CHECK-LABEL: name: ctpop_s64_ss ; CHECK: liveins: $sgpr0_sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[S_BCNT1_I32_B64_:%[0-9]+]]:sreg_32 = S_BCNT1_I32_B64 [[COPY]], implicit-def $scc + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[S_BCNT1_I32_B64_:%[0-9]+]]:sreg_32 = S_BCNT1_I32_B64 [[PRED_COPY]], implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0, implicit [[S_BCNT1_I32_B64_]] %0:sgpr(s64) = COPY $sgpr0_sgpr1 %1:sgpr(s32) = G_CTPOP %0 diff --git 
a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-cttz-zero-undef.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-cttz-zero-undef.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-cttz-zero-undef.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-cttz-zero-undef.mir @@ -14,8 +14,8 @@ ; CHECK-LABEL: name: cttz_zero_undef_s32_ss ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; CHECK-NEXT: [[S_FF1_I32_B32_:%[0-9]+]]:sreg_32 = S_FF1_I32_B32 [[COPY]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; CHECK-NEXT: [[S_FF1_I32_B32_:%[0-9]+]]:sreg_32 = S_FF1_I32_B32 [[PRED_COPY]] ; CHECK-NEXT: S_ENDPGM 0, implicit [[S_FF1_I32_B32_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = G_CTTZ_ZERO_UNDEF %0 @@ -35,8 +35,8 @@ ; CHECK-LABEL: name: cttz_zero_undef_s32_vs ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; CHECK-NEXT: [[V_FFBL_B32_e64_:%[0-9]+]]:vgpr_32 = V_FFBL_B32_e64 [[COPY]], implicit $exec + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; CHECK-NEXT: [[V_FFBL_B32_e64_:%[0-9]+]]:vgpr_32 = V_FFBL_B32_e64 [[PRED_COPY]], implicit $exec ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_FFBL_B32_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = G_CTTZ_ZERO_UNDEF %0 @@ -56,8 +56,8 @@ ; CHECK-LABEL: name: cttz_zero_undef_s32_vv ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[V_FFBL_B32_e64_:%[0-9]+]]:vgpr_32 = V_FFBL_B32_e64 [[COPY]], implicit $exec + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[V_FFBL_B32_e64_:%[0-9]+]]:vgpr_32 = V_FFBL_B32_e64 [[PRED_COPY]], implicit $exec ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_FFBL_B32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = G_CTTZ_ZERO_UNDEF %0 @@ -77,8 +77,8 @@ ; CHECK-LABEL: name: cttz_zero_undef_s64_ss ; CHECK: liveins: $sgpr0_sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[S_FF1_I32_B64_:%[0-9]+]]:sreg_32 = S_FF1_I32_B64 [[COPY]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[S_FF1_I32_B64_:%[0-9]+]]:sreg_32 = S_FF1_I32_B64 [[PRED_COPY]] ; CHECK-NEXT: S_ENDPGM 0, implicit [[S_FF1_I32_B64_]] %0:sgpr(s64) = COPY $sgpr0_sgpr1 %1:sgpr(s32) = G_CTTZ_ZERO_UNDEF %0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-extract-vector-elt.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-extract-vector-elt.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-extract-vector-elt.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-extract-vector-elt.mir @@ -18,18 +18,18 @@ ; MOVREL-LABEL: name: extract_vector_elt_s_s32_v2s32 ; MOVREL: liveins: $sgpr0_sgpr1, $sgpr2 ; MOVREL-NEXT: {{ $}} - ; MOVREL-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; MOVREL-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; MOVREL-NEXT: $m0 = COPY [[COPY1]] - ; MOVREL-NEXT: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]] + ; MOVREL-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; MOVREL-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; MOVREL-NEXT: $m0 = PRED_COPY [[PRED_COPY1]] + ; MOVREL-NEXT: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[PRED_COPY]].sub0, implicit $m0, implicit [[PRED_COPY]] ; MOVREL-NEXT: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]] ; GPRIDX-LABEL: name: extract_vector_elt_s_s32_v2s32 ; GPRIDX: liveins: 
$sgpr0_sgpr1, $sgpr2 ; GPRIDX-NEXT: {{ $}} - ; GPRIDX-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GPRIDX-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GPRIDX-NEXT: $m0 = COPY [[COPY1]] - ; GPRIDX-NEXT: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]] + ; GPRIDX-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GPRIDX-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GPRIDX-NEXT: $m0 = PRED_COPY [[PRED_COPY1]] + ; GPRIDX-NEXT: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[PRED_COPY]].sub0, implicit $m0, implicit [[PRED_COPY]] ; GPRIDX-NEXT: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]] %0:sgpr(<2 x s32>) = COPY $sgpr0_sgpr1 %1:sgpr(s32) = COPY $sgpr2 @@ -49,18 +49,18 @@ ; MOVREL-LABEL: name: extract_vector_elt_s_s32_v3s32 ; MOVREL: liveins: $sgpr0_sgpr1_sgpr2, $sgpr3 ; MOVREL-NEXT: {{ $}} - ; MOVREL-NEXT: [[COPY:%[0-9]+]]:sgpr_96 = COPY $sgpr0_sgpr1_sgpr2 - ; MOVREL-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; MOVREL-NEXT: $m0 = COPY [[COPY1]] - ; MOVREL-NEXT: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]] + ; MOVREL-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_96 = PRED_COPY $sgpr0_sgpr1_sgpr2 + ; MOVREL-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; MOVREL-NEXT: $m0 = PRED_COPY [[PRED_COPY1]] + ; MOVREL-NEXT: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[PRED_COPY]].sub0, implicit $m0, implicit [[PRED_COPY]] ; MOVREL-NEXT: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]] ; GPRIDX-LABEL: name: extract_vector_elt_s_s32_v3s32 ; GPRIDX: liveins: $sgpr0_sgpr1_sgpr2, $sgpr3 ; GPRIDX-NEXT: {{ $}} - ; GPRIDX-NEXT: [[COPY:%[0-9]+]]:sgpr_96 = COPY $sgpr0_sgpr1_sgpr2 - ; GPRIDX-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GPRIDX-NEXT: $m0 = COPY [[COPY1]] - ; GPRIDX-NEXT: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]] + ; GPRIDX-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_96 = PRED_COPY $sgpr0_sgpr1_sgpr2 + ; GPRIDX-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GPRIDX-NEXT: $m0 = PRED_COPY [[PRED_COPY1]] + ; GPRIDX-NEXT: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[PRED_COPY]].sub0, implicit $m0, implicit [[PRED_COPY]] ; GPRIDX-NEXT: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]] %0:sgpr(<3 x s32>) = COPY $sgpr0_sgpr1_sgpr2 %1:sgpr(s32) = COPY $sgpr2 @@ -80,18 +80,18 @@ ; MOVREL-LABEL: name: extract_vector_elt_s_s32_v4s32 ; MOVREL: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4 ; MOVREL-NEXT: {{ $}} - ; MOVREL-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; MOVREL-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; MOVREL-NEXT: $m0 = COPY [[COPY1]] - ; MOVREL-NEXT: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]] + ; MOVREL-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_128 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; MOVREL-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; MOVREL-NEXT: $m0 = PRED_COPY [[PRED_COPY1]] + ; MOVREL-NEXT: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[PRED_COPY]].sub0, implicit $m0, implicit [[PRED_COPY]] ; MOVREL-NEXT: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]] ; GPRIDX-LABEL: name: extract_vector_elt_s_s32_v4s32 ; GPRIDX: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4 ; GPRIDX-NEXT: {{ $}} - ; GPRIDX-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GPRIDX-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GPRIDX-NEXT: $m0 = COPY [[COPY1]] - ; GPRIDX-NEXT: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = 
S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]] + ; GPRIDX-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_128 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GPRIDX-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GPRIDX-NEXT: $m0 = PRED_COPY [[PRED_COPY1]] + ; GPRIDX-NEXT: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[PRED_COPY]].sub0, implicit $m0, implicit [[PRED_COPY]] ; GPRIDX-NEXT: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]] %0:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 %1:sgpr(s32) = COPY $sgpr4 @@ -111,18 +111,18 @@ ; MOVREL-LABEL: name: extract_vector_elt_s_s32_v8s32 ; MOVREL: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8 ; MOVREL-NEXT: {{ $}} - ; MOVREL-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 - ; MOVREL-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 - ; MOVREL-NEXT: $m0 = COPY [[COPY1]] - ; MOVREL-NEXT: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]] + ; MOVREL-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_256 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + ; MOVREL-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr8 + ; MOVREL-NEXT: $m0 = PRED_COPY [[PRED_COPY1]] + ; MOVREL-NEXT: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[PRED_COPY]].sub0, implicit $m0, implicit [[PRED_COPY]] ; MOVREL-NEXT: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]] ; GPRIDX-LABEL: name: extract_vector_elt_s_s32_v8s32 ; GPRIDX: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8 ; GPRIDX-NEXT: {{ $}} - ; GPRIDX-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 - ; GPRIDX-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 - ; GPRIDX-NEXT: $m0 = COPY [[COPY1]] - ; GPRIDX-NEXT: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]] + ; GPRIDX-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_256 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + ; GPRIDX-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr8 + ; GPRIDX-NEXT: $m0 = PRED_COPY [[PRED_COPY1]] + ; GPRIDX-NEXT: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[PRED_COPY]].sub0, implicit $m0, implicit [[PRED_COPY]] ; GPRIDX-NEXT: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]] %0:sgpr(<8 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 %1:sgpr(s32) = COPY $sgpr8 @@ -142,18 +142,18 @@ ; MOVREL-LABEL: name: extract_vector_elt_s_s32_v16s32 ; MOVREL: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 ; MOVREL-NEXT: {{ $}} - ; MOVREL-NEXT: [[COPY:%[0-9]+]]:sgpr_512 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - ; MOVREL-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 - ; MOVREL-NEXT: $m0 = COPY [[COPY1]] - ; MOVREL-NEXT: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]] + ; MOVREL-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_512 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; MOVREL-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr8 + ; MOVREL-NEXT: $m0 = PRED_COPY [[PRED_COPY1]] + ; MOVREL-NEXT: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[PRED_COPY]].sub0, implicit $m0, implicit [[PRED_COPY]] ; MOVREL-NEXT: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]] ; GPRIDX-LABEL: name: extract_vector_elt_s_s32_v16s32 ; GPRIDX: liveins: 
$sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 ; GPRIDX-NEXT: {{ $}} - ; GPRIDX-NEXT: [[COPY:%[0-9]+]]:sgpr_512 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - ; GPRIDX-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 - ; GPRIDX-NEXT: $m0 = COPY [[COPY1]] - ; GPRIDX-NEXT: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]] + ; GPRIDX-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_512 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; GPRIDX-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr8 + ; GPRIDX-NEXT: $m0 = PRED_COPY [[PRED_COPY1]] + ; GPRIDX-NEXT: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[PRED_COPY]].sub0, implicit $m0, implicit [[PRED_COPY]] ; GPRIDX-NEXT: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]] %0:sgpr(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 %1:sgpr(s32) = COPY $sgpr8 @@ -173,18 +173,18 @@ ; MOVREL-LABEL: name: extract_vector_elt_s_s32_v32s32 ; MOVREL: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31, $sgpr40 ; MOVREL-NEXT: {{ $}} - ; MOVREL-NEXT: [[COPY:%[0-9]+]]:sgpr_1024 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 - ; MOVREL-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr40 - ; MOVREL-NEXT: $m0 = COPY [[COPY1]] - ; MOVREL-NEXT: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]] + ; MOVREL-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_1024 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 + ; MOVREL-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr40 + ; MOVREL-NEXT: $m0 = PRED_COPY [[PRED_COPY1]] + ; MOVREL-NEXT: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[PRED_COPY]].sub0, implicit $m0, implicit [[PRED_COPY]] ; MOVREL-NEXT: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]] ; GPRIDX-LABEL: name: extract_vector_elt_s_s32_v32s32 ; GPRIDX: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31, $sgpr40 ; GPRIDX-NEXT: {{ $}} - ; GPRIDX-NEXT: [[COPY:%[0-9]+]]:sgpr_1024 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 - ; GPRIDX-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr40 - ; GPRIDX-NEXT: $m0 = COPY [[COPY1]] - ; GPRIDX-NEXT: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]] + ; GPRIDX-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_1024 = PRED_COPY 
$sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 + ; GPRIDX-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr40 + ; GPRIDX-NEXT: $m0 = PRED_COPY [[PRED_COPY1]] + ; GPRIDX-NEXT: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[PRED_COPY]].sub0, implicit $m0, implicit [[PRED_COPY]] ; GPRIDX-NEXT: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]] %0:sgpr(<32 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 %1:sgpr(s32) = COPY $sgpr40 @@ -204,18 +204,18 @@ ; MOVREL-LABEL: name: extract_vector_elt_s_s64_v2s64 ; MOVREL: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4 ; MOVREL-NEXT: {{ $}} - ; MOVREL-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; MOVREL-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; MOVREL-NEXT: $m0 = COPY [[COPY1]] - ; MOVREL-NEXT: [[S_MOVRELS_B64_:%[0-9]+]]:sreg_64 = S_MOVRELS_B64 [[COPY]].sub0_sub1, implicit $m0, implicit [[COPY]] + ; MOVREL-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_128 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; MOVREL-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; MOVREL-NEXT: $m0 = PRED_COPY [[PRED_COPY1]] + ; MOVREL-NEXT: [[S_MOVRELS_B64_:%[0-9]+]]:sreg_64 = S_MOVRELS_B64 [[PRED_COPY]].sub0_sub1, implicit $m0, implicit [[PRED_COPY]] ; MOVREL-NEXT: S_ENDPGM 0, implicit [[S_MOVRELS_B64_]] ; GPRIDX-LABEL: name: extract_vector_elt_s_s64_v2s64 ; GPRIDX: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4 ; GPRIDX-NEXT: {{ $}} - ; GPRIDX-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GPRIDX-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GPRIDX-NEXT: $m0 = COPY [[COPY1]] - ; GPRIDX-NEXT: [[S_MOVRELS_B64_:%[0-9]+]]:sreg_64 = S_MOVRELS_B64 [[COPY]].sub0_sub1, implicit $m0, implicit [[COPY]] + ; GPRIDX-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_128 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GPRIDX-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GPRIDX-NEXT: $m0 = PRED_COPY [[PRED_COPY1]] + ; GPRIDX-NEXT: [[S_MOVRELS_B64_:%[0-9]+]]:sreg_64 = S_MOVRELS_B64 [[PRED_COPY]].sub0_sub1, implicit $m0, implicit [[PRED_COPY]] ; GPRIDX-NEXT: S_ENDPGM 0, implicit [[S_MOVRELS_B64_]] %0:sgpr(<2 x s64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 %1:sgpr(s32) = COPY $sgpr4 @@ -235,18 +235,18 @@ ; MOVREL-LABEL: name: extract_vector_elt_s_s64_v4s64 ; MOVREL: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8 ; MOVREL-NEXT: {{ $}} - ; MOVREL-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 - ; MOVREL-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 - ; MOVREL-NEXT: $m0 = COPY [[COPY1]] - ; MOVREL-NEXT: [[S_MOVRELS_B64_:%[0-9]+]]:sreg_64 = S_MOVRELS_B64 [[COPY]].sub0_sub1, implicit $m0, implicit [[COPY]] + ; MOVREL-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_256 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + ; MOVREL-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr8 + ; MOVREL-NEXT: $m0 = PRED_COPY [[PRED_COPY1]] + ; MOVREL-NEXT: [[S_MOVRELS_B64_:%[0-9]+]]:sreg_64 = S_MOVRELS_B64 [[PRED_COPY]].sub0_sub1, implicit $m0, implicit [[PRED_COPY]] ; MOVREL-NEXT: S_ENDPGM 0, implicit [[S_MOVRELS_B64_]] ; GPRIDX-LABEL: name: extract_vector_elt_s_s64_v4s64 ; GPRIDX: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8 ; GPRIDX-NEXT: {{ $}} - ; GPRIDX-NEXT: 
[[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 - ; GPRIDX-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 - ; GPRIDX-NEXT: $m0 = COPY [[COPY1]] - ; GPRIDX-NEXT: [[S_MOVRELS_B64_:%[0-9]+]]:sreg_64 = S_MOVRELS_B64 [[COPY]].sub0_sub1, implicit $m0, implicit [[COPY]] + ; GPRIDX-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_256 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + ; GPRIDX-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr8 + ; GPRIDX-NEXT: $m0 = PRED_COPY [[PRED_COPY1]] + ; GPRIDX-NEXT: [[S_MOVRELS_B64_:%[0-9]+]]:sreg_64 = S_MOVRELS_B64 [[PRED_COPY]].sub0_sub1, implicit $m0, implicit [[PRED_COPY]] ; GPRIDX-NEXT: S_ENDPGM 0, implicit [[S_MOVRELS_B64_]] %0:sgpr(<4 x s64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 %1:sgpr(s32) = COPY $sgpr8 @@ -266,18 +266,18 @@ ; MOVREL-LABEL: name: extract_vector_elt_s_s64_v8s64 ; MOVREL: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 ; MOVREL-NEXT: {{ $}} - ; MOVREL-NEXT: [[COPY:%[0-9]+]]:sgpr_512 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - ; MOVREL-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 - ; MOVREL-NEXT: $m0 = COPY [[COPY1]] - ; MOVREL-NEXT: [[S_MOVRELS_B64_:%[0-9]+]]:sreg_64 = S_MOVRELS_B64 [[COPY]].sub0_sub1, implicit $m0, implicit [[COPY]] + ; MOVREL-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_512 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; MOVREL-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr8 + ; MOVREL-NEXT: $m0 = PRED_COPY [[PRED_COPY1]] + ; MOVREL-NEXT: [[S_MOVRELS_B64_:%[0-9]+]]:sreg_64 = S_MOVRELS_B64 [[PRED_COPY]].sub0_sub1, implicit $m0, implicit [[PRED_COPY]] ; MOVREL-NEXT: S_ENDPGM 0, implicit [[S_MOVRELS_B64_]] ; GPRIDX-LABEL: name: extract_vector_elt_s_s64_v8s64 ; GPRIDX: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 ; GPRIDX-NEXT: {{ $}} - ; GPRIDX-NEXT: [[COPY:%[0-9]+]]:sgpr_512 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - ; GPRIDX-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 - ; GPRIDX-NEXT: $m0 = COPY [[COPY1]] - ; GPRIDX-NEXT: [[S_MOVRELS_B64_:%[0-9]+]]:sreg_64 = S_MOVRELS_B64 [[COPY]].sub0_sub1, implicit $m0, implicit [[COPY]] + ; GPRIDX-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_512 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; GPRIDX-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr8 + ; GPRIDX-NEXT: $m0 = PRED_COPY [[PRED_COPY1]] + ; GPRIDX-NEXT: [[S_MOVRELS_B64_:%[0-9]+]]:sreg_64 = S_MOVRELS_B64 [[PRED_COPY]].sub0_sub1, implicit $m0, implicit [[PRED_COPY]] ; GPRIDX-NEXT: S_ENDPGM 0, implicit [[S_MOVRELS_B64_]] %0:sgpr(<8 x s64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 %1:sgpr(s32) = COPY $sgpr8 @@ -297,18 +297,18 @@ ; MOVREL-LABEL: name: extract_vector_elt_s_s64_v16s64 ; MOVREL: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31, $sgpr40 ; MOVREL-NEXT: {{ $}} - ; MOVREL-NEXT: [[COPY:%[0-9]+]]:sgpr_1024 = COPY 
$sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 - ; MOVREL-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr40 - ; MOVREL-NEXT: $m0 = COPY [[COPY1]] - ; MOVREL-NEXT: [[S_MOVRELS_B64_:%[0-9]+]]:sreg_64 = S_MOVRELS_B64 [[COPY]].sub0_sub1, implicit $m0, implicit [[COPY]] + ; MOVREL-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_1024 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 + ; MOVREL-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr40 + ; MOVREL-NEXT: $m0 = PRED_COPY [[PRED_COPY1]] + ; MOVREL-NEXT: [[S_MOVRELS_B64_:%[0-9]+]]:sreg_64 = S_MOVRELS_B64 [[PRED_COPY]].sub0_sub1, implicit $m0, implicit [[PRED_COPY]] ; MOVREL-NEXT: S_ENDPGM 0, implicit [[S_MOVRELS_B64_]] ; GPRIDX-LABEL: name: extract_vector_elt_s_s64_v16s64 ; GPRIDX: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31, $sgpr40 ; GPRIDX-NEXT: {{ $}} - ; GPRIDX-NEXT: [[COPY:%[0-9]+]]:sgpr_1024 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 - ; GPRIDX-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr40 - ; GPRIDX-NEXT: $m0 = COPY [[COPY1]] - ; GPRIDX-NEXT: [[S_MOVRELS_B64_:%[0-9]+]]:sreg_64 = S_MOVRELS_B64 [[COPY]].sub0_sub1, implicit $m0, implicit [[COPY]] + ; GPRIDX-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_1024 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 + ; GPRIDX-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr40 + ; GPRIDX-NEXT: $m0 = PRED_COPY [[PRED_COPY1]] + ; GPRIDX-NEXT: [[S_MOVRELS_B64_:%[0-9]+]]:sreg_64 = S_MOVRELS_B64 [[PRED_COPY]].sub0_sub1, implicit $m0, implicit [[PRED_COPY]] ; GPRIDX-NEXT: S_ENDPGM 0, implicit [[S_MOVRELS_B64_]] %0:sgpr(<16 x s64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 %1:sgpr(s32) = COPY $sgpr40 @@ -328,18 +328,18 @@ ; MOVREL-LABEL: name: extract_vector_elt_s_s32_v8s32_idx_offset_1 ; MOVREL: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8 ; MOVREL-NEXT: {{ $}} - ; MOVREL-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 - ; MOVREL-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 - ; MOVREL-NEXT: $m0 = COPY [[COPY1]] - ; MOVREL-NEXT: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub1, implicit $m0, implicit [[COPY]] + ; MOVREL-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_256 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + ; MOVREL-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr8 + ; MOVREL-NEXT: $m0 = PRED_COPY [[PRED_COPY1]] + ; MOVREL-NEXT: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[PRED_COPY]].sub1, implicit $m0, implicit [[PRED_COPY]] ; MOVREL-NEXT: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]] 
; GPRIDX-LABEL: name: extract_vector_elt_s_s32_v8s32_idx_offset_1 ; GPRIDX: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8 ; GPRIDX-NEXT: {{ $}} - ; GPRIDX-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 - ; GPRIDX-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 - ; GPRIDX-NEXT: $m0 = COPY [[COPY1]] - ; GPRIDX-NEXT: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub1, implicit $m0, implicit [[COPY]] + ; GPRIDX-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_256 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + ; GPRIDX-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr8 + ; GPRIDX-NEXT: $m0 = PRED_COPY [[PRED_COPY1]] + ; GPRIDX-NEXT: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[PRED_COPY]].sub1, implicit $m0, implicit [[PRED_COPY]] ; GPRIDX-NEXT: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]] %0:sgpr(<8 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 %1:sgpr(s32) = COPY $sgpr8 @@ -361,22 +361,22 @@ ; MOVREL-LABEL: name: extract_vector_elt_s_s32_v8s32_idx_offset_m1 ; MOVREL: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8 ; MOVREL-NEXT: {{ $}} - ; MOVREL-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 - ; MOVREL-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 + ; MOVREL-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_256 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + ; MOVREL-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr8 ; MOVREL-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 - ; MOVREL-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc - ; MOVREL-NEXT: $m0 = COPY [[S_ADD_I32_]] - ; MOVREL-NEXT: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]] + ; MOVREL-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[PRED_COPY1]], [[S_MOV_B32_]], implicit-def $scc + ; MOVREL-NEXT: $m0 = PRED_COPY [[S_ADD_I32_]] + ; MOVREL-NEXT: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[PRED_COPY]].sub0, implicit $m0, implicit [[PRED_COPY]] ; MOVREL-NEXT: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]] ; GPRIDX-LABEL: name: extract_vector_elt_s_s32_v8s32_idx_offset_m1 ; GPRIDX: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8 ; GPRIDX-NEXT: {{ $}} - ; GPRIDX-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 - ; GPRIDX-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 + ; GPRIDX-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_256 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + ; GPRIDX-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr8 ; GPRIDX-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 - ; GPRIDX-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc - ; GPRIDX-NEXT: $m0 = COPY [[S_ADD_I32_]] - ; GPRIDX-NEXT: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]] + ; GPRIDX-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[PRED_COPY1]], [[S_MOV_B32_]], implicit-def $scc + ; GPRIDX-NEXT: $m0 = PRED_COPY [[S_ADD_I32_]] + ; GPRIDX-NEXT: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[PRED_COPY]].sub0, implicit $m0, implicit [[PRED_COPY]] ; GPRIDX-NEXT: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]] %0:sgpr(<8 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 %1:sgpr(s32) = COPY $sgpr8 @@ -398,18 +398,18 @@ ; MOVREL-LABEL: name: extract_vector_elt_s_s32_v8s32_idx_offset_7 ; MOVREL: liveins: 
$sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8 ; MOVREL-NEXT: {{ $}} - ; MOVREL-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 - ; MOVREL-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 - ; MOVREL-NEXT: $m0 = COPY [[COPY1]] - ; MOVREL-NEXT: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub7, implicit $m0, implicit [[COPY]] + ; MOVREL-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_256 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + ; MOVREL-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr8 + ; MOVREL-NEXT: $m0 = PRED_COPY [[PRED_COPY1]] + ; MOVREL-NEXT: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[PRED_COPY]].sub7, implicit $m0, implicit [[PRED_COPY]] ; MOVREL-NEXT: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]] ; GPRIDX-LABEL: name: extract_vector_elt_s_s32_v8s32_idx_offset_7 ; GPRIDX: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8 ; GPRIDX-NEXT: {{ $}} - ; GPRIDX-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 - ; GPRIDX-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 - ; GPRIDX-NEXT: $m0 = COPY [[COPY1]] - ; GPRIDX-NEXT: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub7, implicit $m0, implicit [[COPY]] + ; GPRIDX-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_256 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + ; GPRIDX-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr8 + ; GPRIDX-NEXT: $m0 = PRED_COPY [[PRED_COPY1]] + ; GPRIDX-NEXT: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[PRED_COPY]].sub7, implicit $m0, implicit [[PRED_COPY]] ; GPRIDX-NEXT: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]] %0:sgpr(<8 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 %1:sgpr(s32) = COPY $sgpr8 @@ -431,22 +431,22 @@ ; MOVREL-LABEL: name: extract_vector_elt_s_s32_v8s32_idx_offset_8 ; MOVREL: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8 ; MOVREL-NEXT: {{ $}} - ; MOVREL-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 - ; MOVREL-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 + ; MOVREL-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_256 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + ; MOVREL-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr8 ; MOVREL-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 8 - ; MOVREL-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc - ; MOVREL-NEXT: $m0 = COPY [[S_ADD_I32_]] - ; MOVREL-NEXT: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]] + ; MOVREL-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[PRED_COPY1]], [[S_MOV_B32_]], implicit-def $scc + ; MOVREL-NEXT: $m0 = PRED_COPY [[S_ADD_I32_]] + ; MOVREL-NEXT: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[PRED_COPY]].sub0, implicit $m0, implicit [[PRED_COPY]] ; MOVREL-NEXT: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]] ; GPRIDX-LABEL: name: extract_vector_elt_s_s32_v8s32_idx_offset_8 ; GPRIDX: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8 ; GPRIDX-NEXT: {{ $}} - ; GPRIDX-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 - ; GPRIDX-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 + ; GPRIDX-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_256 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + ; GPRIDX-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr8 ; GPRIDX-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 8 - ; GPRIDX-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 
= S_ADD_I32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc - ; GPRIDX-NEXT: $m0 = COPY [[S_ADD_I32_]] - ; GPRIDX-NEXT: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]] + ; GPRIDX-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[PRED_COPY1]], [[S_MOV_B32_]], implicit-def $scc + ; GPRIDX-NEXT: $m0 = PRED_COPY [[S_ADD_I32_]] + ; GPRIDX-NEXT: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[PRED_COPY]].sub0, implicit $m0, implicit [[PRED_COPY]] ; GPRIDX-NEXT: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]] %0:sgpr(<8 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 %1:sgpr(s32) = COPY $sgpr8 @@ -468,18 +468,18 @@ ; MOVREL-LABEL: name: extract_vector_elt_s_s64_v8s64_idx_offset_1 ; MOVREL: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 ; MOVREL-NEXT: {{ $}} - ; MOVREL-NEXT: [[COPY:%[0-9]+]]:sgpr_512 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - ; MOVREL-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 - ; MOVREL-NEXT: $m0 = COPY [[COPY1]] - ; MOVREL-NEXT: [[S_MOVRELS_B64_:%[0-9]+]]:sreg_64 = S_MOVRELS_B64 [[COPY]].sub2_sub3, implicit $m0, implicit [[COPY]] + ; MOVREL-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_512 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; MOVREL-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr8 + ; MOVREL-NEXT: $m0 = PRED_COPY [[PRED_COPY1]] + ; MOVREL-NEXT: [[S_MOVRELS_B64_:%[0-9]+]]:sreg_64 = S_MOVRELS_B64 [[PRED_COPY]].sub2_sub3, implicit $m0, implicit [[PRED_COPY]] ; MOVREL-NEXT: S_ENDPGM 0, implicit [[S_MOVRELS_B64_]] ; GPRIDX-LABEL: name: extract_vector_elt_s_s64_v8s64_idx_offset_1 ; GPRIDX: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 ; GPRIDX-NEXT: {{ $}} - ; GPRIDX-NEXT: [[COPY:%[0-9]+]]:sgpr_512 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - ; GPRIDX-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 - ; GPRIDX-NEXT: $m0 = COPY [[COPY1]] - ; GPRIDX-NEXT: [[S_MOVRELS_B64_:%[0-9]+]]:sreg_64 = S_MOVRELS_B64 [[COPY]].sub2_sub3, implicit $m0, implicit [[COPY]] + ; GPRIDX-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_512 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; GPRIDX-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr8 + ; GPRIDX-NEXT: $m0 = PRED_COPY [[PRED_COPY1]] + ; GPRIDX-NEXT: [[S_MOVRELS_B64_:%[0-9]+]]:sreg_64 = S_MOVRELS_B64 [[PRED_COPY]].sub2_sub3, implicit $m0, implicit [[PRED_COPY]] ; GPRIDX-NEXT: S_ENDPGM 0, implicit [[S_MOVRELS_B64_]] %0:sgpr(<8 x s64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 %1:sgpr(s32) = COPY $sgpr8 @@ -501,18 +501,18 @@ ; MOVREL-LABEL: name: extract_vector_elt_s_s64_v8s64_idx_offset_2 ; MOVREL: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 ; MOVREL-NEXT: {{ $}} - ; MOVREL-NEXT: [[COPY:%[0-9]+]]:sgpr_512 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - ; MOVREL-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 - ; MOVREL-NEXT: $m0 = COPY [[COPY1]] - ; MOVREL-NEXT: [[S_MOVRELS_B64_:%[0-9]+]]:sreg_64 = S_MOVRELS_B64 [[COPY]].sub4_sub5, implicit $m0, implicit [[COPY]] + ; MOVREL-NEXT: 
[[PRED_COPY:%[0-9]+]]:sgpr_512 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; MOVREL-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr8 + ; MOVREL-NEXT: $m0 = PRED_COPY [[PRED_COPY1]] + ; MOVREL-NEXT: [[S_MOVRELS_B64_:%[0-9]+]]:sreg_64 = S_MOVRELS_B64 [[PRED_COPY]].sub4_sub5, implicit $m0, implicit [[PRED_COPY]] ; MOVREL-NEXT: S_ENDPGM 0, implicit [[S_MOVRELS_B64_]] ; GPRIDX-LABEL: name: extract_vector_elt_s_s64_v8s64_idx_offset_2 ; GPRIDX: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 ; GPRIDX-NEXT: {{ $}} - ; GPRIDX-NEXT: [[COPY:%[0-9]+]]:sgpr_512 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - ; GPRIDX-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 - ; GPRIDX-NEXT: $m0 = COPY [[COPY1]] - ; GPRIDX-NEXT: [[S_MOVRELS_B64_:%[0-9]+]]:sreg_64 = S_MOVRELS_B64 [[COPY]].sub4_sub5, implicit $m0, implicit [[COPY]] + ; GPRIDX-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_512 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; GPRIDX-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr8 + ; GPRIDX-NEXT: $m0 = PRED_COPY [[PRED_COPY1]] + ; GPRIDX-NEXT: [[S_MOVRELS_B64_:%[0-9]+]]:sreg_64 = S_MOVRELS_B64 [[PRED_COPY]].sub4_sub5, implicit $m0, implicit [[PRED_COPY]] ; GPRIDX-NEXT: S_ENDPGM 0, implicit [[S_MOVRELS_B64_]] %0:sgpr(<8 x s64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 %1:sgpr(s32) = COPY $sgpr8 @@ -534,22 +534,22 @@ ; MOVREL-LABEL: name: extract_vector_elt_s_s64_v8s64_idx_offset_m1 ; MOVREL: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 ; MOVREL-NEXT: {{ $}} - ; MOVREL-NEXT: [[COPY:%[0-9]+]]:sgpr_512 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - ; MOVREL-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 + ; MOVREL-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_512 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; MOVREL-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr8 ; MOVREL-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 - ; MOVREL-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc - ; MOVREL-NEXT: $m0 = COPY [[S_ADD_I32_]] - ; MOVREL-NEXT: [[S_MOVRELS_B64_:%[0-9]+]]:sreg_64 = S_MOVRELS_B64 [[COPY]].sub0_sub1, implicit $m0, implicit [[COPY]] + ; MOVREL-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[PRED_COPY1]], [[S_MOV_B32_]], implicit-def $scc + ; MOVREL-NEXT: $m0 = PRED_COPY [[S_ADD_I32_]] + ; MOVREL-NEXT: [[S_MOVRELS_B64_:%[0-9]+]]:sreg_64 = S_MOVRELS_B64 [[PRED_COPY]].sub0_sub1, implicit $m0, implicit [[PRED_COPY]] ; MOVREL-NEXT: S_ENDPGM 0, implicit [[S_MOVRELS_B64_]] ; GPRIDX-LABEL: name: extract_vector_elt_s_s64_v8s64_idx_offset_m1 ; GPRIDX: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 ; GPRIDX-NEXT: {{ $}} - ; GPRIDX-NEXT: [[COPY:%[0-9]+]]:sgpr_512 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - ; GPRIDX-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 + ; GPRIDX-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_512 = PRED_COPY 
$sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; GPRIDX-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr8 ; GPRIDX-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 - ; GPRIDX-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc - ; GPRIDX-NEXT: $m0 = COPY [[S_ADD_I32_]] - ; GPRIDX-NEXT: [[S_MOVRELS_B64_:%[0-9]+]]:sreg_64 = S_MOVRELS_B64 [[COPY]].sub0_sub1, implicit $m0, implicit [[COPY]] + ; GPRIDX-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[PRED_COPY1]], [[S_MOV_B32_]], implicit-def $scc + ; GPRIDX-NEXT: $m0 = PRED_COPY [[S_ADD_I32_]] + ; GPRIDX-NEXT: [[S_MOVRELS_B64_:%[0-9]+]]:sreg_64 = S_MOVRELS_B64 [[PRED_COPY]].sub0_sub1, implicit $m0, implicit [[PRED_COPY]] ; GPRIDX-NEXT: S_ENDPGM 0, implicit [[S_MOVRELS_B64_]] %0:sgpr(<8 x s64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 %1:sgpr(s32) = COPY $sgpr8 @@ -571,17 +571,17 @@ ; MOVREL-LABEL: name: extract_vector_elt_v_s32_v2s32 ; MOVREL: liveins: $vgpr0_vgpr1, $sgpr2 ; MOVREL-NEXT: {{ $}} - ; MOVREL-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; MOVREL-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; MOVREL-NEXT: $m0 = COPY [[COPY1]] - ; MOVREL-NEXT: [[V_MOVRELS_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOVRELS_B32_e32 [[COPY]].sub0, implicit $m0, implicit $exec, implicit [[COPY]] + ; MOVREL-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; MOVREL-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; MOVREL-NEXT: $m0 = PRED_COPY [[PRED_COPY1]] + ; MOVREL-NEXT: [[V_MOVRELS_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOVRELS_B32_e32 [[PRED_COPY]].sub0, implicit $m0, implicit $exec, implicit [[PRED_COPY]] ; MOVREL-NEXT: S_ENDPGM 0, implicit [[V_MOVRELS_B32_e32_]] ; GPRIDX-LABEL: name: extract_vector_elt_v_s32_v2s32 ; GPRIDX: liveins: $vgpr0_vgpr1, $sgpr2 ; GPRIDX-NEXT: {{ $}} - ; GPRIDX-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GPRIDX-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GPRIDX-NEXT: [[V_INDIRECT_REG_READ_GPR_IDX_B32_V2_:%[0-9]+]]:vgpr_32 = V_INDIRECT_REG_READ_GPR_IDX_B32_V2 [[COPY]], [[COPY1]], 3, implicit-def $m0, implicit $m0, implicit $exec + ; GPRIDX-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GPRIDX-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GPRIDX-NEXT: [[V_INDIRECT_REG_READ_GPR_IDX_B32_V2_:%[0-9]+]]:vgpr_32 = V_INDIRECT_REG_READ_GPR_IDX_B32_V2 [[PRED_COPY]], [[PRED_COPY1]], 3, implicit-def $m0, implicit $m0, implicit $exec ; GPRIDX-NEXT: S_ENDPGM 0, implicit [[V_INDIRECT_REG_READ_GPR_IDX_B32_V2_]] %0:vgpr(<2 x s32>) = COPY $vgpr0_vgpr1 %1:sgpr(s32) = COPY $sgpr2 @@ -601,17 +601,17 @@ ; MOVREL-LABEL: name: extract_vector_elt_v_s32_v3s32 ; MOVREL: liveins: $vgpr0_vgpr1_vgpr2, $sgpr3 ; MOVREL-NEXT: {{ $}} - ; MOVREL-NEXT: [[COPY:%[0-9]+]]:vreg_96 = COPY $vgpr0_vgpr1_vgpr2 - ; MOVREL-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; MOVREL-NEXT: $m0 = COPY [[COPY1]] - ; MOVREL-NEXT: [[V_MOVRELS_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOVRELS_B32_e32 [[COPY]].sub0, implicit $m0, implicit $exec, implicit [[COPY]] + ; MOVREL-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_96 = PRED_COPY $vgpr0_vgpr1_vgpr2 + ; MOVREL-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; MOVREL-NEXT: $m0 = PRED_COPY [[PRED_COPY1]] + ; MOVREL-NEXT: [[V_MOVRELS_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOVRELS_B32_e32 [[PRED_COPY]].sub0, implicit $m0, implicit $exec, implicit [[PRED_COPY]] ; MOVREL-NEXT: S_ENDPGM 0, implicit 
[[V_MOVRELS_B32_e32_]] ; GPRIDX-LABEL: name: extract_vector_elt_v_s32_v3s32 ; GPRIDX: liveins: $vgpr0_vgpr1_vgpr2, $sgpr3 ; GPRIDX-NEXT: {{ $}} - ; GPRIDX-NEXT: [[COPY:%[0-9]+]]:vreg_96 = COPY $vgpr0_vgpr1_vgpr2 - ; GPRIDX-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GPRIDX-NEXT: [[V_INDIRECT_REG_READ_GPR_IDX_B32_V3_:%[0-9]+]]:vgpr_32 = V_INDIRECT_REG_READ_GPR_IDX_B32_V3 [[COPY]], [[COPY1]], 3, implicit-def $m0, implicit $m0, implicit $exec + ; GPRIDX-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_96 = PRED_COPY $vgpr0_vgpr1_vgpr2 + ; GPRIDX-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GPRIDX-NEXT: [[V_INDIRECT_REG_READ_GPR_IDX_B32_V3_:%[0-9]+]]:vgpr_32 = V_INDIRECT_REG_READ_GPR_IDX_B32_V3 [[PRED_COPY]], [[PRED_COPY1]], 3, implicit-def $m0, implicit $m0, implicit $exec ; GPRIDX-NEXT: S_ENDPGM 0, implicit [[V_INDIRECT_REG_READ_GPR_IDX_B32_V3_]] %0:vgpr(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 %1:sgpr(s32) = COPY $sgpr2 @@ -631,17 +631,17 @@ ; MOVREL-LABEL: name: extract_vector_elt_v_s32_v4s32 ; MOVREL: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr4 ; MOVREL-NEXT: {{ $}} - ; MOVREL-NEXT: [[COPY:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; MOVREL-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; MOVREL-NEXT: $m0 = COPY [[COPY1]] - ; MOVREL-NEXT: [[V_MOVRELS_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOVRELS_B32_e32 [[COPY]].sub0, implicit $m0, implicit $exec, implicit [[COPY]] + ; MOVREL-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_128 = PRED_COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; MOVREL-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; MOVREL-NEXT: $m0 = PRED_COPY [[PRED_COPY1]] + ; MOVREL-NEXT: [[V_MOVRELS_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOVRELS_B32_e32 [[PRED_COPY]].sub0, implicit $m0, implicit $exec, implicit [[PRED_COPY]] ; MOVREL-NEXT: S_ENDPGM 0, implicit [[V_MOVRELS_B32_e32_]] ; GPRIDX-LABEL: name: extract_vector_elt_v_s32_v4s32 ; GPRIDX: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr4 ; GPRIDX-NEXT: {{ $}} - ; GPRIDX-NEXT: [[COPY:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GPRIDX-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GPRIDX-NEXT: [[V_INDIRECT_REG_READ_GPR_IDX_B32_V4_:%[0-9]+]]:vgpr_32 = V_INDIRECT_REG_READ_GPR_IDX_B32_V4 [[COPY]], [[COPY1]], 3, implicit-def $m0, implicit $m0, implicit $exec + ; GPRIDX-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_128 = PRED_COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GPRIDX-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GPRIDX-NEXT: [[V_INDIRECT_REG_READ_GPR_IDX_B32_V4_:%[0-9]+]]:vgpr_32 = V_INDIRECT_REG_READ_GPR_IDX_B32_V4 [[PRED_COPY]], [[PRED_COPY1]], 3, implicit-def $m0, implicit $m0, implicit $exec ; GPRIDX-NEXT: S_ENDPGM 0, implicit [[V_INDIRECT_REG_READ_GPR_IDX_B32_V4_]] %0:vgpr(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:sgpr(s32) = COPY $sgpr4 @@ -661,17 +661,17 @@ ; MOVREL-LABEL: name: extract_vector_elt_v_s32_v8s32 ; MOVREL: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8 ; MOVREL-NEXT: {{ $}} - ; MOVREL-NEXT: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; MOVREL-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 - ; MOVREL-NEXT: $m0 = COPY [[COPY1]] - ; MOVREL-NEXT: [[V_MOVRELS_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOVRELS_B32_e32 [[COPY]].sub0, implicit $m0, implicit $exec, implicit [[COPY]] + ; MOVREL-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_256 = PRED_COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; MOVREL-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr8 + ; MOVREL-NEXT: $m0 = PRED_COPY [[PRED_COPY1]] + ; MOVREL-NEXT: [[V_MOVRELS_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOVRELS_B32_e32 
[[PRED_COPY]].sub0, implicit $m0, implicit $exec, implicit [[PRED_COPY]] ; MOVREL-NEXT: S_ENDPGM 0, implicit [[V_MOVRELS_B32_e32_]] ; GPRIDX-LABEL: name: extract_vector_elt_v_s32_v8s32 ; GPRIDX: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8 ; GPRIDX-NEXT: {{ $}} - ; GPRIDX-NEXT: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; GPRIDX-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 - ; GPRIDX-NEXT: [[V_INDIRECT_REG_READ_GPR_IDX_B32_V8_:%[0-9]+]]:vgpr_32 = V_INDIRECT_REG_READ_GPR_IDX_B32_V8 [[COPY]], [[COPY1]], 3, implicit-def $m0, implicit $m0, implicit $exec + ; GPRIDX-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_256 = PRED_COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; GPRIDX-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr8 + ; GPRIDX-NEXT: [[V_INDIRECT_REG_READ_GPR_IDX_B32_V8_:%[0-9]+]]:vgpr_32 = V_INDIRECT_REG_READ_GPR_IDX_B32_V8 [[PRED_COPY]], [[PRED_COPY1]], 3, implicit-def $m0, implicit $m0, implicit $exec ; GPRIDX-NEXT: S_ENDPGM 0, implicit [[V_INDIRECT_REG_READ_GPR_IDX_B32_V8_]] %0:vgpr(<8 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 %1:sgpr(s32) = COPY $sgpr8 @@ -691,17 +691,17 @@ ; MOVREL-LABEL: name: extract_vector_elt_v_s32_v16s32 ; MOVREL: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 ; MOVREL-NEXT: {{ $}} - ; MOVREL-NEXT: [[COPY:%[0-9]+]]:vreg_512 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; MOVREL-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 - ; MOVREL-NEXT: $m0 = COPY [[COPY1]] - ; MOVREL-NEXT: [[V_MOVRELS_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOVRELS_B32_e32 [[COPY]].sub0, implicit $m0, implicit $exec, implicit [[COPY]] + ; MOVREL-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_512 = PRED_COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; MOVREL-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr8 + ; MOVREL-NEXT: $m0 = PRED_COPY [[PRED_COPY1]] + ; MOVREL-NEXT: [[V_MOVRELS_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOVRELS_B32_e32 [[PRED_COPY]].sub0, implicit $m0, implicit $exec, implicit [[PRED_COPY]] ; MOVREL-NEXT: S_ENDPGM 0, implicit [[V_MOVRELS_B32_e32_]] ; GPRIDX-LABEL: name: extract_vector_elt_v_s32_v16s32 ; GPRIDX: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 ; GPRIDX-NEXT: {{ $}} - ; GPRIDX-NEXT: [[COPY:%[0-9]+]]:vreg_512 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; GPRIDX-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 - ; GPRIDX-NEXT: [[V_INDIRECT_REG_READ_GPR_IDX_B32_V16_:%[0-9]+]]:vgpr_32 = V_INDIRECT_REG_READ_GPR_IDX_B32_V16 [[COPY]], [[COPY1]], 3, implicit-def $m0, implicit $m0, implicit $exec + ; GPRIDX-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_512 = PRED_COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; GPRIDX-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr8 + ; GPRIDX-NEXT: [[V_INDIRECT_REG_READ_GPR_IDX_B32_V16_:%[0-9]+]]:vgpr_32 = V_INDIRECT_REG_READ_GPR_IDX_B32_V16 [[PRED_COPY]], [[PRED_COPY1]], 3, implicit-def $m0, implicit $m0, implicit $exec ; GPRIDX-NEXT: S_ENDPGM 0, implicit [[V_INDIRECT_REG_READ_GPR_IDX_B32_V16_]] %0:vgpr(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 %1:sgpr(s32) = COPY $sgpr8 @@ -721,17 +721,17 @@ ; MOVREL-LABEL: name: 
extract_vector_elt_v_s32_v32s32 ; MOVREL: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $sgpr40 ; MOVREL-NEXT: {{ $}} - ; MOVREL-NEXT: [[COPY:%[0-9]+]]:vreg_1024 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MOVREL-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr40 - ; MOVREL-NEXT: $m0 = COPY [[COPY1]] - ; MOVREL-NEXT: [[V_MOVRELS_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOVRELS_B32_e32 [[COPY]].sub0, implicit $m0, implicit $exec, implicit [[COPY]] + ; MOVREL-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_1024 = PRED_COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 + ; MOVREL-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr40 + ; MOVREL-NEXT: $m0 = PRED_COPY [[PRED_COPY1]] + ; MOVREL-NEXT: [[V_MOVRELS_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOVRELS_B32_e32 [[PRED_COPY]].sub0, implicit $m0, implicit $exec, implicit [[PRED_COPY]] ; MOVREL-NEXT: S_ENDPGM 0, implicit [[V_MOVRELS_B32_e32_]] ; GPRIDX-LABEL: name: extract_vector_elt_v_s32_v32s32 ; GPRIDX: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $sgpr40 ; GPRIDX-NEXT: {{ $}} - ; GPRIDX-NEXT: [[COPY:%[0-9]+]]:vreg_1024 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; GPRIDX-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr40 - ; GPRIDX-NEXT: [[V_INDIRECT_REG_READ_GPR_IDX_B32_V32_:%[0-9]+]]:vgpr_32 = V_INDIRECT_REG_READ_GPR_IDX_B32_V32 [[COPY]], [[COPY1]], 3, implicit-def $m0, implicit $m0, implicit $exec + ; GPRIDX-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_1024 = PRED_COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 + ; GPRIDX-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr40 + ; GPRIDX-NEXT: [[V_INDIRECT_REG_READ_GPR_IDX_B32_V32_:%[0-9]+]]:vgpr_32 = V_INDIRECT_REG_READ_GPR_IDX_B32_V32 [[PRED_COPY]], [[PRED_COPY1]], 3, implicit-def $m0, implicit $m0, implicit $exec ; GPRIDX-NEXT: S_ENDPGM 0, implicit [[V_INDIRECT_REG_READ_GPR_IDX_B32_V32_]] %0:vgpr(<32 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 %1:sgpr(s32) = COPY $sgpr40 @@ -751,17 +751,17 @@ ; MOVREL-LABEL: name: extract_vector_elt_v_s32_v8s32_idx_offset_1 ; MOVREL: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $sgpr8 ; MOVREL-NEXT: {{ $}} - ; MOVREL-NEXT: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; MOVREL-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 - ; MOVREL-NEXT: $m0 = COPY [[COPY1]] - ; MOVREL-NEXT: [[V_MOVRELS_B32_e32_:%[0-9]+]]:vgpr_32 = 
V_MOVRELS_B32_e32 [[COPY]].sub1, implicit $m0, implicit $exec, implicit [[COPY]] + ; MOVREL-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_256 = PRED_COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; MOVREL-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr8 + ; MOVREL-NEXT: $m0 = PRED_COPY [[PRED_COPY1]] + ; MOVREL-NEXT: [[V_MOVRELS_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOVRELS_B32_e32 [[PRED_COPY]].sub1, implicit $m0, implicit $exec, implicit [[PRED_COPY]] ; MOVREL-NEXT: S_ENDPGM 0, implicit [[V_MOVRELS_B32_e32_]] ; GPRIDX-LABEL: name: extract_vector_elt_v_s32_v8s32_idx_offset_1 ; GPRIDX: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $sgpr8 ; GPRIDX-NEXT: {{ $}} - ; GPRIDX-NEXT: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; GPRIDX-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 - ; GPRIDX-NEXT: [[V_INDIRECT_REG_READ_GPR_IDX_B32_V8_:%[0-9]+]]:vgpr_32 = V_INDIRECT_REG_READ_GPR_IDX_B32_V8 [[COPY]], [[COPY1]], 11, implicit-def $m0, implicit $m0, implicit $exec + ; GPRIDX-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_256 = PRED_COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; GPRIDX-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr8 + ; GPRIDX-NEXT: [[V_INDIRECT_REG_READ_GPR_IDX_B32_V8_:%[0-9]+]]:vgpr_32 = V_INDIRECT_REG_READ_GPR_IDX_B32_V8 [[PRED_COPY]], [[PRED_COPY1]], 11, implicit-def $m0, implicit $m0, implicit $exec ; GPRIDX-NEXT: S_ENDPGM 0, implicit [[V_INDIRECT_REG_READ_GPR_IDX_B32_V8_]] %0:vgpr(<8 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 %1:sgpr(s32) = COPY $sgpr8 @@ -783,21 +783,21 @@ ; MOVREL-LABEL: name: extract_vector_elt_v_s32_v8s32_idx_offset_m1 ; MOVREL: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $sgpr8 ; MOVREL-NEXT: {{ $}} - ; MOVREL-NEXT: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; MOVREL-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 + ; MOVREL-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_256 = PRED_COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; MOVREL-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr8 ; MOVREL-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 - ; MOVREL-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc - ; MOVREL-NEXT: $m0 = COPY [[S_ADD_I32_]] - ; MOVREL-NEXT: [[V_MOVRELS_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOVRELS_B32_e32 [[COPY]].sub0, implicit $m0, implicit $exec, implicit [[COPY]] + ; MOVREL-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[PRED_COPY1]], [[S_MOV_B32_]], implicit-def $scc + ; MOVREL-NEXT: $m0 = PRED_COPY [[S_ADD_I32_]] + ; MOVREL-NEXT: [[V_MOVRELS_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOVRELS_B32_e32 [[PRED_COPY]].sub0, implicit $m0, implicit $exec, implicit [[PRED_COPY]] ; MOVREL-NEXT: S_ENDPGM 0, implicit [[V_MOVRELS_B32_e32_]] ; GPRIDX-LABEL: name: extract_vector_elt_v_s32_v8s32_idx_offset_m1 ; GPRIDX: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $sgpr8 ; GPRIDX-NEXT: {{ $}} - ; GPRIDX-NEXT: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; GPRIDX-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 + ; GPRIDX-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_256 = PRED_COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; GPRIDX-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr8 ; GPRIDX-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 - ; GPRIDX-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc - ; GPRIDX-NEXT: [[V_INDIRECT_REG_READ_GPR_IDX_B32_V8_:%[0-9]+]]:vgpr_32 = 
V_INDIRECT_REG_READ_GPR_IDX_B32_V8 [[COPY]], [[S_ADD_I32_]], 3, implicit-def $m0, implicit $m0, implicit $exec + ; GPRIDX-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[PRED_COPY1]], [[S_MOV_B32_]], implicit-def $scc + ; GPRIDX-NEXT: [[V_INDIRECT_REG_READ_GPR_IDX_B32_V8_:%[0-9]+]]:vgpr_32 = V_INDIRECT_REG_READ_GPR_IDX_B32_V8 [[PRED_COPY]], [[S_ADD_I32_]], 3, implicit-def $m0, implicit $m0, implicit $exec ; GPRIDX-NEXT: S_ENDPGM 0, implicit [[V_INDIRECT_REG_READ_GPR_IDX_B32_V8_]] %0:vgpr(<8 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 %1:sgpr(s32) = COPY $sgpr8 @@ -819,17 +819,17 @@ ; MOVREL-LABEL: name: extract_vector_elt_v_s32_v8s32_idx_offset_7 ; MOVREL: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $sgpr8 ; MOVREL-NEXT: {{ $}} - ; MOVREL-NEXT: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; MOVREL-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 - ; MOVREL-NEXT: $m0 = COPY [[COPY1]] - ; MOVREL-NEXT: [[V_MOVRELS_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOVRELS_B32_e32 [[COPY]].sub7, implicit $m0, implicit $exec, implicit [[COPY]] + ; MOVREL-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_256 = PRED_COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; MOVREL-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr8 + ; MOVREL-NEXT: $m0 = PRED_COPY [[PRED_COPY1]] + ; MOVREL-NEXT: [[V_MOVRELS_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOVRELS_B32_e32 [[PRED_COPY]].sub7, implicit $m0, implicit $exec, implicit [[PRED_COPY]] ; MOVREL-NEXT: S_ENDPGM 0, implicit [[V_MOVRELS_B32_e32_]] ; GPRIDX-LABEL: name: extract_vector_elt_v_s32_v8s32_idx_offset_7 ; GPRIDX: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $sgpr8 ; GPRIDX-NEXT: {{ $}} - ; GPRIDX-NEXT: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; GPRIDX-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 - ; GPRIDX-NEXT: [[V_INDIRECT_REG_READ_GPR_IDX_B32_V8_:%[0-9]+]]:vgpr_32 = V_INDIRECT_REG_READ_GPR_IDX_B32_V8 [[COPY]], [[COPY1]], 71, implicit-def $m0, implicit $m0, implicit $exec + ; GPRIDX-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_256 = PRED_COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; GPRIDX-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr8 + ; GPRIDX-NEXT: [[V_INDIRECT_REG_READ_GPR_IDX_B32_V8_:%[0-9]+]]:vgpr_32 = V_INDIRECT_REG_READ_GPR_IDX_B32_V8 [[PRED_COPY]], [[PRED_COPY1]], 71, implicit-def $m0, implicit $m0, implicit $exec ; GPRIDX-NEXT: S_ENDPGM 0, implicit [[V_INDIRECT_REG_READ_GPR_IDX_B32_V8_]] %0:vgpr(<8 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 %1:sgpr(s32) = COPY $sgpr8 @@ -851,21 +851,21 @@ ; MOVREL-LABEL: name: extract_vector_elt_v_s32_v8s32_idx_offset_8 ; MOVREL: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $sgpr8 ; MOVREL-NEXT: {{ $}} - ; MOVREL-NEXT: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; MOVREL-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 + ; MOVREL-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_256 = PRED_COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; MOVREL-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr8 ; MOVREL-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 8 - ; MOVREL-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc - ; MOVREL-NEXT: $m0 = COPY [[S_ADD_I32_]] - ; MOVREL-NEXT: [[V_MOVRELS_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOVRELS_B32_e32 [[COPY]].sub0, implicit $m0, implicit $exec, implicit [[COPY]] + ; MOVREL-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[PRED_COPY1]], [[S_MOV_B32_]], 
implicit-def $scc + ; MOVREL-NEXT: $m0 = PRED_COPY [[S_ADD_I32_]] + ; MOVREL-NEXT: [[V_MOVRELS_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOVRELS_B32_e32 [[PRED_COPY]].sub0, implicit $m0, implicit $exec, implicit [[PRED_COPY]] ; MOVREL-NEXT: S_ENDPGM 0, implicit [[V_MOVRELS_B32_e32_]] ; GPRIDX-LABEL: name: extract_vector_elt_v_s32_v8s32_idx_offset_8 ; GPRIDX: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $sgpr8 ; GPRIDX-NEXT: {{ $}} - ; GPRIDX-NEXT: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; GPRIDX-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 + ; GPRIDX-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_256 = PRED_COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; GPRIDX-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr8 ; GPRIDX-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 8 - ; GPRIDX-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc - ; GPRIDX-NEXT: [[V_INDIRECT_REG_READ_GPR_IDX_B32_V8_:%[0-9]+]]:vgpr_32 = V_INDIRECT_REG_READ_GPR_IDX_B32_V8 [[COPY]], [[S_ADD_I32_]], 3, implicit-def $m0, implicit $m0, implicit $exec + ; GPRIDX-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[PRED_COPY1]], [[S_MOV_B32_]], implicit-def $scc + ; GPRIDX-NEXT: [[V_INDIRECT_REG_READ_GPR_IDX_B32_V8_:%[0-9]+]]:vgpr_32 = V_INDIRECT_REG_READ_GPR_IDX_B32_V8 [[PRED_COPY]], [[S_ADD_I32_]], 3, implicit-def $m0, implicit $m0, implicit $exec ; GPRIDX-NEXT: S_ENDPGM 0, implicit [[V_INDIRECT_REG_READ_GPR_IDX_B32_V8_]] %0:vgpr(<8 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 %1:sgpr(s32) = COPY $sgpr8 @@ -887,18 +887,18 @@ ; MOVREL-LABEL: name: extract_vector_elt_s_s32_v4s32_const_idx ; MOVREL: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 ; MOVREL-NEXT: {{ $}} - ; MOVREL-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; MOVREL-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_128 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; MOVREL-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; MOVREL-NEXT: $m0 = COPY [[S_MOV_B32_]] - ; MOVREL-NEXT: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]] + ; MOVREL-NEXT: $m0 = PRED_COPY [[S_MOV_B32_]] + ; MOVREL-NEXT: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[PRED_COPY]].sub0, implicit $m0, implicit [[PRED_COPY]] ; MOVREL-NEXT: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]] ; GPRIDX-LABEL: name: extract_vector_elt_s_s32_v4s32_const_idx ; GPRIDX: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 ; GPRIDX-NEXT: {{ $}} - ; GPRIDX-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GPRIDX-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_128 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; GPRIDX-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GPRIDX-NEXT: $m0 = COPY [[S_MOV_B32_]] - ; GPRIDX-NEXT: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]] + ; GPRIDX-NEXT: $m0 = PRED_COPY [[S_MOV_B32_]] + ; GPRIDX-NEXT: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[PRED_COPY]].sub0, implicit $m0, implicit [[PRED_COPY]] ; GPRIDX-NEXT: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]] %0:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 %1:sgpr(s32) = G_CONSTANT i32 0 @@ -918,17 +918,17 @@ ; MOVREL-LABEL: name: extract_vector_elt_v_s32_v4s32_const_idx ; MOVREL: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; MOVREL-NEXT: {{ $}} - ; MOVREL-NEXT: [[COPY:%[0-9]+]]:vreg_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; MOVREL-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_128 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; MOVREL-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; 
MOVREL-NEXT: $m0 = COPY [[S_MOV_B32_]] - ; MOVREL-NEXT: [[V_MOVRELS_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOVRELS_B32_e32 [[COPY]].sub0, implicit $m0, implicit $exec, implicit [[COPY]] + ; MOVREL-NEXT: $m0 = PRED_COPY [[S_MOV_B32_]] + ; MOVREL-NEXT: [[V_MOVRELS_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOVRELS_B32_e32 [[PRED_COPY]].sub0, implicit $m0, implicit $exec, implicit [[PRED_COPY]] ; MOVREL-NEXT: S_ENDPGM 0, implicit [[V_MOVRELS_B32_e32_]] ; GPRIDX-LABEL: name: extract_vector_elt_v_s32_v4s32_const_idx ; GPRIDX: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; GPRIDX-NEXT: {{ $}} - ; GPRIDX-NEXT: [[COPY:%[0-9]+]]:vreg_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GPRIDX-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_128 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; GPRIDX-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GPRIDX-NEXT: [[V_INDIRECT_REG_READ_GPR_IDX_B32_V4_:%[0-9]+]]:vgpr_32 = V_INDIRECT_REG_READ_GPR_IDX_B32_V4 [[COPY]], [[S_MOV_B32_]], 3, implicit-def $m0, implicit $m0, implicit $exec + ; GPRIDX-NEXT: [[V_INDIRECT_REG_READ_GPR_IDX_B32_V4_:%[0-9]+]]:vgpr_32 = V_INDIRECT_REG_READ_GPR_IDX_B32_V4 [[PRED_COPY]], [[S_MOV_B32_]], 3, implicit-def $m0, implicit $m0, implicit $exec ; GPRIDX-NEXT: S_ENDPGM 0, implicit [[V_INDIRECT_REG_READ_GPR_IDX_B32_V4_]] %0:vgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 %1:sgpr(s32) = G_CONSTANT i32 0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-extract.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-extract.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-extract.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-extract.mir @@ -9,38 +9,38 @@ bb.0: ; CHECK-LABEL: name: extract512 ; CHECK: [[DEF:%[0-9]+]]:sgpr_512 = IMPLICIT_DEF - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY [[DEF]].sub0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[DEF]].sub1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[DEF]].sub2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[DEF]].sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[DEF]].sub4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[DEF]].sub5 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[DEF]].sub6 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[DEF]].sub7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[DEF]].sub8 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[DEF]].sub9 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[DEF]].sub10 - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[DEF]].sub11 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:sreg_32 = COPY [[DEF]].sub12 - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:sreg_32 = COPY [[DEF]].sub13 - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:sreg_32 = COPY [[DEF]].sub14 - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:sreg_32 = COPY [[DEF]].sub15 - ; CHECK-NEXT: $sgpr0 = COPY [[COPY]] - ; CHECK-NEXT: $sgpr1 = COPY [[COPY1]] - ; CHECK-NEXT: $sgpr2 = COPY [[COPY2]] - ; CHECK-NEXT: $sgpr3 = COPY [[COPY3]] - ; CHECK-NEXT: $sgpr4 = COPY [[COPY4]] - ; CHECK-NEXT: $sgpr5 = COPY [[COPY5]] - ; CHECK-NEXT: $sgpr6 = COPY [[COPY6]] - ; CHECK-NEXT: $sgpr7 = COPY [[COPY7]] - ; CHECK-NEXT: $sgpr8 = COPY [[COPY8]] - ; CHECK-NEXT: $sgpr9 = COPY [[COPY9]] - ; CHECK-NEXT: $sgpr10 = COPY [[COPY10]] - ; CHECK-NEXT: $sgpr11 = COPY [[COPY11]] - ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]] - ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]] - ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]] - ; CHECK-NEXT: $sgpr15 = COPY [[COPY15]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY [[DEF]].sub0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY [[DEF]].sub1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY 
[[DEF]].sub2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY [[DEF]].sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY [[DEF]].sub4 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY [[DEF]].sub5 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY [[DEF]].sub6 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_32 = PRED_COPY [[DEF]].sub7 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:sreg_32 = PRED_COPY [[DEF]].sub8 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:sreg_32 = PRED_COPY [[DEF]].sub9 + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:sreg_32 = PRED_COPY [[DEF]].sub10 + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:sreg_32 = PRED_COPY [[DEF]].sub11 + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:sreg_32 = PRED_COPY [[DEF]].sub12 + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:sreg_32 = PRED_COPY [[DEF]].sub13 + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:sreg_32 = PRED_COPY [[DEF]].sub14 + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:sreg_32 = PRED_COPY [[DEF]].sub15 + ; CHECK-NEXT: $sgpr0 = PRED_COPY [[PRED_COPY]] + ; CHECK-NEXT: $sgpr1 = PRED_COPY [[PRED_COPY1]] + ; CHECK-NEXT: $sgpr2 = PRED_COPY [[PRED_COPY2]] + ; CHECK-NEXT: $sgpr3 = PRED_COPY [[PRED_COPY3]] + ; CHECK-NEXT: $sgpr4 = PRED_COPY [[PRED_COPY4]] + ; CHECK-NEXT: $sgpr5 = PRED_COPY [[PRED_COPY5]] + ; CHECK-NEXT: $sgpr6 = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: $sgpr7 = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: $sgpr8 = PRED_COPY [[PRED_COPY8]] + ; CHECK-NEXT: $sgpr9 = PRED_COPY [[PRED_COPY9]] + ; CHECK-NEXT: $sgpr10 = PRED_COPY [[PRED_COPY10]] + ; CHECK-NEXT: $sgpr11 = PRED_COPY [[PRED_COPY11]] + ; CHECK-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY12]] + ; CHECK-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY13]] + ; CHECK-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY14]] + ; CHECK-NEXT: $sgpr15 = PRED_COPY [[PRED_COPY15]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15 %0:sgpr(s512) = G_IMPLICIT_DEF %1:sgpr(s32) = G_EXTRACT %0:sgpr(s512), 0 @@ -87,39 +87,39 @@ bb.0: ; CHECK-LABEL: name: extract_s_s32_s1024 ; CHECK: [[DEF:%[0-9]+]]:sgpr_1024 = IMPLICIT_DEF - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY [[DEF]].sub0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[DEF]].sub1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[DEF]].sub2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[DEF]].sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[DEF]].sub4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[DEF]].sub5 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[DEF]].sub6 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[DEF]].sub7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[DEF]].sub8 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[DEF]].sub9 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[DEF]].sub10 - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[DEF]].sub11 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:sreg_32 = COPY [[DEF]].sub12 - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:sreg_32 = COPY [[DEF]].sub13 - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:sreg_32 = COPY [[DEF]].sub14 - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:sreg_32 = COPY [[DEF]].sub15 - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:sreg_32 = COPY [[DEF]].sub16 - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:sreg_32 = COPY [[DEF]].sub17 - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:sreg_32 = COPY [[DEF]].sub18 - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:sreg_32 = COPY [[DEF]].sub19 - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:sreg_32 = COPY [[DEF]].sub20 - ; CHECK-NEXT: [[COPY21:%[0-9]+]]:sreg_32 = COPY [[DEF]].sub21 - ; CHECK-NEXT: 
[[COPY22:%[0-9]+]]:sreg_32 = COPY [[DEF]].sub22 - ; CHECK-NEXT: [[COPY23:%[0-9]+]]:sreg_32 = COPY [[DEF]].sub23 - ; CHECK-NEXT: [[COPY24:%[0-9]+]]:sreg_32 = COPY [[DEF]].sub24 - ; CHECK-NEXT: [[COPY25:%[0-9]+]]:sreg_32 = COPY [[DEF]].sub25 - ; CHECK-NEXT: [[COPY26:%[0-9]+]]:sreg_32 = COPY [[DEF]].sub26 - ; CHECK-NEXT: [[COPY27:%[0-9]+]]:sreg_32 = COPY [[DEF]].sub27 - ; CHECK-NEXT: [[COPY28:%[0-9]+]]:sreg_32 = COPY [[DEF]].sub28 - ; CHECK-NEXT: [[COPY29:%[0-9]+]]:sreg_32 = COPY [[DEF]].sub29 - ; CHECK-NEXT: [[COPY30:%[0-9]+]]:sreg_32 = COPY [[DEF]].sub30 - ; CHECK-NEXT: [[COPY31:%[0-9]+]]:sreg_32 = COPY [[DEF]].sub31 - ; CHECK-NEXT: S_ENDPGM 0, implicit [[DEF]], implicit [[COPY]], implicit [[COPY1]], implicit [[COPY2]], implicit [[COPY3]], implicit [[COPY4]], implicit [[COPY5]], implicit [[COPY6]], implicit [[COPY7]], implicit [[COPY8]], implicit [[COPY9]], implicit [[COPY10]], implicit [[COPY11]], implicit [[COPY12]], implicit [[COPY13]], implicit [[COPY14]], implicit [[COPY15]], implicit [[COPY16]], implicit [[COPY17]], implicit [[COPY18]], implicit [[COPY19]], implicit [[COPY20]], implicit [[COPY21]], implicit [[COPY22]], implicit [[COPY23]], implicit [[COPY24]], implicit [[COPY25]], implicit [[COPY26]], implicit [[COPY27]], implicit [[COPY28]], implicit [[COPY29]], implicit [[COPY30]], implicit [[COPY31]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY [[DEF]].sub0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY [[DEF]].sub1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY [[DEF]].sub2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY [[DEF]].sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY [[DEF]].sub4 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY [[DEF]].sub5 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY [[DEF]].sub6 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_32 = PRED_COPY [[DEF]].sub7 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:sreg_32 = PRED_COPY [[DEF]].sub8 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:sreg_32 = PRED_COPY [[DEF]].sub9 + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:sreg_32 = PRED_COPY [[DEF]].sub10 + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:sreg_32 = PRED_COPY [[DEF]].sub11 + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:sreg_32 = PRED_COPY [[DEF]].sub12 + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:sreg_32 = PRED_COPY [[DEF]].sub13 + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:sreg_32 = PRED_COPY [[DEF]].sub14 + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:sreg_32 = PRED_COPY [[DEF]].sub15 + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:sreg_32 = PRED_COPY [[DEF]].sub16 + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:sreg_32 = PRED_COPY [[DEF]].sub17 + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:sreg_32 = PRED_COPY [[DEF]].sub18 + ; CHECK-NEXT: [[PRED_COPY19:%[0-9]+]]:sreg_32 = PRED_COPY [[DEF]].sub19 + ; CHECK-NEXT: [[PRED_COPY20:%[0-9]+]]:sreg_32 = PRED_COPY [[DEF]].sub20 + ; CHECK-NEXT: [[PRED_COPY21:%[0-9]+]]:sreg_32 = PRED_COPY [[DEF]].sub21 + ; CHECK-NEXT: [[PRED_COPY22:%[0-9]+]]:sreg_32 = PRED_COPY [[DEF]].sub22 + ; CHECK-NEXT: [[PRED_COPY23:%[0-9]+]]:sreg_32 = PRED_COPY [[DEF]].sub23 + ; CHECK-NEXT: [[PRED_COPY24:%[0-9]+]]:sreg_32 = PRED_COPY [[DEF]].sub24 + ; CHECK-NEXT: [[PRED_COPY25:%[0-9]+]]:sreg_32 = PRED_COPY [[DEF]].sub25 + ; CHECK-NEXT: [[PRED_COPY26:%[0-9]+]]:sreg_32 = PRED_COPY [[DEF]].sub26 + ; CHECK-NEXT: [[PRED_COPY27:%[0-9]+]]:sreg_32 = PRED_COPY [[DEF]].sub27 + ; CHECK-NEXT: [[PRED_COPY28:%[0-9]+]]:sreg_32 = PRED_COPY [[DEF]].sub28 + ; CHECK-NEXT: [[PRED_COPY29:%[0-9]+]]:sreg_32 = PRED_COPY [[DEF]].sub29 + ; CHECK-NEXT: 
[[PRED_COPY30:%[0-9]+]]:sreg_32 = PRED_COPY [[DEF]].sub30 + ; CHECK-NEXT: [[PRED_COPY31:%[0-9]+]]:sreg_32 = PRED_COPY [[DEF]].sub31 + ; CHECK-NEXT: S_ENDPGM 0, implicit [[DEF]], implicit [[PRED_COPY]], implicit [[PRED_COPY1]], implicit [[PRED_COPY2]], implicit [[PRED_COPY3]], implicit [[PRED_COPY4]], implicit [[PRED_COPY5]], implicit [[PRED_COPY6]], implicit [[PRED_COPY7]], implicit [[PRED_COPY8]], implicit [[PRED_COPY9]], implicit [[PRED_COPY10]], implicit [[PRED_COPY11]], implicit [[PRED_COPY12]], implicit [[PRED_COPY13]], implicit [[PRED_COPY14]], implicit [[PRED_COPY15]], implicit [[PRED_COPY16]], implicit [[PRED_COPY17]], implicit [[PRED_COPY18]], implicit [[PRED_COPY19]], implicit [[PRED_COPY20]], implicit [[PRED_COPY21]], implicit [[PRED_COPY22]], implicit [[PRED_COPY23]], implicit [[PRED_COPY24]], implicit [[PRED_COPY25]], implicit [[PRED_COPY26]], implicit [[PRED_COPY27]], implicit [[PRED_COPY28]], implicit [[PRED_COPY29]], implicit [[PRED_COPY30]], implicit [[PRED_COPY31]] %0:sgpr(s1024) = G_IMPLICIT_DEF %1:sgpr(s32) = G_EXTRACT %0:sgpr, 0 %2:sgpr(s32) = G_EXTRACT %0:sgpr, 32 @@ -168,9 +168,9 @@ bb.0: ; CHECK-LABEL: name: extract_sgpr_s64_from_s128 ; CHECK: [[DEF:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY [[DEF]].sub0_sub1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY [[DEF]].sub2_sub3 - ; CHECK-NEXT: S_ENDPGM 0, implicit [[COPY]], implicit [[COPY1]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY [[DEF]].sub0_sub1 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64 = PRED_COPY [[DEF]].sub2_sub3 + ; CHECK-NEXT: S_ENDPGM 0, implicit [[PRED_COPY]], implicit [[PRED_COPY1]] %0:sgpr(s128) = G_IMPLICIT_DEF %1:sgpr(s64) = G_EXTRACT %0, 0 %2:sgpr(s64) = G_EXTRACT %0, 64 @@ -189,11 +189,11 @@ ; CHECK-LABEL: name: extract_sgpr_s96_from_s128 ; CHECK: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_128_with_sub1_sub2_sub3 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_128_with_sub0_sub1_sub2 = COPY [[COPY]] - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_96 = COPY [[COPY1]].sub0_sub1_sub2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_96 = COPY [[COPY]].sub1_sub2_sub3 - ; CHECK-NEXT: S_ENDPGM 0, implicit [[COPY2]], implicit [[COPY3]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_128_with_sub1_sub2_sub3 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_128_with_sub0_sub1_sub2 = PRED_COPY [[PRED_COPY]] + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_96 = PRED_COPY [[PRED_COPY1]].sub0_sub1_sub2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_96 = PRED_COPY [[PRED_COPY]].sub1_sub2_sub3 + ; CHECK-NEXT: S_ENDPGM 0, implicit [[PRED_COPY2]], implicit [[PRED_COPY3]] %0:sgpr(s128) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 %1:sgpr(s96) = G_EXTRACT %0, 0 %2:sgpr(s96) = G_EXTRACT %0, 32 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fabs.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fabs.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fabs.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fabs.mir @@ -23,31 +23,31 @@ ; SI-LABEL: name: fabs_s32_ss ; SI: liveins: $sgpr0 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 ; SI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483647 - ; SI-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc - ; SI-NEXT: $sgpr0 = COPY [[S_AND_B32_]] + ; SI-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 
[[PRED_COPY]], [[S_MOV_B32_]], implicit-def $scc + ; SI-NEXT: $sgpr0 = PRED_COPY [[S_AND_B32_]] ; VI-LABEL: name: fabs_s32_ss ; VI: liveins: $sgpr0 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 ; VI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483647 - ; VI-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc - ; VI-NEXT: $sgpr0 = COPY [[S_AND_B32_]] + ; VI-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[PRED_COPY]], [[S_MOV_B32_]], implicit-def $scc + ; VI-NEXT: $sgpr0 = PRED_COPY [[S_AND_B32_]] ; GFX9-LABEL: name: fabs_s32_ss ; GFX9: liveins: $sgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483647 - ; GFX9-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc - ; GFX9-NEXT: $sgpr0 = COPY [[S_AND_B32_]] + ; GFX9-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[PRED_COPY]], [[S_MOV_B32_]], implicit-def $scc + ; GFX9-NEXT: $sgpr0 = PRED_COPY [[S_AND_B32_]] ; GFX10-LABEL: name: fabs_s32_ss ; GFX10: liveins: $sgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483647 - ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc - ; GFX10-NEXT: $sgpr0 = COPY [[S_AND_B32_]] + ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[PRED_COPY]], [[S_MOV_B32_]], implicit-def $scc + ; GFX10-NEXT: $sgpr0 = PRED_COPY [[S_AND_B32_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = G_FABS %0 $sgpr0 = COPY %1 @@ -71,31 +71,31 @@ ; SI-LABEL: name: fabs_s32_vv ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; SI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483647 - ; SI-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec - ; SI-NEXT: $vgpr0 = COPY [[V_AND_B32_e64_]] + ; SI-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_]], [[PRED_COPY]], implicit $exec + ; SI-NEXT: $vgpr0 = PRED_COPY [[V_AND_B32_e64_]] ; VI-LABEL: name: fabs_s32_vv ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; VI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483647 - ; VI-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec - ; VI-NEXT: $vgpr0 = COPY [[V_AND_B32_e64_]] + ; VI-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_]], [[PRED_COPY]], implicit $exec + ; VI-NEXT: $vgpr0 = PRED_COPY [[V_AND_B32_e64_]] ; GFX9-LABEL: name: fabs_s32_vv ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483647 - ; GFX9-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec - ; GFX9-NEXT: $vgpr0 = COPY [[V_AND_B32_e64_]] + ; GFX9-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_]], [[PRED_COPY]], implicit $exec + ; GFX9-NEXT: $vgpr0 = PRED_COPY 
[[V_AND_B32_e64_]] ; GFX10-LABEL: name: fabs_s32_vv ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483647 - ; GFX10-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec - ; GFX10-NEXT: $vgpr0 = COPY [[V_AND_B32_e64_]] + ; GFX10-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_]], [[PRED_COPY]], implicit $exec + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[V_AND_B32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = G_FABS %0 $vgpr0 = COPY %1 @@ -120,25 +120,25 @@ ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; SI-NEXT: [[FABS:%[0-9]+]]:vgpr_32(s32) = G_FABS [[COPY]] - ; SI-NEXT: $vgpr0 = COPY [[FABS]](s32) + ; SI-NEXT: $vgpr0 = PRED_COPY [[FABS]](s32) ; VI-LABEL: name: fabs_s32_vs ; VI: liveins: $sgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; VI-NEXT: [[FABS:%[0-9]+]]:vgpr_32(s32) = G_FABS [[COPY]] - ; VI-NEXT: $vgpr0 = COPY [[FABS]](s32) + ; VI-NEXT: $vgpr0 = PRED_COPY [[FABS]](s32) ; GFX9-LABEL: name: fabs_s32_vs ; GFX9: liveins: $sgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; GFX9-NEXT: [[FABS:%[0-9]+]]:vgpr_32(s32) = G_FABS [[COPY]] - ; GFX9-NEXT: $vgpr0 = COPY [[FABS]](s32) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[FABS]](s32) ; GFX10-LABEL: name: fabs_s32_vs ; GFX10: liveins: $sgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; GFX10-NEXT: [[FABS:%[0-9]+]]:vgpr_32(s32) = G_FABS [[COPY]] - ; GFX10-NEXT: $vgpr0 = COPY [[FABS]](s32) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[FABS]](s32) %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = G_FABS %0 $vgpr0 = COPY %1 @@ -162,31 +162,31 @@ ; SI-LABEL: name: fabs_v2s16_ss ; SI: liveins: $sgpr0_sgpr1 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 ; SI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147450879 - ; SI-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc - ; SI-NEXT: $sgpr0 = COPY [[S_AND_B32_]] + ; SI-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[PRED_COPY]], [[S_MOV_B32_]], implicit-def $scc + ; SI-NEXT: $sgpr0 = PRED_COPY [[S_AND_B32_]] ; VI-LABEL: name: fabs_v2s16_ss ; VI: liveins: $sgpr0_sgpr1 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 ; VI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147450879 - ; VI-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc - ; VI-NEXT: $sgpr0 = COPY [[S_AND_B32_]] + ; VI-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[PRED_COPY]], [[S_MOV_B32_]], implicit-def $scc + ; VI-NEXT: $sgpr0 = PRED_COPY [[S_AND_B32_]] ; GFX9-LABEL: name: fabs_v2s16_ss ; GFX9: liveins: $sgpr0_sgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147450879 - ; GFX9-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc - ; GFX9-NEXT: $sgpr0 = COPY [[S_AND_B32_]] + ; GFX9-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[PRED_COPY]], [[S_MOV_B32_]], implicit-def $scc + ; GFX9-NEXT: $sgpr0 = PRED_COPY [[S_AND_B32_]] ; GFX10-LABEL: name: fabs_v2s16_ss 
; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147450879 - ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc - ; GFX10-NEXT: $sgpr0 = COPY [[S_AND_B32_]] + ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[PRED_COPY]], [[S_MOV_B32_]], implicit-def $scc + ; GFX10-NEXT: $sgpr0 = PRED_COPY [[S_AND_B32_]] %0:sgpr(<2 x s16>) = COPY $sgpr0 %1:sgpr(<2 x s16>) = G_FABS %0 $sgpr0 = COPY %1 @@ -210,31 +210,31 @@ ; SI-LABEL: name: fabs_s16_ss ; SI: liveins: $sgpr0 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 ; SI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32767 - ; SI-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc - ; SI-NEXT: $sgpr0 = COPY [[S_AND_B32_]] + ; SI-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[PRED_COPY]], [[S_MOV_B32_]], implicit-def $scc + ; SI-NEXT: $sgpr0 = PRED_COPY [[S_AND_B32_]] ; VI-LABEL: name: fabs_s16_ss ; VI: liveins: $sgpr0 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 ; VI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32767 - ; VI-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc - ; VI-NEXT: $sgpr0 = COPY [[S_AND_B32_]] + ; VI-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[PRED_COPY]], [[S_MOV_B32_]], implicit-def $scc + ; VI-NEXT: $sgpr0 = PRED_COPY [[S_AND_B32_]] ; GFX9-LABEL: name: fabs_s16_ss ; GFX9: liveins: $sgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32767 - ; GFX9-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc - ; GFX9-NEXT: $sgpr0 = COPY [[S_AND_B32_]] + ; GFX9-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[PRED_COPY]], [[S_MOV_B32_]], implicit-def $scc + ; GFX9-NEXT: $sgpr0 = PRED_COPY [[S_AND_B32_]] ; GFX10-LABEL: name: fabs_s16_ss ; GFX10: liveins: $sgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32767 - ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc - ; GFX10-NEXT: $sgpr0 = COPY [[S_AND_B32_]] + ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[PRED_COPY]], [[S_MOV_B32_]], implicit-def $scc + ; GFX10-NEXT: $sgpr0 = PRED_COPY [[S_AND_B32_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s16) = G_TRUNC %0 %2:sgpr(s16) = G_FABS %1 @@ -260,31 +260,31 @@ ; SI-LABEL: name: fabs_s16_vv ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; SI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32767 - ; SI-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec - ; SI-NEXT: $vgpr0 = COPY [[V_AND_B32_e64_]] + ; SI-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_]], [[PRED_COPY]], implicit $exec + ; SI-NEXT: $vgpr0 = PRED_COPY [[V_AND_B32_e64_]] ; VI-LABEL: name: fabs_s16_vv ; VI: liveins: 
$vgpr0 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; VI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32767 - ; VI-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec - ; VI-NEXT: $vgpr0 = COPY [[V_AND_B32_e64_]] + ; VI-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_]], [[PRED_COPY]], implicit $exec + ; VI-NEXT: $vgpr0 = PRED_COPY [[V_AND_B32_e64_]] ; GFX9-LABEL: name: fabs_s16_vv ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32767 - ; GFX9-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec - ; GFX9-NEXT: $vgpr0 = COPY [[V_AND_B32_e64_]] + ; GFX9-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_]], [[PRED_COPY]], implicit $exec + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[V_AND_B32_e64_]] ; GFX10-LABEL: name: fabs_s16_vv ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32767 - ; GFX10-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec - ; GFX10-NEXT: $vgpr0 = COPY [[V_AND_B32_e64_]] + ; GFX10-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_]], [[PRED_COPY]], implicit $exec + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[V_AND_B32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s16) = G_TRUNC %0 %2:vgpr(s16) = G_FABS %1 @@ -315,32 +315,32 @@ ; SI-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; SI-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) ; SI-NEXT: [[FABS:%[0-9]+]]:vgpr_32(s16) = G_FABS [[TRUNC]] - ; SI-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY [[FABS]](s16) - ; SI-NEXT: $vgpr0 = COPY [[COPY1]](s32) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY [[FABS]](s16) + ; SI-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY]](s32) ; VI-LABEL: name: fabs_s16_vs ; VI: liveins: $sgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; VI-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) ; VI-NEXT: [[FABS:%[0-9]+]]:vgpr_32(s16) = G_FABS [[TRUNC]] - ; VI-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY [[FABS]](s16) - ; VI-NEXT: $vgpr0 = COPY [[COPY1]](s32) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY [[FABS]](s16) + ; VI-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY]](s32) ; GFX9-LABEL: name: fabs_s16_vs ; GFX9: liveins: $sgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) ; GFX9-NEXT: [[FABS:%[0-9]+]]:vgpr_32(s16) = G_FABS [[TRUNC]] - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY [[FABS]](s16) - ; GFX9-NEXT: $vgpr0 = COPY [[COPY1]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY [[FABS]](s16) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY]](s32) ; GFX10-LABEL: name: fabs_s16_vs ; GFX10: liveins: $sgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) ; GFX10-NEXT: [[FABS:%[0-9]+]]:vgpr_32(s16) = G_FABS [[TRUNC]] - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY [[FABS]](s16) - ; GFX10-NEXT: $vgpr0 = COPY [[COPY1]](s32) + ; GFX10-NEXT: 
[[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY [[FABS]](s16) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY]](s32) %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s16) = G_TRUNC %0 %2:vgpr(s16) = G_FABS %1 @@ -366,31 +366,31 @@ ; SI-LABEL: name: fabs_v2s16_vv ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; SI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147450879 - ; SI-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec - ; SI-NEXT: $vgpr0 = COPY [[V_AND_B32_e64_]] + ; SI-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_]], [[PRED_COPY]], implicit $exec + ; SI-NEXT: $vgpr0 = PRED_COPY [[V_AND_B32_e64_]] ; VI-LABEL: name: fabs_v2s16_vv ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; VI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147450879 - ; VI-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec - ; VI-NEXT: $vgpr0 = COPY [[V_AND_B32_e64_]] + ; VI-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_]], [[PRED_COPY]], implicit $exec + ; VI-NEXT: $vgpr0 = PRED_COPY [[V_AND_B32_e64_]] ; GFX9-LABEL: name: fabs_v2s16_vv ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147450879 - ; GFX9-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec - ; GFX9-NEXT: $vgpr0 = COPY [[V_AND_B32_e64_]] + ; GFX9-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_]], [[PRED_COPY]], implicit $exec + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[V_AND_B32_e64_]] ; GFX10-LABEL: name: fabs_v2s16_vv ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147450879 - ; GFX10-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec - ; GFX10-NEXT: $vgpr0 = COPY [[V_AND_B32_e64_]] + ; GFX10-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_]], [[PRED_COPY]], implicit $exec + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[V_AND_B32_e64_]] %0:vgpr(<2 x s16>) = COPY $vgpr0 %1:vgpr(<2 x s16>) = G_FABS %0 $vgpr0 = COPY %1 @@ -415,25 +415,25 @@ ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 ; SI-NEXT: [[FABS:%[0-9]+]]:vgpr_32(<2 x s16>) = G_FABS [[COPY]] - ; SI-NEXT: $vgpr0 = COPY [[FABS]](<2 x s16>) + ; SI-NEXT: $vgpr0 = PRED_COPY [[FABS]](<2 x s16>) ; VI-LABEL: name: fabs_v2s16_vs ; VI: liveins: $sgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 ; VI-NEXT: [[FABS:%[0-9]+]]:vgpr_32(<2 x s16>) = G_FABS [[COPY]] - ; VI-NEXT: $vgpr0 = COPY [[FABS]](<2 x s16>) + ; VI-NEXT: $vgpr0 = PRED_COPY [[FABS]](<2 x s16>) ; GFX9-LABEL: name: fabs_v2s16_vs ; GFX9: liveins: $sgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 ; GFX9-NEXT: [[FABS:%[0-9]+]]:vgpr_32(<2 x s16>) = G_FABS [[COPY]] - ; GFX9-NEXT: $vgpr0 = COPY [[FABS]](<2 x s16>) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[FABS]](<2 x s16>) ; GFX10-LABEL: name: fabs_v2s16_vs ; GFX10: liveins: $sgpr0 ; GFX10-NEXT: {{ $}} ; 
GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 ; GFX10-NEXT: [[FABS:%[0-9]+]]:vgpr_32(<2 x s16>) = G_FABS [[COPY]] - ; GFX10-NEXT: $vgpr0 = COPY [[FABS]](<2 x s16>) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[FABS]](<2 x s16>) %0:sgpr(<2 x s16>) = COPY $sgpr0 %1:vgpr(<2 x s16>) = G_FABS %0 $vgpr0 = COPY %1 @@ -460,42 +460,42 @@ ; SI-LABEL: name: fabs_s64_ss ; SI: liveins: $sgpr0_sgpr1 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 - ; SI-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; SI-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub0 + ; SI-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub1 ; SI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483647 - ; SI-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc - ; SI-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[S_AND_B32_]], %subreg.sub1 + ; SI-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[PRED_COPY2]], [[S_MOV_B32_]], implicit-def $scc + ; SI-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[S_AND_B32_]], %subreg.sub1 ; SI-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] ; VI-LABEL: name: fabs_s64_ss ; VI: liveins: $sgpr0_sgpr1 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 - ; VI-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; VI-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub0 + ; VI-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub1 ; VI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483647 - ; VI-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc - ; VI-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[S_AND_B32_]], %subreg.sub1 + ; VI-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[PRED_COPY2]], [[S_MOV_B32_]], implicit-def $scc + ; VI-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[S_AND_B32_]], %subreg.sub1 ; VI-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] ; GFX9-LABEL: name: fabs_s64_ss ; GFX9: liveins: $sgpr0_sgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub1 ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483647 - ; GFX9-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc - ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[S_AND_B32_]], %subreg.sub1 + ; GFX9-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[PRED_COPY2]], [[S_MOV_B32_]], implicit-def $scc + ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[S_AND_B32_]], %subreg.sub1 ; GFX9-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] ; GFX10-LABEL: name: fabs_s64_ss ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: 
[[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub1 ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483647 - ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc - ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[S_AND_B32_]], %subreg.sub1 + ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[PRED_COPY2]], [[S_MOV_B32_]], implicit-def $scc + ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[S_AND_B32_]], %subreg.sub1 ; GFX10-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] %0:sgpr(s64) = COPY $sgpr0_sgpr1 %1:sgpr(s64) = G_FABS %0 @@ -523,42 +523,42 @@ ; SI-LABEL: name: fabs_s64_vv ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; SI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[PRED_COPY]].sub1 ; SI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483647 - ; SI-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_]], [[COPY1]], implicit $exec - ; SI-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; SI-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[V_AND_B32_e64_]], %subreg.sub1 + ; SI-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec + ; SI-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[PRED_COPY]].sub0 + ; SI-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[V_AND_B32_e64_]], %subreg.sub1 ; SI-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] ; VI-LABEL: name: fabs_s64_vv ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; VI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[PRED_COPY]].sub1 ; VI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483647 - ; VI-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_]], [[COPY1]], implicit $exec - ; VI-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; VI-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[V_AND_B32_e64_]], %subreg.sub1 + ; VI-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec + ; VI-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[PRED_COPY]].sub0 + ; VI-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[V_AND_B32_e64_]], %subreg.sub1 ; VI-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] ; GFX9-LABEL: name: fabs_s64_vv ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[PRED_COPY]].sub1 ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483647 - ; GFX9-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 
[[S_MOV_B32_]], [[COPY1]], implicit $exec - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[V_AND_B32_e64_]], %subreg.sub1 + ; GFX9-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[PRED_COPY]].sub0 + ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[V_AND_B32_e64_]], %subreg.sub1 ; GFX9-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] ; GFX10-LABEL: name: fabs_s64_vv ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[PRED_COPY]].sub1 ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483647 - ; GFX10-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_]], [[COPY1]], implicit $exec - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[V_AND_B32_e64_]], %subreg.sub1 + ; GFX10-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[PRED_COPY]].sub0 + ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[V_AND_B32_e64_]], %subreg.sub1 ; GFX10-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_FABS %0 @@ -694,41 +694,41 @@ ; SI: liveins: $sgpr0_sgpr1 ; SI-NEXT: {{ $}} ; SI-NEXT: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF - ; SI-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY [[DEF]].sub0 - ; SI-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[DEF]].sub1 + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY [[DEF]].sub0 + ; SI-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY [[DEF]].sub1 ; SI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483647 - ; SI-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc - ; SI-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[S_AND_B32_]], %subreg.sub1 + ; SI-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[PRED_COPY1]], [[S_MOV_B32_]], implicit-def $scc + ; SI-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[S_AND_B32_]], %subreg.sub1 ; SI-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] ; VI-LABEL: name: fabs_s64_ss_no_src_constraint ; VI: liveins: $sgpr0_sgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF - ; VI-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY [[DEF]].sub0 - ; VI-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[DEF]].sub1 + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY [[DEF]].sub0 + ; VI-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY [[DEF]].sub1 ; VI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483647 - ; VI-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc - ; VI-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[S_AND_B32_]], %subreg.sub1 + ; VI-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[PRED_COPY1]], [[S_MOV_B32_]], implicit-def $scc + ; VI-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[S_AND_B32_]], %subreg.sub1 ; VI-NEXT: S_ENDPGM 0, implicit 
[[REG_SEQUENCE]] ; GFX9-LABEL: name: fabs_s64_ss_no_src_constraint ; GFX9: liveins: $sgpr0_sgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY [[DEF]].sub0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[DEF]].sub1 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY [[DEF]].sub0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY [[DEF]].sub1 ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483647 - ; GFX9-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc - ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[S_AND_B32_]], %subreg.sub1 + ; GFX9-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[PRED_COPY1]], [[S_MOV_B32_]], implicit-def $scc + ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[S_AND_B32_]], %subreg.sub1 ; GFX9-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] ; GFX10-LABEL: name: fabs_s64_ss_no_src_constraint ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY [[DEF]].sub0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[DEF]].sub1 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY [[DEF]].sub0 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY [[DEF]].sub1 ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483647 - ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc - ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[S_AND_B32_]], %subreg.sub1 + ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[PRED_COPY1]], [[S_MOV_B32_]], implicit-def $scc + ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[S_AND_B32_]], %subreg.sub1 ; GFX10-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] %0:sgpr(s64) = IMPLICIT_DEF %1:sgpr(s64) = G_FABS %0:sgpr(s64) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fadd.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fadd.s16.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fadd.s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fadd.s16.mir @@ -13,10 +13,10 @@ ; GFX8-LABEL: name: fadd_s16_vvv ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX8-NEXT: %4:vgpr_32 = nofpexcept V_ADD_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; GFX8-NEXT: S_ENDPGM 0, implicit %4 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX8-NEXT: [[V_ADD_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F16_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, 0, implicit $mode, implicit $exec + ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_ADD_F16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s16) = G_TRUNC %0 @@ -38,10 +38,10 @@ ; GFX8-LABEL: name: fadd_s16_vsv ; GFX8: liveins: $vgpr0, $sgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: %4:vgpr_32 = nofpexcept V_ADD_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; GFX8-NEXT: S_ENDPGM 0, implicit %4 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; 
GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[V_ADD_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F16_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, 0, implicit $mode, implicit $exec + ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_ADD_F16_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = COPY $vgpr0 %2:sgpr(s16) = G_TRUNC %0 @@ -63,10 +63,10 @@ ; GFX8-LABEL: name: fadd_s16_vvs ; GFX8: liveins: $vgpr0, $sgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX8-NEXT: %4:vgpr_32 = nofpexcept V_ADD_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; GFX8-NEXT: S_ENDPGM 0, implicit %4 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX8-NEXT: [[V_ADD_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F16_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, 0, implicit $mode, implicit $exec + ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_ADD_F16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:sgpr(s32) = COPY $sgpr0 %2:vgpr(s16) = G_TRUNC %0 @@ -88,10 +88,10 @@ ; GFX8-LABEL: name: fadd_s16_vvv_fabs_lhs ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX8-NEXT: %5:vgpr_32 = nofpexcept V_ADD_F16_e64 2, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; GFX8-NEXT: S_ENDPGM 0, implicit %5 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX8-NEXT: [[V_ADD_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F16_e64 2, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, 0, implicit $mode, implicit $exec + ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_ADD_F16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s16) = G_TRUNC %0 @@ -114,10 +114,10 @@ ; GFX8-LABEL: name: fadd_s16_vvv_fabs_rhs ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX8-NEXT: %5:vgpr_32 = nofpexcept V_ADD_F16_e64 0, [[COPY]], 2, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; GFX8-NEXT: S_ENDPGM 0, implicit %5 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX8-NEXT: [[V_ADD_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F16_e64 0, [[PRED_COPY]], 2, [[PRED_COPY1]], 0, 0, implicit $mode, implicit $exec + ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_ADD_F16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s16) = G_TRUNC %0 @@ -140,10 +140,10 @@ ; GFX8-LABEL: name: fadd_s16_vvv_fneg_fabs_lhs ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX8-NEXT: %6:vgpr_32 = nofpexcept V_ADD_F16_e64 3, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; GFX8-NEXT: S_ENDPGM 0, implicit %6 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX8-NEXT: [[V_ADD_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F16_e64 3, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, 0, implicit $mode, implicit $exec + ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_ADD_F16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s16) = G_TRUNC %0 @@ -167,10 +167,10 @@ ; 
GFX8-LABEL: name: fadd_s16_vvv_fneg_fabs_rhs ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX8-NEXT: %6:vgpr_32 = nofpexcept V_ADD_F16_e64 0, [[COPY]], 3, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; GFX8-NEXT: S_ENDPGM 0, implicit %6 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX8-NEXT: [[V_ADD_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F16_e64 0, [[PRED_COPY]], 3, [[PRED_COPY1]], 0, 0, implicit $mode, implicit $exec + ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_ADD_F16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s16) = G_TRUNC %0 @@ -194,10 +194,10 @@ ; GFX8-LABEL: name: fadd_s16_fneg_copy_sgpr ; GFX8: liveins: $vgpr0, $sgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX8-NEXT: %5:vgpr_32 = nofpexcept V_ADD_F16_e64 0, [[COPY]], 1, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; GFX8-NEXT: S_ENDPGM 0, implicit %5 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX8-NEXT: [[V_ADD_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F16_e64 0, [[PRED_COPY]], 1, [[PRED_COPY1]], 0, 0, implicit $mode, implicit $exec + ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_ADD_F16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:sgpr(s32) = COPY $sgpr0 %2:vgpr(s16) = G_TRUNC %0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fadd.s32.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fadd.s32.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fadd.s32.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fadd.s32.mir @@ -13,10 +13,10 @@ ; GFX6-LABEL: name: fadd_s32_vvv ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6-NEXT: %2:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; GFX6-NEXT: S_ENDPGM 0, implicit %2 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX6-NEXT: [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, 0, implicit $mode, implicit $exec + ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_ADD_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s32) = G_FADD %0, %1 @@ -36,10 +36,10 @@ ; GFX6-LABEL: name: fadd_s32_vsv ; GFX6: liveins: $vgpr0, $sgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: %2:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; GFX6-NEXT: S_ENDPGM 0, implicit %2 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, 0, implicit $mode, implicit $exec + ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_ADD_F32_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = COPY $vgpr0 %2:vgpr(s32) = G_FADD %0, %1 @@ -59,10 +59,10 @@ ; GFX6-LABEL: name: fadd_s32_vvs ; GFX6: liveins: $vgpr0, $sgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: 
[[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX6-NEXT: %2:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; GFX6-NEXT: S_ENDPGM 0, implicit %2 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX6-NEXT: [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, 0, implicit $mode, implicit $exec + ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_ADD_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:sgpr(s32) = COPY $sgpr0 %2:vgpr(s32) = G_FADD %0, %1 @@ -82,10 +82,10 @@ ; GFX6-LABEL: name: fadd_s32_vvv_fabs_lhs ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6-NEXT: %3:vgpr_32 = nofpexcept V_ADD_F32_e64 2, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; GFX6-NEXT: S_ENDPGM 0, implicit %3 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX6-NEXT: [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 2, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, 0, implicit $mode, implicit $exec + ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_ADD_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s32) = G_FABS %0 @@ -106,9 +106,9 @@ ; GFX6-LABEL: name: fadd_s32_vvv_fabs_rhs ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6-NEXT: %3:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[COPY]], 2, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GFX6-NEXT: S_ENDPGM 0, implicit %3 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX6-NEXT: [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[PRED_COPY]], 2, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_ADD_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s32) = G_FABS %1 @@ -129,10 +129,10 @@ ; GFX6-LABEL: name: fadd_s32_vvv_fneg_fabs_lhs ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6-NEXT: %4:vgpr_32 = nofpexcept V_ADD_F32_e64 3, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; GFX6-NEXT: S_ENDPGM 0, implicit %4 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX6-NEXT: [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 3, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, 0, implicit $mode, implicit $exec + ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_ADD_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s32) = G_FABS %0 @@ -154,9 +154,9 @@ ; GFX6-LABEL: name: fadd_s32_vvv_fneg_fabs_rhs ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6-NEXT: %4:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[COPY]], 3, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GFX6-NEXT: S_ENDPGM 0, implicit %4 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX6-NEXT: [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[PRED_COPY]], 3, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_ADD_F32_e64_]] %0:vgpr(s32) = COPY 
$vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s32) = G_FABS %1 @@ -179,11 +179,11 @@ ; GFX6-LABEL: name: fadd_s32_fneg_copy_sgpr ; GFX6: liveins: $vgpr0, $sgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY1]] - ; GFX6-NEXT: %4:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[COPY]], 1, [[COPY2]], 0, 0, implicit $mode, implicit $exec - ; GFX6-NEXT: S_ENDPGM 0, implicit %4 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY1]] + ; GFX6-NEXT: [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[PRED_COPY]], 1, [[PRED_COPY2]], 0, 0, implicit $mode, implicit $exec + ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_ADD_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:sgpr(s32) = COPY $sgpr0 %2:sgpr(s32) = G_FNEG %1 @@ -207,10 +207,10 @@ ; GFX6-LABEL: name: fadd_s32_copy_fneg_copy_fabs ; GFX6: liveins: $vgpr0, $sgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX6-NEXT: %6:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[COPY]], 3, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; GFX6-NEXT: S_ENDPGM 0, implicit %6 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX6-NEXT: [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[PRED_COPY]], 3, [[PRED_COPY1]], 0, 0, implicit $mode, implicit $exec + ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_ADD_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:sgpr(s32) = COPY $sgpr0 %2:sgpr(s32) = G_FABS %1 @@ -238,12 +238,12 @@ ; GFX6-LABEL: name: fadd_s32_copy_fabs_sgpr_copy_fabs_sgpr ; GFX6: liveins: $sgpr0, $sgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]] - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY1]] - ; GFX6-NEXT: %6:vgpr_32 = nofpexcept V_ADD_F32_e64 2, [[COPY2]], 2, [[COPY3]], 0, 0, implicit $mode, implicit $exec - ; GFX6-NEXT: S_ENDPGM 0, implicit %6 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]] + ; GFX6-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY1]] + ; GFX6-NEXT: [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 2, [[PRED_COPY2]], 2, [[PRED_COPY3]], 0, 0, implicit $mode, implicit $exec + ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_ADD_F32_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 %2:sgpr(s32) = G_FABS %0 @@ -267,12 +267,12 @@ ; GFX6-LABEL: name: fadd_s32_copy_fneg_sgpr_copy_fneg_sgpr ; GFX6: liveins: $sgpr0, $sgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]] - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY1]] - ; GFX6-NEXT: %6:vgpr_32 = nofpexcept V_ADD_F32_e64 1, [[COPY2]], 1, [[COPY3]], 0, 0, implicit $mode, implicit $exec - ; GFX6-NEXT: S_ENDPGM 0, implicit %6 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = 
PRED_COPY [[PRED_COPY]] + ; GFX6-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY1]] + ; GFX6-NEXT: [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 1, [[PRED_COPY2]], 1, [[PRED_COPY3]], 0, 0, implicit $mode, implicit $exec + ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_ADD_F32_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 %2:sgpr(s32) = G_FNEG %0 @@ -296,12 +296,12 @@ ; GFX6-LABEL: name: fadd_s32_copy_fneg_fabs_sgpr_copy_fneg_fabs_sgpr ; GFX6: liveins: $sgpr0, $sgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]] - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY1]] - ; GFX6-NEXT: %8:vgpr_32 = nofpexcept V_ADD_F32_e64 3, [[COPY2]], 3, [[COPY3]], 0, 0, implicit $mode, implicit $exec - ; GFX6-NEXT: S_ENDPGM 0, implicit %8 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]] + ; GFX6-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY1]] + ; GFX6-NEXT: [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 3, [[PRED_COPY2]], 3, [[PRED_COPY3]], 0, 0, implicit $mode, implicit $exec + ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_ADD_F32_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 %2:sgpr(s32) = G_FABS %0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fadd.s64.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fadd.s64.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fadd.s64.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fadd.s64.mir @@ -13,10 +13,10 @@ ; GFX6-LABEL: name: fadd_s64_vvv ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX6-NEXT: %2:vreg_64 = nofpexcept V_ADD_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; GFX6-NEXT: S_ENDPGM 0, implicit %2 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX6-NEXT: [[V_ADD_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_ADD_F64_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, 0, implicit $mode, implicit $exec + ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_ADD_F64_e64_]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = COPY $vgpr2_vgpr3 %2:vgpr(s64) = G_FADD %0, %1 @@ -36,10 +36,10 @@ ; GFX6-LABEL: name: fadd_s64_vsv ; GFX6: liveins: $vgpr0_vgpr1, $sgpr0_sgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: %2:vreg_64 = nofpexcept V_ADD_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; GFX6-NEXT: S_ENDPGM 0, implicit %2 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[V_ADD_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_ADD_F64_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, 0, implicit $mode, implicit $exec + ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_ADD_F64_e64_]] %0:sgpr(s64) = COPY $sgpr0_sgpr1 %1:vgpr(s64) = COPY $vgpr0_vgpr1 %2:vgpr(s64) = G_FADD %0, %1 @@ -59,10 +59,10 @@ ; GFX6-LABEL: name: fadd_s64_vvs ; GFX6: liveins: $vgpr0_vgpr1, $sgpr0_sgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: 
[[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX6-NEXT: %2:vreg_64 = nofpexcept V_ADD_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; GFX6-NEXT: S_ENDPGM 0, implicit %2 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX6-NEXT: [[V_ADD_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_ADD_F64_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, 0, implicit $mode, implicit $exec + ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_ADD_F64_e64_]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:sgpr(s64) = COPY $sgpr0_sgpr1 %2:vgpr(s64) = G_FADD %0, %1 @@ -82,10 +82,10 @@ ; GFX6-LABEL: name: fadd_s64_vvv_fabs_lhs ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX6-NEXT: %3:vreg_64 = nofpexcept V_ADD_F64_e64 2, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; GFX6-NEXT: S_ENDPGM 0, implicit %3 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX6-NEXT: [[V_ADD_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_ADD_F64_e64 2, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, 0, implicit $mode, implicit $exec + ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_ADD_F64_e64_]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = COPY $vgpr2_vgpr3 %2:vgpr(s64) = G_FABS %0 @@ -106,9 +106,9 @@ ; GFX6-LABEL: name: fadd_s64_vvv_fabs_rhs ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX6-NEXT: %3:vreg_64 = nofpexcept V_ADD_F64_e64 0, [[COPY]], 2, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GFX6-NEXT: S_ENDPGM 0, implicit %3 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX6-NEXT: [[V_ADD_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_ADD_F64_e64 0, [[PRED_COPY]], 2, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_ADD_F64_e64_]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = COPY $vgpr2_vgpr3 %2:vgpr(s64) = G_FABS %1 @@ -129,10 +129,10 @@ ; GFX6-LABEL: name: fadd_s64_vvv_fneg_fabs_lhs ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX6-NEXT: %4:vreg_64 = nofpexcept V_ADD_F64_e64 3, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; GFX6-NEXT: S_ENDPGM 0, implicit %4 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX6-NEXT: [[V_ADD_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_ADD_F64_e64 3, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, 0, implicit $mode, implicit $exec + ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_ADD_F64_e64_]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = COPY $vgpr2_vgpr3 %2:vgpr(s64) = G_FABS %0 @@ -154,9 +154,9 @@ ; GFX6-LABEL: name: fadd_s64_vvv_fneg_fabs_rhs ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: %4:vreg_64 = nofpexcept V_ADD_F64_e64 0, [[COPY]], 3, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GFX6-NEXT: S_ENDPGM 0, implicit %4 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX6-NEXT: 
[[V_ADD_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_ADD_F64_e64 0, [[PRED_COPY]], 3, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_ADD_F64_e64_]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = COPY $vgpr0_vgpr1 %2:vgpr(s64) = G_FABS %1 @@ -180,11 +180,11 @@ ; GFX6-LABEL: name: fadd_s64_fneg_copy_sgpr ; GFX6: liveins: $vgpr0_vgpr1, $sgpr0_sgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY [[COPY1]] - ; GFX6-NEXT: %4:vreg_64 = nofpexcept V_ADD_F64_e64 0, [[COPY]], 1, [[COPY2]], 0, 0, implicit $mode, implicit $exec - ; GFX6-NEXT: S_ENDPGM 0, implicit %4 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64 = PRED_COPY [[PRED_COPY1]] + ; GFX6-NEXT: [[V_ADD_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_ADD_F64_e64 0, [[PRED_COPY]], 1, [[PRED_COPY2]], 0, 0, implicit $mode, implicit $exec + ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_ADD_F64_e64_]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:sgpr(s64) = COPY $sgpr0_sgpr1 %2:sgpr(s64) = G_FNEG %1 @@ -210,12 +210,12 @@ ; GFX6-LABEL: name: fadd_s64_copy_fabs_sgpr_copy_fabs_sgpr ; GFX6: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY [[COPY]] - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[COPY1]] - ; GFX6-NEXT: %6:vreg_64 = nofpexcept V_ADD_F64_e64 2, [[COPY2]], 2, [[COPY3]], 0, 0, implicit $mode, implicit $exec - ; GFX6-NEXT: S_ENDPGM 0, implicit %6 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr2_sgpr3 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64 = PRED_COPY [[PRED_COPY]] + ; GFX6-NEXT: [[PRED_COPY3:%[0-9]+]]:vreg_64 = PRED_COPY [[PRED_COPY1]] + ; GFX6-NEXT: [[V_ADD_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_ADD_F64_e64 2, [[PRED_COPY2]], 2, [[PRED_COPY3]], 0, 0, implicit $mode, implicit $exec + ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_ADD_F64_e64_]] %0:sgpr(s64) = COPY $sgpr0_sgpr1 %1:sgpr(s64) = COPY $sgpr2_sgpr3 %2:sgpr(s64) = G_FABS %0 @@ -239,12 +239,12 @@ ; GFX6-LABEL: name: fadd_s64_copy_fneg_sgpr_copy_fneg_sgpr ; GFX6: liveins: $sgpr0, $sgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY [[COPY]] - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[COPY1]] - ; GFX6-NEXT: %6:vreg_64 = nofpexcept V_ADD_F64_e64 1, [[COPY2]], 1, [[COPY3]], 0, 0, implicit $mode, implicit $exec - ; GFX6-NEXT: S_ENDPGM 0, implicit %6 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr2_sgpr3 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64 = PRED_COPY [[PRED_COPY]] + ; GFX6-NEXT: [[PRED_COPY3:%[0-9]+]]:vreg_64 = PRED_COPY [[PRED_COPY1]] + ; GFX6-NEXT: [[V_ADD_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_ADD_F64_e64 1, [[PRED_COPY2]], 1, [[PRED_COPY3]], 0, 0, implicit $mode, implicit $exec + ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_ADD_F64_e64_]] %0:sgpr(s64) = COPY $sgpr0_sgpr1 %1:sgpr(s64) = COPY $sgpr2_sgpr3 %2:sgpr(s64) = G_FNEG %0 @@ -268,12 +268,12 @@ ; GFX6-LABEL: name: 
fadd_s64_copy_fneg_fabs_sgpr_copy_fneg_fabs_sgpr ; GFX6: liveins: $sgpr0, $sgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY [[COPY]] - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[COPY1]] - ; GFX6-NEXT: %8:vreg_64 = nofpexcept V_ADD_F64_e64 3, [[COPY2]], 3, [[COPY3]], 0, 0, implicit $mode, implicit $exec - ; GFX6-NEXT: S_ENDPGM 0, implicit %8 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr2_sgpr3 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64 = PRED_COPY [[PRED_COPY]] + ; GFX6-NEXT: [[PRED_COPY3:%[0-9]+]]:vreg_64 = PRED_COPY [[PRED_COPY1]] + ; GFX6-NEXT: [[V_ADD_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_ADD_F64_e64 3, [[PRED_COPY2]], 3, [[PRED_COPY3]], 0, 0, implicit $mode, implicit $exec + ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_ADD_F64_e64_]] %0:sgpr(s64) = COPY $sgpr0_sgpr1 %1:sgpr(s64) = COPY $sgpr2_sgpr3 %2:sgpr(s64) = G_FABS %0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fcanonicalize.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fcanonicalize.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fcanonicalize.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fcanonicalize.mir @@ -20,27 +20,27 @@ ; GFX8-LABEL: name: fcanonicalize_f16_denorm ; GFX8: liveins: $vgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: %2:vgpr_32 = nofpexcept V_MAX_F16_e64 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GFX8-NEXT: S_ENDPGM 0, implicit %2 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[V_MAX_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F16_e64 0, [[PRED_COPY]], 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_MAX_F16_e64_]] ; GFX9-LABEL: name: fcanonicalize_f16_denorm ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: %2:vgpr_32 = nofpexcept V_MAX_F16_e64 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GFX9-NEXT: S_ENDPGM 0, implicit %2 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[V_MAX_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F16_e64 0, [[PRED_COPY]], 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_MAX_F16_e64_]] ; GFX10-LABEL: name: fcanonicalize_f16_denorm ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: %2:vgpr_32 = nofpexcept V_MAX_F16_e64 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GFX10-NEXT: S_ENDPGM 0, implicit %2 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[V_MAX_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F16_e64 0, [[PRED_COPY]], 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_MAX_F16_e64_]] ; GFX11-LABEL: name: fcanonicalize_f16_denorm ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: %2:vgpr_32 = nofpexcept V_MAX_F16_t16_e64 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GFX11-NEXT: S_ENDPGM 0, implicit %2 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[V_MAX_F16_t16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept 
V_MAX_F16_t16_e64 0, [[PRED_COPY]], 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MAX_F16_t16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s16) = G_TRUNC %0 %2:vgpr(s16) = G_FCANONICALIZE %1 @@ -63,27 +63,27 @@ ; GFX8-LABEL: name: fcanonicalize_f16_flush ; GFX8: liveins: $vgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: %2:vgpr_32 = nofpexcept V_MUL_F16_e64 0, 15360, 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GFX8-NEXT: S_ENDPGM 0, implicit %2 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[V_MUL_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MUL_F16_e64 0, 15360, 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_MUL_F16_e64_]] ; GFX9-LABEL: name: fcanonicalize_f16_flush ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: %2:vgpr_32 = nofpexcept V_MAX_F16_e64 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GFX9-NEXT: S_ENDPGM 0, implicit %2 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[V_MAX_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F16_e64 0, [[PRED_COPY]], 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_MAX_F16_e64_]] ; GFX10-LABEL: name: fcanonicalize_f16_flush ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: %2:vgpr_32 = nofpexcept V_MAX_F16_e64 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GFX10-NEXT: S_ENDPGM 0, implicit %2 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[V_MAX_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F16_e64 0, [[PRED_COPY]], 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_MAX_F16_e64_]] ; GFX11-LABEL: name: fcanonicalize_f16_flush ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: %2:vgpr_32 = nofpexcept V_MAX_F16_t16_e64 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GFX11-NEXT: S_ENDPGM 0, implicit %2 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[V_MAX_F16_t16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F16_t16_e64 0, [[PRED_COPY]], 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MAX_F16_t16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s16) = G_TRUNC %0 %2:vgpr(s16) = G_FCANONICALIZE %1 @@ -107,27 +107,27 @@ ; GFX8-LABEL: name: fcanonicalize_f32_denorm ; GFX8: liveins: $vgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: %1:vgpr_32 = nofpexcept V_MUL_F32_e64 0, 1065353216, 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GFX8-NEXT: S_ENDPGM 0, implicit %1 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[V_MUL_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MUL_F32_e64 0, 1065353216, 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_MUL_F32_e64_]] ; GFX9-LABEL: name: fcanonicalize_f32_denorm ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: %1:vgpr_32 = nofpexcept V_MAX_F32_e64 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GFX9-NEXT: S_ENDPGM 0, implicit %1 + ; GFX9-NEXT: 
[[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[V_MAX_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_MAX_F32_e64_]] ; GFX10-LABEL: name: fcanonicalize_f32_denorm ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: %1:vgpr_32 = nofpexcept V_MAX_F32_e64 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GFX10-NEXT: S_ENDPGM 0, implicit %1 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[V_MAX_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_MAX_F32_e64_]] ; GFX11-LABEL: name: fcanonicalize_f32_denorm ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: %1:vgpr_32 = nofpexcept V_MAX_F32_e64 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GFX11-NEXT: S_ENDPGM 0, implicit %1 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[V_MAX_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MAX_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = G_FCANONICALIZE %0 S_ENDPGM 0, implicit %1 @@ -150,27 +150,27 @@ ; GFX8-LABEL: name: fcanonicalize_f32_flush ; GFX8: liveins: $vgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: %1:vgpr_32 = nofpexcept V_MUL_F32_e64 0, 1065353216, 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GFX8-NEXT: S_ENDPGM 0, implicit %1 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[V_MUL_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MUL_F32_e64 0, 1065353216, 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_MUL_F32_e64_]] ; GFX9-LABEL: name: fcanonicalize_f32_flush ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: %1:vgpr_32 = nofpexcept V_MAX_F32_e64 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GFX9-NEXT: S_ENDPGM 0, implicit %1 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[V_MAX_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_MAX_F32_e64_]] ; GFX10-LABEL: name: fcanonicalize_f32_flush ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: %1:vgpr_32 = nofpexcept V_MAX_F32_e64 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GFX10-NEXT: S_ENDPGM 0, implicit %1 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[V_MAX_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_MAX_F32_e64_]] ; GFX11-LABEL: name: fcanonicalize_f32_flush ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: %1:vgpr_32 = nofpexcept V_MAX_F32_e64 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GFX11-NEXT: S_ENDPGM 0, implicit %1 + ; GFX11-NEXT: 
[[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[V_MAX_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MAX_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = G_FCANONICALIZE %0 S_ENDPGM 0, implicit %1 @@ -193,27 +193,27 @@ ; GFX8-LABEL: name: fcanonicalize_v2f16_denorm ; GFX8: liveins: $vgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: %1:vgpr_32 = nofpexcept V_PK_MAX_F16 8, [[COPY]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec - ; GFX8-NEXT: S_ENDPGM 0, implicit %1 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[V_PK_MAX_F16_:%[0-9]+]]:vgpr_32 = nofpexcept V_PK_MAX_F16 8, [[PRED_COPY]], 8, [[PRED_COPY]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec + ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_PK_MAX_F16_]] ; GFX9-LABEL: name: fcanonicalize_v2f16_denorm ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: %1:vgpr_32 = nofpexcept V_PK_MAX_F16 8, [[COPY]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec - ; GFX9-NEXT: S_ENDPGM 0, implicit %1 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[V_PK_MAX_F16_:%[0-9]+]]:vgpr_32 = nofpexcept V_PK_MAX_F16 8, [[PRED_COPY]], 8, [[PRED_COPY]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec + ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_PK_MAX_F16_]] ; GFX10-LABEL: name: fcanonicalize_v2f16_denorm ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: %1:vgpr_32 = nofpexcept V_PK_MAX_F16 8, [[COPY]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec - ; GFX10-NEXT: S_ENDPGM 0, implicit %1 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[V_PK_MAX_F16_:%[0-9]+]]:vgpr_32 = nofpexcept V_PK_MAX_F16 8, [[PRED_COPY]], 8, [[PRED_COPY]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec + ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_PK_MAX_F16_]] ; GFX11-LABEL: name: fcanonicalize_v2f16_denorm ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: %1:vgpr_32 = nofpexcept V_PK_MAX_F16 8, [[COPY]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec - ; GFX11-NEXT: S_ENDPGM 0, implicit %1 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[V_PK_MAX_F16_:%[0-9]+]]:vgpr_32 = nofpexcept V_PK_MAX_F16 8, [[PRED_COPY]], 8, [[PRED_COPY]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec + ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_PK_MAX_F16_]] %0:vgpr(<2 x s16>) = COPY $vgpr0 %1:vgpr(<2 x s16>) = G_FCANONICALIZE %0 S_ENDPGM 0, implicit %1 @@ -236,27 +236,27 @@ ; GFX8-LABEL: name: fcanonicalize_v2f16_flush ; GFX8: liveins: $vgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: %1:vgpr_32 = nofpexcept V_PK_MUL_F16 0, 15360, 8, [[COPY]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec - ; GFX8-NEXT: S_ENDPGM 0, implicit %1 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[V_PK_MUL_F16_:%[0-9]+]]:vgpr_32 = nofpexcept V_PK_MUL_F16 0, 15360, 8, [[PRED_COPY]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec + ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_PK_MUL_F16_]] ; GFX9-LABEL: name: fcanonicalize_v2f16_flush ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; 
GFX9-NEXT: %1:vgpr_32 = nofpexcept V_PK_MAX_F16 8, [[COPY]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec - ; GFX9-NEXT: S_ENDPGM 0, implicit %1 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[V_PK_MAX_F16_:%[0-9]+]]:vgpr_32 = nofpexcept V_PK_MAX_F16 8, [[PRED_COPY]], 8, [[PRED_COPY]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec + ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_PK_MAX_F16_]] ; GFX10-LABEL: name: fcanonicalize_v2f16_flush ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: %1:vgpr_32 = nofpexcept V_PK_MAX_F16 8, [[COPY]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec - ; GFX10-NEXT: S_ENDPGM 0, implicit %1 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[V_PK_MAX_F16_:%[0-9]+]]:vgpr_32 = nofpexcept V_PK_MAX_F16 8, [[PRED_COPY]], 8, [[PRED_COPY]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec + ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_PK_MAX_F16_]] ; GFX11-LABEL: name: fcanonicalize_v2f16_flush ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: %1:vgpr_32 = nofpexcept V_PK_MAX_F16 8, [[COPY]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec - ; GFX11-NEXT: S_ENDPGM 0, implicit %1 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[V_PK_MAX_F16_:%[0-9]+]]:vgpr_32 = nofpexcept V_PK_MAX_F16 8, [[PRED_COPY]], 8, [[PRED_COPY]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec + ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_PK_MAX_F16_]] %0:vgpr(<2 x s16>) = COPY $vgpr0 %1:vgpr(<2 x s16>) = G_FCANONICALIZE %0 S_ENDPGM 0, implicit %1 @@ -279,27 +279,27 @@ ; GFX8-LABEL: name: fcanonicalize_f64_denorm ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: %1:vreg_64 = nofpexcept V_MAX_F64_e64 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GFX8-NEXT: S_ENDPGM 0, implicit %1 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[V_MAX_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_MAX_F64_e64 0, [[PRED_COPY]], 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_MAX_F64_e64_]] ; GFX9-LABEL: name: fcanonicalize_f64_denorm ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: %1:vreg_64 = nofpexcept V_MAX_F64_e64 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GFX9-NEXT: S_ENDPGM 0, implicit %1 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[V_MAX_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_MAX_F64_e64 0, [[PRED_COPY]], 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_MAX_F64_e64_]] ; GFX10-LABEL: name: fcanonicalize_f64_denorm ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: %1:vreg_64 = nofpexcept V_MAX_F64_e64 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GFX10-NEXT: S_ENDPGM 0, implicit %1 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[V_MAX_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_MAX_F64_e64 0, [[PRED_COPY]], 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_MAX_F64_e64_]] ; GFX11-LABEL: name: fcanonicalize_f64_denorm ; GFX11: 
liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: %1:vreg_64 = nofpexcept V_MAX_F64_e64 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GFX11-NEXT: S_ENDPGM 0, implicit %1 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[V_MAX_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_MAX_F64_e64 0, [[PRED_COPY]], 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MAX_F64_e64_]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_FCANONICALIZE %0 S_ENDPGM 0, implicit %1 @@ -322,27 +322,27 @@ ; GFX8-LABEL: name: fcanonicalize_f64_flush ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: %1:vreg_64 = nofpexcept V_MUL_F64_e64 0, 4607182418800017408, 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GFX8-NEXT: S_ENDPGM 0, implicit %1 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[V_MUL_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_MUL_F64_e64 0, 4607182418800017408, 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_MUL_F64_e64_]] ; GFX9-LABEL: name: fcanonicalize_f64_flush ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: %1:vreg_64 = nofpexcept V_MAX_F64_e64 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GFX9-NEXT: S_ENDPGM 0, implicit %1 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[V_MAX_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_MAX_F64_e64 0, [[PRED_COPY]], 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_MAX_F64_e64_]] ; GFX10-LABEL: name: fcanonicalize_f64_flush ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: %1:vreg_64 = nofpexcept V_MAX_F64_e64 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GFX10-NEXT: S_ENDPGM 0, implicit %1 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[V_MAX_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_MAX_F64_e64 0, [[PRED_COPY]], 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_MAX_F64_e64_]] ; GFX11-LABEL: name: fcanonicalize_f64_flush ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: %1:vreg_64 = nofpexcept V_MAX_F64_e64 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GFX11-NEXT: S_ENDPGM 0, implicit %1 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[V_MAX_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_MAX_F64_e64 0, [[PRED_COPY]], 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MAX_F64_e64_]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_FCANONICALIZE %0 S_ENDPGM 0, implicit %1 @@ -364,27 +364,27 @@ ; GFX8-LABEL: name: fcanonicalize_fabs_f32_denorm ; GFX8: liveins: $vgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: %2:vgpr_32 = nofpexcept V_MUL_F32_e64 0, 1065353216, 2, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GFX8-NEXT: S_ENDPGM 0, implicit %2 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[V_MUL_F32_e64_:%[0-9]+]]:vgpr_32 
= nofpexcept V_MUL_F32_e64 0, 1065353216, 2, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_MUL_F32_e64_]] ; GFX9-LABEL: name: fcanonicalize_fabs_f32_denorm ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: %2:vgpr_32 = nofpexcept V_MAX_F32_e64 2, [[COPY]], 2, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GFX9-NEXT: S_ENDPGM 0, implicit %2 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[V_MAX_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F32_e64 2, [[PRED_COPY]], 2, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_MAX_F32_e64_]] ; GFX10-LABEL: name: fcanonicalize_fabs_f32_denorm ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: %2:vgpr_32 = nofpexcept V_MAX_F32_e64 2, [[COPY]], 2, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GFX10-NEXT: S_ENDPGM 0, implicit %2 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[V_MAX_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F32_e64 2, [[PRED_COPY]], 2, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_MAX_F32_e64_]] ; GFX11-LABEL: name: fcanonicalize_fabs_f32_denorm ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: %2:vgpr_32 = nofpexcept V_MAX_F32_e64 2, [[COPY]], 2, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GFX11-NEXT: S_ENDPGM 0, implicit %2 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[V_MAX_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F32_e64 2, [[PRED_COPY]], 2, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MAX_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = G_FABS %0 %2:vgpr(s32) = G_FCANONICALIZE %1 @@ -408,27 +408,27 @@ ; GFX8-LABEL: name: fcanonicalize_fabs_f32_flush ; GFX8: liveins: $vgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: %2:vgpr_32 = nofpexcept V_MUL_F32_e64 0, 1065353216, 2, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GFX8-NEXT: S_ENDPGM 0, implicit %2 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[V_MUL_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MUL_F32_e64 0, 1065353216, 2, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_MUL_F32_e64_]] ; GFX9-LABEL: name: fcanonicalize_fabs_f32_flush ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: %2:vgpr_32 = nofpexcept V_MAX_F32_e64 2, [[COPY]], 2, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GFX9-NEXT: S_ENDPGM 0, implicit %2 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[V_MAX_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F32_e64 2, [[PRED_COPY]], 2, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_MAX_F32_e64_]] ; GFX10-LABEL: name: fcanonicalize_fabs_f32_flush ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: %2:vgpr_32 = nofpexcept V_MAX_F32_e64 2, [[COPY]], 2, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GFX10-NEXT: S_ENDPGM 0, implicit %2 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: 
[[V_MAX_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F32_e64 2, [[PRED_COPY]], 2, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_MAX_F32_e64_]] ; GFX11-LABEL: name: fcanonicalize_fabs_f32_flush ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: %2:vgpr_32 = nofpexcept V_MAX_F32_e64 2, [[COPY]], 2, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GFX11-NEXT: S_ENDPGM 0, implicit %2 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[V_MAX_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F32_e64 2, [[PRED_COPY]], 2, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MAX_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = G_FABS %0 %2:vgpr(s32) = G_FCANONICALIZE %1 @@ -451,27 +451,27 @@ ; GFX8-LABEL: name: fcanonicalize_fneg_f32_denorm ; GFX8: liveins: $vgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: %2:vgpr_32 = nofpexcept V_MUL_F32_e64 0, 3212836864, 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GFX8-NEXT: S_ENDPGM 0, implicit %2 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[V_MUL_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MUL_F32_e64 0, 3212836864, 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_MUL_F32_e64_]] ; GFX9-LABEL: name: fcanonicalize_fneg_f32_denorm ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: %2:vgpr_32 = nofpexcept V_MAX_F32_e64 1, [[COPY]], 1, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GFX9-NEXT: S_ENDPGM 0, implicit %2 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[V_MAX_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F32_e64 1, [[PRED_COPY]], 1, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_MAX_F32_e64_]] ; GFX10-LABEL: name: fcanonicalize_fneg_f32_denorm ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: %2:vgpr_32 = nofpexcept V_MAX_F32_e64 1, [[COPY]], 1, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GFX10-NEXT: S_ENDPGM 0, implicit %2 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[V_MAX_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F32_e64 1, [[PRED_COPY]], 1, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_MAX_F32_e64_]] ; GFX11-LABEL: name: fcanonicalize_fneg_f32_denorm ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: %2:vgpr_32 = nofpexcept V_MAX_F32_e64 1, [[COPY]], 1, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GFX11-NEXT: S_ENDPGM 0, implicit %2 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[V_MAX_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F32_e64 1, [[PRED_COPY]], 1, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MAX_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = G_FNEG %0 %2:vgpr(s32) = G_FCANONICALIZE %1 @@ -494,27 +494,27 @@ ; GFX8-LABEL: name: fcanonicalize_fneg_f32_flush ; GFX8: liveins: $vgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: %2:vgpr_32 = nofpexcept V_MUL_F32_e64 0, 3212836864, 0, [[COPY]], 0, 0, implicit $mode, implicit 
$exec - ; GFX8-NEXT: S_ENDPGM 0, implicit %2 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[V_MUL_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MUL_F32_e64 0, 3212836864, 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_MUL_F32_e64_]] ; GFX9-LABEL: name: fcanonicalize_fneg_f32_flush ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: %2:vgpr_32 = nofpexcept V_MAX_F32_e64 1, [[COPY]], 1, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GFX9-NEXT: S_ENDPGM 0, implicit %2 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[V_MAX_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F32_e64 1, [[PRED_COPY]], 1, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_MAX_F32_e64_]] ; GFX10-LABEL: name: fcanonicalize_fneg_f32_flush ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: %2:vgpr_32 = nofpexcept V_MAX_F32_e64 1, [[COPY]], 1, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GFX10-NEXT: S_ENDPGM 0, implicit %2 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[V_MAX_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F32_e64 1, [[PRED_COPY]], 1, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_MAX_F32_e64_]] ; GFX11-LABEL: name: fcanonicalize_fneg_f32_flush ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: %2:vgpr_32 = nofpexcept V_MAX_F32_e64 1, [[COPY]], 1, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GFX11-NEXT: S_ENDPGM 0, implicit %2 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[V_MAX_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F32_e64 1, [[PRED_COPY]], 1, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MAX_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = G_FNEG %0 %2:vgpr(s32) = G_FCANONICALIZE %1 @@ -537,35 +537,35 @@ ; GFX8-LABEL: name: fcanonicalize_fneg_fabs_f32_denorm ; GFX8: liveins: $vgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 - ; GFX8-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec - ; GFX8-NEXT: %3:vgpr_32 = nofpexcept V_MUL_F32_e64 0, 1065353216, 2, [[V_XOR_B32_e64_]], 0, 0, implicit $mode, implicit $exec - ; GFX8-NEXT: S_ENDPGM 0, implicit %3 + ; GFX8-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[PRED_COPY]], implicit $exec + ; GFX8-NEXT: [[V_MUL_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MUL_F32_e64 0, 1065353216, 2, [[V_XOR_B32_e64_]], 0, 0, implicit $mode, implicit $exec + ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_MUL_F32_e64_]] ; GFX9-LABEL: name: fcanonicalize_fneg_fabs_f32_denorm ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 - ; GFX9-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec - ; GFX9-NEXT: %3:vgpr_32 = nofpexcept V_MAX_F32_e64 2, [[V_XOR_B32_e64_]], 2, [[V_XOR_B32_e64_]], 0, 0, implicit $mode, implicit $exec 
- ; GFX9-NEXT: S_ENDPGM 0, implicit %3 + ; GFX9-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[PRED_COPY]], implicit $exec + ; GFX9-NEXT: [[V_MAX_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F32_e64 2, [[V_XOR_B32_e64_]], 2, [[V_XOR_B32_e64_]], 0, 0, implicit $mode, implicit $exec + ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_MAX_F32_e64_]] ; GFX10-LABEL: name: fcanonicalize_fneg_fabs_f32_denorm ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 - ; GFX10-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec - ; GFX10-NEXT: %3:vgpr_32 = nofpexcept V_MAX_F32_e64 2, [[V_XOR_B32_e64_]], 2, [[V_XOR_B32_e64_]], 0, 0, implicit $mode, implicit $exec - ; GFX10-NEXT: S_ENDPGM 0, implicit %3 + ; GFX10-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[PRED_COPY]], implicit $exec + ; GFX10-NEXT: [[V_MAX_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F32_e64 2, [[V_XOR_B32_e64_]], 2, [[V_XOR_B32_e64_]], 0, 0, implicit $mode, implicit $exec + ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_MAX_F32_e64_]] ; GFX11-LABEL: name: fcanonicalize_fneg_fabs_f32_denorm ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 - ; GFX11-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec - ; GFX11-NEXT: %3:vgpr_32 = nofpexcept V_MAX_F32_e64 2, [[V_XOR_B32_e64_]], 2, [[V_XOR_B32_e64_]], 0, 0, implicit $mode, implicit $exec - ; GFX11-NEXT: S_ENDPGM 0, implicit %3 + ; GFX11-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[PRED_COPY]], implicit $exec + ; GFX11-NEXT: [[V_MAX_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F32_e64 2, [[V_XOR_B32_e64_]], 2, [[V_XOR_B32_e64_]], 0, 0, implicit $mode, implicit $exec + ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MAX_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = G_FNEG %0 %2:vgpr(s32) = G_FABS %1 @@ -589,35 +589,35 @@ ; GFX8-LABEL: name: fcanonicalize_fneg_fabs_f32_flush ; GFX8: liveins: $vgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 - ; GFX8-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec - ; GFX8-NEXT: %3:vgpr_32 = nofpexcept V_MUL_F32_e64 0, 1065353216, 2, [[V_XOR_B32_e64_]], 0, 0, implicit $mode, implicit $exec - ; GFX8-NEXT: S_ENDPGM 0, implicit %3 + ; GFX8-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[PRED_COPY]], implicit $exec + ; GFX8-NEXT: [[V_MUL_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MUL_F32_e64 0, 1065353216, 2, [[V_XOR_B32_e64_]], 0, 0, implicit $mode, implicit $exec + ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_MUL_F32_e64_]] ; GFX9-LABEL: name: fcanonicalize_fneg_fabs_f32_flush ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 - ; GFX9-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec - ; 
GFX9-NEXT: %3:vgpr_32 = nofpexcept V_MAX_F32_e64 2, [[V_XOR_B32_e64_]], 2, [[V_XOR_B32_e64_]], 0, 0, implicit $mode, implicit $exec - ; GFX9-NEXT: S_ENDPGM 0, implicit %3 + ; GFX9-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[PRED_COPY]], implicit $exec + ; GFX9-NEXT: [[V_MAX_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F32_e64 2, [[V_XOR_B32_e64_]], 2, [[V_XOR_B32_e64_]], 0, 0, implicit $mode, implicit $exec + ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_MAX_F32_e64_]] ; GFX10-LABEL: name: fcanonicalize_fneg_fabs_f32_flush ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 - ; GFX10-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec - ; GFX10-NEXT: %3:vgpr_32 = nofpexcept V_MAX_F32_e64 2, [[V_XOR_B32_e64_]], 2, [[V_XOR_B32_e64_]], 0, 0, implicit $mode, implicit $exec - ; GFX10-NEXT: S_ENDPGM 0, implicit %3 + ; GFX10-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[PRED_COPY]], implicit $exec + ; GFX10-NEXT: [[V_MAX_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F32_e64 2, [[V_XOR_B32_e64_]], 2, [[V_XOR_B32_e64_]], 0, 0, implicit $mode, implicit $exec + ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_MAX_F32_e64_]] ; GFX11-LABEL: name: fcanonicalize_fneg_fabs_f32_flush ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 - ; GFX11-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec - ; GFX11-NEXT: %3:vgpr_32 = nofpexcept V_MAX_F32_e64 2, [[V_XOR_B32_e64_]], 2, [[V_XOR_B32_e64_]], 0, 0, implicit $mode, implicit $exec - ; GFX11-NEXT: S_ENDPGM 0, implicit %3 + ; GFX11-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[PRED_COPY]], implicit $exec + ; GFX11-NEXT: [[V_MAX_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F32_e64 2, [[V_XOR_B32_e64_]], 2, [[V_XOR_B32_e64_]], 0, 0, implicit $mode, implicit $exec + ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MAX_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = G_FNEG %0 %2:vgpr(s32) = G_FABS %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fceil.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fceil.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fceil.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fceil.mir @@ -14,9 +14,9 @@ ; CHECK-LABEL: name: fceil_s32_vv ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: %1:vgpr_32 = nofpexcept V_CEIL_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: $vgpr0 = COPY %1 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[V_CEIL_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CEIL_F32_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[V_CEIL_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = G_FCEIL %0 $vgpr0 = COPY %1 @@ -35,9 +35,9 @@ ; CHECK-LABEL: name: fceil_s32_vs ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; CHECK-NEXT: %1:vgpr_32 = nofpexcept V_CEIL_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: $vgpr0 = COPY %1 + ; 
CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; CHECK-NEXT: [[V_CEIL_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CEIL_F32_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[V_CEIL_F32_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = G_FCEIL %0 $vgpr0 = COPY %1 @@ -56,9 +56,9 @@ ; CHECK-LABEL: name: fceil_s64_sv ; CHECK: liveins: $sgpr0_sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: %1:vreg_64 = nofpexcept V_CEIL_F64_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY %1 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[V_CEIL_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_CEIL_F64_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: $vgpr0_vgpr1 = PRED_COPY [[V_CEIL_F64_e64_]] %0:sgpr(s64) = COPY $sgpr0_sgpr1 %1:vgpr(s64) = G_FCEIL %0 $vgpr0_vgpr1 = COPY %1 @@ -77,9 +77,9 @@ ; CHECK-LABEL: name: fceil_s64_vv ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: %1:vreg_64 = nofpexcept V_CEIL_F64_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY %1 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[V_CEIL_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_CEIL_F64_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: $vgpr0_vgpr1 = PRED_COPY [[V_CEIL_F64_e64_]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_FCEIL %0 $vgpr0_vgpr1 = COPY %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fceil.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fceil.s16.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fceil.s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fceil.s16.mir @@ -17,8 +17,8 @@ ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; GCN-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) ; GCN-NEXT: [[FCEIL:%[0-9]+]]:sreg_32(s16) = G_FCEIL [[TRUNC]] - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32(s32) = COPY [[FCEIL]](s16) - ; GCN-NEXT: $sgpr0 = COPY [[COPY1]](s32) + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32(s32) = PRED_COPY [[FCEIL]](s16) + ; GCN-NEXT: $sgpr0 = PRED_COPY [[PRED_COPY]](s32) %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s16) = G_TRUNC %0 %2:sgpr(s16) = G_FCEIL %1 @@ -39,9 +39,9 @@ ; GCN-LABEL: name: fceil_s16_vv ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: %2:vgpr_32 = nofpexcept V_CEIL_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: $vgpr0 = COPY %2 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[V_CEIL_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CEIL_F16_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr0 = PRED_COPY [[V_CEIL_F16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s16) = G_TRUNC %0 %2:vgpr(s16) = G_FCEIL %1 @@ -62,9 +62,9 @@ ; GCN-LABEL: name: fceil_s16_vs ; GCN: liveins: $sgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: %2:vgpr_32 = nofpexcept V_CEIL_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: $vgpr0 = COPY %2 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[V_CEIL_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CEIL_F16_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr0 = PRED_COPY 
[[V_CEIL_F16_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s16) = G_TRUNC %0 %2:vgpr(s16) = G_FCEIL %1 @@ -85,9 +85,9 @@ ; GCN-LABEL: name: fceil_fneg_s16_vv ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: %3:vgpr_32 = nofpexcept V_CEIL_F16_e64 1, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: $vgpr0 = COPY %3 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[V_CEIL_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CEIL_F16_e64 1, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr0 = PRED_COPY [[V_CEIL_F16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s16) = G_TRUNC %0 %2:vgpr(s16) = G_FNEG %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fcmp.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fcmp.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fcmp.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fcmp.mir @@ -42,17 +42,17 @@ ; WAVE64-LABEL: name: fcmp_oeq_s32_vv ; WAVE64: liveins: $vgpr0, $vgpr1 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE64-NEXT: %2:sreg_64_xexec = nofpexcept V_CMP_EQ_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE64-NEXT: S_ENDPGM 0, implicit %2 + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE64-NEXT: [[V_CMP_EQ_F32_e64_:%[0-9]+]]:sreg_64_xexec = nofpexcept V_CMP_EQ_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_F32_e64_]] ; WAVE32-LABEL: name: fcmp_oeq_s32_vv ; WAVE32: liveins: $vgpr0, $vgpr1 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE32-NEXT: %2:sreg_32_xm0_xexec = nofpexcept V_CMP_EQ_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE32-NEXT: S_ENDPGM 0, implicit %2 + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE32-NEXT: [[V_CMP_EQ_F32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_EQ_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vcc(s1) = G_FCMP floatpred(oeq), %0, %1 @@ -70,17 +70,17 @@ ; WAVE64-LABEL: name: fcmp_ogt_s32_vv ; WAVE64: liveins: $vgpr0, $vgpr1 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE64-NEXT: %2:sreg_64_xexec = nofpexcept V_CMP_GT_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE64-NEXT: S_ENDPGM 0, implicit %2 + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE64-NEXT: [[V_CMP_GT_F32_e64_:%[0-9]+]]:sreg_64_xexec = nofpexcept V_CMP_GT_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_GT_F32_e64_]] ; WAVE32-LABEL: name: fcmp_ogt_s32_vv ; WAVE32: liveins: $vgpr0, $vgpr1 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE32-NEXT: %2:sreg_32_xm0_xexec = nofpexcept 
V_CMP_GT_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE32-NEXT: S_ENDPGM 0, implicit %2 + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE32-NEXT: [[V_CMP_GT_F32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_GT_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_GT_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vcc(s1) = G_FCMP floatpred(ogt), %0, %1 @@ -98,17 +98,17 @@ ; WAVE64-LABEL: name: fcmp_oge_s32_vv ; WAVE64: liveins: $vgpr0, $vgpr1 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE64-NEXT: %2:sreg_64_xexec = nofpexcept V_CMP_GE_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE64-NEXT: S_ENDPGM 0, implicit %2 + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE64-NEXT: [[V_CMP_GE_F32_e64_:%[0-9]+]]:sreg_64_xexec = nofpexcept V_CMP_GE_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_GE_F32_e64_]] ; WAVE32-LABEL: name: fcmp_oge_s32_vv ; WAVE32: liveins: $vgpr0, $vgpr1 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE32-NEXT: %2:sreg_32_xm0_xexec = nofpexcept V_CMP_GE_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE32-NEXT: S_ENDPGM 0, implicit %2 + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE32-NEXT: [[V_CMP_GE_F32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_GE_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_GE_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vcc(s1) = G_FCMP floatpred(oge), %0, %1 @@ -126,17 +126,17 @@ ; WAVE64-LABEL: name: fcmp_olt_s32_vv ; WAVE64: liveins: $vgpr0, $vgpr1 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE64-NEXT: %2:sreg_64_xexec = nofpexcept V_CMP_LT_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE64-NEXT: S_ENDPGM 0, implicit %2 + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE64-NEXT: [[V_CMP_LT_F32_e64_:%[0-9]+]]:sreg_64_xexec = nofpexcept V_CMP_LT_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_LT_F32_e64_]] ; WAVE32-LABEL: name: fcmp_olt_s32_vv ; WAVE32: liveins: $vgpr0, $vgpr1 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE32-NEXT: %2:sreg_32_xm0_xexec = nofpexcept V_CMP_LT_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE32-NEXT: S_ENDPGM 0, implicit %2 + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE32-NEXT: [[V_CMP_LT_F32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_LT_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, 
implicit $mode, implicit $exec + ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_LT_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vcc(s1) = G_FCMP floatpred(olt), %0, %1 @@ -154,17 +154,17 @@ ; WAVE64-LABEL: name: fcmp_ole_s32_vv ; WAVE64: liveins: $vgpr0, $vgpr1 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE64-NEXT: %2:sreg_64_xexec = nofpexcept V_CMP_LE_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE64-NEXT: S_ENDPGM 0, implicit %2 + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE64-NEXT: [[V_CMP_LE_F32_e64_:%[0-9]+]]:sreg_64_xexec = nofpexcept V_CMP_LE_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_LE_F32_e64_]] ; WAVE32-LABEL: name: fcmp_ole_s32_vv ; WAVE32: liveins: $vgpr0, $vgpr1 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE32-NEXT: %2:sreg_32_xm0_xexec = nofpexcept V_CMP_LE_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE32-NEXT: S_ENDPGM 0, implicit %2 + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE32-NEXT: [[V_CMP_LE_F32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_LE_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_LE_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vcc(s1) = G_FCMP floatpred(ole), %0, %1 @@ -182,17 +182,17 @@ ; WAVE64-LABEL: name: fcmp_one_s32_vv ; WAVE64: liveins: $vgpr0, $vgpr1 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE64-NEXT: %2:sreg_64_xexec = nofpexcept V_CMP_LG_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE64-NEXT: S_ENDPGM 0, implicit %2 + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE64-NEXT: [[V_CMP_LG_F32_e64_:%[0-9]+]]:sreg_64_xexec = nofpexcept V_CMP_LG_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_LG_F32_e64_]] ; WAVE32-LABEL: name: fcmp_one_s32_vv ; WAVE32: liveins: $vgpr0, $vgpr1 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE32-NEXT: %2:sreg_32_xm0_xexec = nofpexcept V_CMP_LG_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE32-NEXT: S_ENDPGM 0, implicit %2 + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE32-NEXT: [[V_CMP_LG_F32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_LG_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_LG_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vcc(s1) = G_FCMP floatpred(one), %0, %1 @@ -210,17 +210,17 @@ ; WAVE64-LABEL: name: fcmp_ord_s32_vv ; WAVE64: liveins: $vgpr0, $vgpr1 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE64-NEXT: 
[[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE64-NEXT: %2:sreg_64_xexec = nofpexcept V_CMP_O_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE64-NEXT: S_ENDPGM 0, implicit %2 + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE64-NEXT: [[V_CMP_O_F32_e64_:%[0-9]+]]:sreg_64_xexec = nofpexcept V_CMP_O_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_O_F32_e64_]] ; WAVE32-LABEL: name: fcmp_ord_s32_vv ; WAVE32: liveins: $vgpr0, $vgpr1 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE32-NEXT: %2:sreg_32_xm0_xexec = nofpexcept V_CMP_O_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE32-NEXT: S_ENDPGM 0, implicit %2 + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE32-NEXT: [[V_CMP_O_F32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_O_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_O_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vcc(s1) = G_FCMP floatpred(ord), %0, %1 @@ -238,17 +238,17 @@ ; WAVE64-LABEL: name: fcmp_uno_s32_vv ; WAVE64: liveins: $vgpr0, $vgpr1 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE64-NEXT: %2:sreg_64_xexec = nofpexcept V_CMP_U_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE64-NEXT: S_ENDPGM 0, implicit %2 + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE64-NEXT: [[V_CMP_U_F32_e64_:%[0-9]+]]:sreg_64_xexec = nofpexcept V_CMP_U_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_U_F32_e64_]] ; WAVE32-LABEL: name: fcmp_uno_s32_vv ; WAVE32: liveins: $vgpr0, $vgpr1 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE32-NEXT: %2:sreg_32_xm0_xexec = nofpexcept V_CMP_U_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE32-NEXT: S_ENDPGM 0, implicit %2 + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE32-NEXT: [[V_CMP_U_F32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_U_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_U_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vcc(s1) = G_FCMP floatpred(uno), %0, %1 @@ -266,17 +266,17 @@ ; WAVE64-LABEL: name: fcmp_ueq_s32_vv ; WAVE64: liveins: $vgpr0, $vgpr1 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE64-NEXT: %2:sreg_64_xexec = nofpexcept V_CMP_NLG_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE64-NEXT: S_ENDPGM 0, implicit %2 + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE64-NEXT: [[V_CMP_NLG_F32_e64_:%[0-9]+]]:sreg_64_xexec = 
nofpexcept V_CMP_NLG_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_NLG_F32_e64_]] ; WAVE32-LABEL: name: fcmp_ueq_s32_vv ; WAVE32: liveins: $vgpr0, $vgpr1 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE32-NEXT: %2:sreg_32_xm0_xexec = nofpexcept V_CMP_NLG_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE32-NEXT: S_ENDPGM 0, implicit %2 + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE32-NEXT: [[V_CMP_NLG_F32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_NLG_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_NLG_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vcc(s1) = G_FCMP floatpred(ueq), %0, %1 @@ -294,17 +294,17 @@ ; WAVE64-LABEL: name: fcmp_ugt_s32_vv ; WAVE64: liveins: $vgpr0, $vgpr1 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE64-NEXT: %2:sreg_64_xexec = nofpexcept V_CMP_NLE_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE64-NEXT: S_ENDPGM 0, implicit %2 + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE64-NEXT: [[V_CMP_NLE_F32_e64_:%[0-9]+]]:sreg_64_xexec = nofpexcept V_CMP_NLE_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_NLE_F32_e64_]] ; WAVE32-LABEL: name: fcmp_ugt_s32_vv ; WAVE32: liveins: $vgpr0, $vgpr1 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE32-NEXT: %2:sreg_32_xm0_xexec = nofpexcept V_CMP_NLE_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE32-NEXT: S_ENDPGM 0, implicit %2 + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE32-NEXT: [[V_CMP_NLE_F32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_NLE_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_NLE_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vcc(s1) = G_FCMP floatpred(ugt), %0, %1 @@ -322,17 +322,17 @@ ; WAVE64-LABEL: name: fcmp_uge_s32_vv ; WAVE64: liveins: $vgpr0, $vgpr1 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE64-NEXT: %2:sreg_64_xexec = nofpexcept V_CMP_NLT_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE64-NEXT: S_ENDPGM 0, implicit %2 + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE64-NEXT: [[V_CMP_NLT_F32_e64_:%[0-9]+]]:sreg_64_xexec = nofpexcept V_CMP_NLT_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_NLT_F32_e64_]] ; WAVE32-LABEL: name: fcmp_uge_s32_vv ; WAVE32: liveins: $vgpr0, $vgpr1 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; 
WAVE32-NEXT: %2:sreg_32_xm0_xexec = nofpexcept V_CMP_NLT_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE32-NEXT: S_ENDPGM 0, implicit %2 + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE32-NEXT: [[V_CMP_NLT_F32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_NLT_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_NLT_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vcc(s1) = G_FCMP floatpred(uge), %0, %1 @@ -350,17 +350,17 @@ ; WAVE64-LABEL: name: fcmp_ult_s32_vv ; WAVE64: liveins: $vgpr0, $vgpr1 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE64-NEXT: %2:sreg_64_xexec = nofpexcept V_CMP_NGE_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE64-NEXT: S_ENDPGM 0, implicit %2 + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE64-NEXT: [[V_CMP_NGE_F32_e64_:%[0-9]+]]:sreg_64_xexec = nofpexcept V_CMP_NGE_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_NGE_F32_e64_]] ; WAVE32-LABEL: name: fcmp_ult_s32_vv ; WAVE32: liveins: $vgpr0, $vgpr1 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE32-NEXT: %2:sreg_32_xm0_xexec = nofpexcept V_CMP_NGE_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE32-NEXT: S_ENDPGM 0, implicit %2 + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE32-NEXT: [[V_CMP_NGE_F32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_NGE_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_NGE_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vcc(s1) = G_FCMP floatpred(ult), %0, %1 @@ -378,17 +378,17 @@ ; WAVE64-LABEL: name: fcmp_ule_s32_vv ; WAVE64: liveins: $vgpr0, $vgpr1 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE64-NEXT: %2:sreg_64_xexec = nofpexcept V_CMP_NGT_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE64-NEXT: S_ENDPGM 0, implicit %2 + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE64-NEXT: [[V_CMP_NGT_F32_e64_:%[0-9]+]]:sreg_64_xexec = nofpexcept V_CMP_NGT_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_NGT_F32_e64_]] ; WAVE32-LABEL: name: fcmp_ule_s32_vv ; WAVE32: liveins: $vgpr0, $vgpr1 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE32-NEXT: %2:sreg_32_xm0_xexec = nofpexcept V_CMP_NGT_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE32-NEXT: S_ENDPGM 0, implicit %2 + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE32-NEXT: [[V_CMP_NGT_F32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = 
nofpexcept V_CMP_NGT_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_NGT_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vcc(s1) = G_FCMP floatpred(ule), %0, %1 @@ -406,17 +406,17 @@ ; WAVE64-LABEL: name: fcmp_une_s32_vv ; WAVE64: liveins: $vgpr0, $vgpr1 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE64-NEXT: %2:sreg_64_xexec = nofpexcept V_CMP_NEQ_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE64-NEXT: S_ENDPGM 0, implicit %2 + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE64-NEXT: [[V_CMP_NEQ_F32_e64_:%[0-9]+]]:sreg_64_xexec = nofpexcept V_CMP_NEQ_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_NEQ_F32_e64_]] ; WAVE32-LABEL: name: fcmp_une_s32_vv ; WAVE32: liveins: $vgpr0, $vgpr1 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE32-NEXT: %2:sreg_32_xm0_xexec = nofpexcept V_CMP_NEQ_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE32-NEXT: S_ENDPGM 0, implicit %2 + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE32-NEXT: [[V_CMP_NEQ_F32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_NEQ_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_NEQ_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vcc(s1) = G_FCMP floatpred(une), %0, %1 @@ -490,17 +490,17 @@ ; WAVE64-LABEL: name: fcmp_oeq_s64_vv ; WAVE64: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; WAVE64-NEXT: %2:sreg_64_xexec = nofpexcept V_CMP_EQ_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE64-NEXT: S_ENDPGM 0, implicit %2 + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; WAVE64-NEXT: [[V_CMP_EQ_F64_e64_:%[0-9]+]]:sreg_64_xexec = nofpexcept V_CMP_EQ_F64_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_F64_e64_]] ; WAVE32-LABEL: name: fcmp_oeq_s64_vv ; WAVE32: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; WAVE32-NEXT: %2:sreg_32_xm0_xexec = nofpexcept V_CMP_EQ_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE32-NEXT: S_ENDPGM 0, implicit %2 + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; WAVE32-NEXT: [[V_CMP_EQ_F64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_EQ_F64_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_F64_e64_]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = COPY $vgpr2_vgpr3 %2:vcc(s1) = G_FCMP floatpred(oeq), %0, %1 @@ -518,17 +518,17 @@ ; WAVE64-LABEL: 
name: fcmp_ogt_s64_vv ; WAVE64: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; WAVE64-NEXT: %2:sreg_64_xexec = nofpexcept V_CMP_GT_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE64-NEXT: S_ENDPGM 0, implicit %2 + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; WAVE64-NEXT: [[V_CMP_GT_F64_e64_:%[0-9]+]]:sreg_64_xexec = nofpexcept V_CMP_GT_F64_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_GT_F64_e64_]] ; WAVE32-LABEL: name: fcmp_ogt_s64_vv ; WAVE32: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; WAVE32-NEXT: %2:sreg_32_xm0_xexec = nofpexcept V_CMP_GT_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE32-NEXT: S_ENDPGM 0, implicit %2 + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; WAVE32-NEXT: [[V_CMP_GT_F64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_GT_F64_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_GT_F64_e64_]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = COPY $vgpr2_vgpr3 %2:vcc(s1) = G_FCMP floatpred(ogt), %0, %1 @@ -546,17 +546,17 @@ ; WAVE64-LABEL: name: fcmp_oge_s64_vv ; WAVE64: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; WAVE64-NEXT: %2:sreg_64_xexec = nofpexcept V_CMP_GE_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE64-NEXT: S_ENDPGM 0, implicit %2 + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; WAVE64-NEXT: [[V_CMP_GE_F64_e64_:%[0-9]+]]:sreg_64_xexec = nofpexcept V_CMP_GE_F64_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_GE_F64_e64_]] ; WAVE32-LABEL: name: fcmp_oge_s64_vv ; WAVE32: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; WAVE32-NEXT: %2:sreg_32_xm0_xexec = nofpexcept V_CMP_GE_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE32-NEXT: S_ENDPGM 0, implicit %2 + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; WAVE32-NEXT: [[V_CMP_GE_F64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_GE_F64_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_GE_F64_e64_]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = COPY $vgpr2_vgpr3 %2:vcc(s1) = G_FCMP floatpred(oge), %0, %1 @@ -574,17 +574,17 @@ ; WAVE64-LABEL: name: fcmp_olt_s64_vv ; WAVE64: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; 
WAVE64-NEXT: %2:sreg_64_xexec = nofpexcept V_CMP_LT_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE64-NEXT: S_ENDPGM 0, implicit %2 + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; WAVE64-NEXT: [[V_CMP_LT_F64_e64_:%[0-9]+]]:sreg_64_xexec = nofpexcept V_CMP_LT_F64_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_LT_F64_e64_]] ; WAVE32-LABEL: name: fcmp_olt_s64_vv ; WAVE32: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; WAVE32-NEXT: %2:sreg_32_xm0_xexec = nofpexcept V_CMP_LT_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE32-NEXT: S_ENDPGM 0, implicit %2 + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; WAVE32-NEXT: [[V_CMP_LT_F64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_LT_F64_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_LT_F64_e64_]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = COPY $vgpr2_vgpr3 %2:vcc(s1) = G_FCMP floatpred(olt), %0, %1 @@ -602,17 +602,17 @@ ; WAVE64-LABEL: name: fcmp_ole_s64_vv ; WAVE64: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; WAVE64-NEXT: %2:sreg_64_xexec = nofpexcept V_CMP_LE_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE64-NEXT: S_ENDPGM 0, implicit %2 + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; WAVE64-NEXT: [[V_CMP_LE_F64_e64_:%[0-9]+]]:sreg_64_xexec = nofpexcept V_CMP_LE_F64_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_LE_F64_e64_]] ; WAVE32-LABEL: name: fcmp_ole_s64_vv ; WAVE32: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; WAVE32-NEXT: %2:sreg_32_xm0_xexec = nofpexcept V_CMP_LE_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE32-NEXT: S_ENDPGM 0, implicit %2 + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; WAVE32-NEXT: [[V_CMP_LE_F64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_LE_F64_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_LE_F64_e64_]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = COPY $vgpr2_vgpr3 %2:vcc(s1) = G_FCMP floatpred(ole), %0, %1 @@ -630,17 +630,17 @@ ; WAVE64-LABEL: name: fcmp_one_s64_vv ; WAVE64: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; WAVE64-NEXT: %2:sreg_64_xexec = nofpexcept V_CMP_LG_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE64-NEXT: S_ENDPGM 0, implicit %2 + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY 
$vgpr0_vgpr1 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; WAVE64-NEXT: [[V_CMP_LG_F64_e64_:%[0-9]+]]:sreg_64_xexec = nofpexcept V_CMP_LG_F64_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_LG_F64_e64_]] ; WAVE32-LABEL: name: fcmp_one_s64_vv ; WAVE32: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; WAVE32-NEXT: %2:sreg_32_xm0_xexec = nofpexcept V_CMP_LG_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE32-NEXT: S_ENDPGM 0, implicit %2 + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; WAVE32-NEXT: [[V_CMP_LG_F64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_LG_F64_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_LG_F64_e64_]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = COPY $vgpr2_vgpr3 %2:vcc(s1) = G_FCMP floatpred(one), %0, %1 @@ -658,17 +658,17 @@ ; WAVE64-LABEL: name: fcmp_ord_s64_vv ; WAVE64: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; WAVE64-NEXT: %2:sreg_64_xexec = nofpexcept V_CMP_O_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE64-NEXT: S_ENDPGM 0, implicit %2 + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; WAVE64-NEXT: [[V_CMP_O_F64_e64_:%[0-9]+]]:sreg_64_xexec = nofpexcept V_CMP_O_F64_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_O_F64_e64_]] ; WAVE32-LABEL: name: fcmp_ord_s64_vv ; WAVE32: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; WAVE32-NEXT: %2:sreg_32_xm0_xexec = nofpexcept V_CMP_O_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE32-NEXT: S_ENDPGM 0, implicit %2 + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; WAVE32-NEXT: [[V_CMP_O_F64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_O_F64_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_O_F64_e64_]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = COPY $vgpr2_vgpr3 %2:vcc(s1) = G_FCMP floatpred(ord), %0, %1 @@ -686,17 +686,17 @@ ; WAVE64-LABEL: name: fcmp_uno_s64_vv ; WAVE64: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; WAVE64-NEXT: %2:sreg_64_xexec = nofpexcept V_CMP_U_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE64-NEXT: S_ENDPGM 0, implicit %2 + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; WAVE64-NEXT: [[V_CMP_U_F64_e64_:%[0-9]+]]:sreg_64_xexec = nofpexcept V_CMP_U_F64_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit 
$mode, implicit $exec + ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_U_F64_e64_]] ; WAVE32-LABEL: name: fcmp_uno_s64_vv ; WAVE32: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; WAVE32-NEXT: %2:sreg_32_xm0_xexec = nofpexcept V_CMP_U_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE32-NEXT: S_ENDPGM 0, implicit %2 + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; WAVE32-NEXT: [[V_CMP_U_F64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_U_F64_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_U_F64_e64_]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = COPY $vgpr2_vgpr3 %2:vcc(s1) = G_FCMP floatpred(uno), %0, %1 @@ -714,17 +714,17 @@ ; WAVE64-LABEL: name: fcmp_ueq_s64_vv ; WAVE64: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; WAVE64-NEXT: %2:sreg_64_xexec = nofpexcept V_CMP_NLG_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE64-NEXT: S_ENDPGM 0, implicit %2 + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; WAVE64-NEXT: [[V_CMP_NLG_F64_e64_:%[0-9]+]]:sreg_64_xexec = nofpexcept V_CMP_NLG_F64_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_NLG_F64_e64_]] ; WAVE32-LABEL: name: fcmp_ueq_s64_vv ; WAVE32: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; WAVE32-NEXT: %2:sreg_32_xm0_xexec = nofpexcept V_CMP_NLG_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE32-NEXT: S_ENDPGM 0, implicit %2 + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; WAVE32-NEXT: [[V_CMP_NLG_F64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_NLG_F64_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_NLG_F64_e64_]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = COPY $vgpr2_vgpr3 %2:vcc(s1) = G_FCMP floatpred(ueq), %0, %1 @@ -742,17 +742,17 @@ ; WAVE64-LABEL: name: fcmp_ugt_s64_vv ; WAVE64: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; WAVE64-NEXT: %2:sreg_64_xexec = nofpexcept V_CMP_NLE_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE64-NEXT: S_ENDPGM 0, implicit %2 + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; WAVE64-NEXT: [[V_CMP_NLE_F64_e64_:%[0-9]+]]:sreg_64_xexec = nofpexcept V_CMP_NLE_F64_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_NLE_F64_e64_]] ; WAVE32-LABEL: name: fcmp_ugt_s64_vv ; WAVE32: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: 
[[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; WAVE32-NEXT: %2:sreg_32_xm0_xexec = nofpexcept V_CMP_NLE_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE32-NEXT: S_ENDPGM 0, implicit %2 + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; WAVE32-NEXT: [[V_CMP_NLE_F64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_NLE_F64_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_NLE_F64_e64_]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = COPY $vgpr2_vgpr3 %2:vcc(s1) = G_FCMP floatpred(ugt), %0, %1 @@ -770,17 +770,17 @@ ; WAVE64-LABEL: name: fcmp_uge_s64_vv ; WAVE64: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; WAVE64-NEXT: %2:sreg_64_xexec = nofpexcept V_CMP_NLT_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE64-NEXT: S_ENDPGM 0, implicit %2 + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; WAVE64-NEXT: [[V_CMP_NLT_F64_e64_:%[0-9]+]]:sreg_64_xexec = nofpexcept V_CMP_NLT_F64_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_NLT_F64_e64_]] ; WAVE32-LABEL: name: fcmp_uge_s64_vv ; WAVE32: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; WAVE32-NEXT: %2:sreg_32_xm0_xexec = nofpexcept V_CMP_NLT_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE32-NEXT: S_ENDPGM 0, implicit %2 + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; WAVE32-NEXT: [[V_CMP_NLT_F64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_NLT_F64_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_NLT_F64_e64_]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = COPY $vgpr2_vgpr3 %2:vcc(s1) = G_FCMP floatpred(uge), %0, %1 @@ -798,17 +798,17 @@ ; WAVE64-LABEL: name: fcmp_ult_s64_vv ; WAVE64: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; WAVE64-NEXT: %2:sreg_64_xexec = nofpexcept V_CMP_NGE_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE64-NEXT: S_ENDPGM 0, implicit %2 + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; WAVE64-NEXT: [[V_CMP_NGE_F64_e64_:%[0-9]+]]:sreg_64_xexec = nofpexcept V_CMP_NGE_F64_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_NGE_F64_e64_]] ; WAVE32-LABEL: name: fcmp_ult_s64_vv ; WAVE32: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; WAVE32-NEXT: %2:sreg_32_xm0_xexec = nofpexcept V_CMP_NGE_F64_e64 0, [[COPY]], 0, [[COPY1]], 
0, implicit $mode, implicit $exec - ; WAVE32-NEXT: S_ENDPGM 0, implicit %2 + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; WAVE32-NEXT: [[V_CMP_NGE_F64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_NGE_F64_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_NGE_F64_e64_]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = COPY $vgpr2_vgpr3 %2:vcc(s1) = G_FCMP floatpred(ult), %0, %1 @@ -826,17 +826,17 @@ ; WAVE64-LABEL: name: fcmp_ule_s64_vv ; WAVE64: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; WAVE64-NEXT: %2:sreg_64_xexec = nofpexcept V_CMP_NGT_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE64-NEXT: S_ENDPGM 0, implicit %2 + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; WAVE64-NEXT: [[V_CMP_NGT_F64_e64_:%[0-9]+]]:sreg_64_xexec = nofpexcept V_CMP_NGT_F64_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_NGT_F64_e64_]] ; WAVE32-LABEL: name: fcmp_ule_s64_vv ; WAVE32: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; WAVE32-NEXT: %2:sreg_32_xm0_xexec = nofpexcept V_CMP_NGT_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE32-NEXT: S_ENDPGM 0, implicit %2 + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; WAVE32-NEXT: [[V_CMP_NGT_F64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_NGT_F64_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_NGT_F64_e64_]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = COPY $vgpr2_vgpr3 %2:vcc(s1) = G_FCMP floatpred(ule), %0, %1 @@ -854,17 +854,17 @@ ; WAVE64-LABEL: name: fcmp_une_s64_vv ; WAVE64: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; WAVE64-NEXT: %2:sreg_64_xexec = nofpexcept V_CMP_NEQ_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE64-NEXT: S_ENDPGM 0, implicit %2 + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; WAVE64-NEXT: [[V_CMP_NEQ_F64_e64_:%[0-9]+]]:sreg_64_xexec = nofpexcept V_CMP_NEQ_F64_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_NEQ_F64_e64_]] ; WAVE32-LABEL: name: fcmp_une_s64_vv ; WAVE32: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; WAVE32-NEXT: %2:sreg_32_xm0_xexec = nofpexcept V_CMP_NEQ_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE32-NEXT: S_ENDPGM 0, implicit %2 + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = 
PRED_COPY $vgpr2_vgpr3 + ; WAVE32-NEXT: [[V_CMP_NEQ_F64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_NEQ_F64_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_NEQ_F64_e64_]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = COPY $vgpr2_vgpr3 %2:vcc(s1) = G_FCMP floatpred(une), %0, %1 @@ -910,18 +910,18 @@ ; WAVE64-LABEL: name: fcmp_oeq_s32_vv_select_user ; WAVE64: liveins: $vgpr0, $vgpr1 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE64-NEXT: %2:sreg_64_xexec = nofpexcept V_CMP_EQ_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE64-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY1]], 0, [[COPY]], %2, implicit $exec + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE64-NEXT: [[V_CMP_EQ_F32_e64_:%[0-9]+]]:sreg_64_xexec = nofpexcept V_CMP_EQ_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; WAVE64-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[PRED_COPY1]], 0, [[PRED_COPY]], [[V_CMP_EQ_F32_e64_]], implicit $exec ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CNDMASK_B32_e64_]] ; WAVE32-LABEL: name: fcmp_oeq_s32_vv_select_user ; WAVE32: liveins: $vgpr0, $vgpr1 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE32-NEXT: %2:sreg_32_xm0_xexec = nofpexcept V_CMP_EQ_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE32-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY1]], 0, [[COPY]], %2, implicit $exec + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE32-NEXT: [[V_CMP_EQ_F32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_EQ_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; WAVE32-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[PRED_COPY1]], 0, [[PRED_COPY]], [[V_CMP_EQ_F32_e64_]], implicit $exec ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CNDMASK_B32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fcmp.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fcmp.s16.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fcmp.s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fcmp.s16.mir @@ -57,24 +57,24 @@ ; WAVE64-LABEL: name: fcmp_oeq_s16_vv ; WAVE64: liveins: $vgpr0, $vgpr1 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE64-NEXT: %4:sreg_64_xexec = nofpexcept V_CMP_EQ_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE64-NEXT: S_ENDPGM 0, implicit %4 + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE64-NEXT: [[V_CMP_EQ_F16_e64_:%[0-9]+]]:sreg_64_xexec = nofpexcept V_CMP_EQ_F16_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_F16_e64_]] ; WAVE32-LABEL: name: fcmp_oeq_s16_vv ; WAVE32: liveins: $vgpr0, $vgpr1 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY 
$vgpr0 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE32-NEXT: %4:sreg_32_xm0_xexec = nofpexcept V_CMP_EQ_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE32-NEXT: S_ENDPGM 0, implicit %4 + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE32-NEXT: [[V_CMP_EQ_F16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_EQ_F16_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_F16_e64_]] ; GFX11-LABEL: name: fcmp_oeq_s16_vv ; GFX11: liveins: $vgpr0, $vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: %4:sreg_32_xm0_xexec = nofpexcept V_CMP_EQ_F16_t16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; GFX11-NEXT: S_ENDPGM 0, implicit %4 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[V_CMP_EQ_F16_t16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_EQ_F16_t16_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_F16_t16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s16) = G_TRUNC %0 @@ -94,24 +94,24 @@ ; WAVE64-LABEL: name: fcmp_ogt_s16_vv ; WAVE64: liveins: $vgpr0, $vgpr1 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE64-NEXT: %4:sreg_64_xexec = nofpexcept V_CMP_GT_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE64-NEXT: S_ENDPGM 0, implicit %4 + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE64-NEXT: [[V_CMP_GT_F16_e64_:%[0-9]+]]:sreg_64_xexec = nofpexcept V_CMP_GT_F16_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_GT_F16_e64_]] ; WAVE32-LABEL: name: fcmp_ogt_s16_vv ; WAVE32: liveins: $vgpr0, $vgpr1 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE32-NEXT: %4:sreg_32_xm0_xexec = nofpexcept V_CMP_GT_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE32-NEXT: S_ENDPGM 0, implicit %4 + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE32-NEXT: [[V_CMP_GT_F16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_GT_F16_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_GT_F16_e64_]] ; GFX11-LABEL: name: fcmp_ogt_s16_vv ; GFX11: liveins: $vgpr0, $vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: %4:sreg_32_xm0_xexec = nofpexcept V_CMP_GT_F16_t16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; GFX11-NEXT: S_ENDPGM 0, implicit %4 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[V_CMP_GT_F16_t16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_GT_F16_t16_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit 
$mode, implicit $exec + ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_GT_F16_t16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s16) = G_TRUNC %0 @@ -131,24 +131,24 @@ ; WAVE64-LABEL: name: fcmp_oge_s16_vv ; WAVE64: liveins: $vgpr0, $vgpr1 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE64-NEXT: %4:sreg_64_xexec = nofpexcept V_CMP_GE_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE64-NEXT: S_ENDPGM 0, implicit %4 + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE64-NEXT: [[V_CMP_GE_F16_e64_:%[0-9]+]]:sreg_64_xexec = nofpexcept V_CMP_GE_F16_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_GE_F16_e64_]] ; WAVE32-LABEL: name: fcmp_oge_s16_vv ; WAVE32: liveins: $vgpr0, $vgpr1 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE32-NEXT: %4:sreg_32_xm0_xexec = nofpexcept V_CMP_GE_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE32-NEXT: S_ENDPGM 0, implicit %4 + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE32-NEXT: [[V_CMP_GE_F16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_GE_F16_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_GE_F16_e64_]] ; GFX11-LABEL: name: fcmp_oge_s16_vv ; GFX11: liveins: $vgpr0, $vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: %4:sreg_32_xm0_xexec = nofpexcept V_CMP_GE_F16_t16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; GFX11-NEXT: S_ENDPGM 0, implicit %4 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[V_CMP_GE_F16_t16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_GE_F16_t16_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_GE_F16_t16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s16) = G_TRUNC %0 @@ -168,24 +168,24 @@ ; WAVE64-LABEL: name: fcmp_olt_s16_vv ; WAVE64: liveins: $vgpr0, $vgpr1 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE64-NEXT: %4:sreg_64_xexec = nofpexcept V_CMP_LT_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE64-NEXT: S_ENDPGM 0, implicit %4 + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE64-NEXT: [[V_CMP_LT_F16_e64_:%[0-9]+]]:sreg_64_xexec = nofpexcept V_CMP_LT_F16_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_LT_F16_e64_]] ; WAVE32-LABEL: name: fcmp_olt_s16_vv ; WAVE32: liveins: $vgpr0, $vgpr1 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE32-NEXT: %4:sreg_32_xm0_xexec = nofpexcept V_CMP_LT_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit 
$exec - ; WAVE32-NEXT: S_ENDPGM 0, implicit %4 + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE32-NEXT: [[V_CMP_LT_F16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_LT_F16_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_LT_F16_e64_]] ; GFX11-LABEL: name: fcmp_olt_s16_vv ; GFX11: liveins: $vgpr0, $vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: %4:sreg_32_xm0_xexec = nofpexcept V_CMP_LT_F16_t16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; GFX11-NEXT: S_ENDPGM 0, implicit %4 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[V_CMP_LT_F16_t16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_LT_F16_t16_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_LT_F16_t16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s16) = G_TRUNC %0 @@ -205,24 +205,24 @@ ; WAVE64-LABEL: name: fcmp_ole_s16_vv ; WAVE64: liveins: $vgpr0, $vgpr1 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE64-NEXT: %4:sreg_64_xexec = nofpexcept V_CMP_LE_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE64-NEXT: S_ENDPGM 0, implicit %4 + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE64-NEXT: [[V_CMP_LE_F16_e64_:%[0-9]+]]:sreg_64_xexec = nofpexcept V_CMP_LE_F16_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_LE_F16_e64_]] ; WAVE32-LABEL: name: fcmp_ole_s16_vv ; WAVE32: liveins: $vgpr0, $vgpr1 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE32-NEXT: %4:sreg_32_xm0_xexec = nofpexcept V_CMP_LE_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE32-NEXT: S_ENDPGM 0, implicit %4 + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE32-NEXT: [[V_CMP_LE_F16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_LE_F16_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_LE_F16_e64_]] ; GFX11-LABEL: name: fcmp_ole_s16_vv ; GFX11: liveins: $vgpr0, $vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: %4:sreg_32_xm0_xexec = nofpexcept V_CMP_LE_F16_t16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; GFX11-NEXT: S_ENDPGM 0, implicit %4 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[V_CMP_LE_F16_t16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_LE_F16_t16_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_LE_F16_t16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s16) = G_TRUNC %0 @@ -241,24 +241,24 
@@ ; WAVE64-LABEL: name: fcmp_one_s16_vv ; WAVE64: liveins: $vgpr0, $vgpr1 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE64-NEXT: %4:sreg_64_xexec = nofpexcept V_CMP_LG_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE64-NEXT: S_ENDPGM 0, implicit %4 + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE64-NEXT: [[V_CMP_LG_F16_e64_:%[0-9]+]]:sreg_64_xexec = nofpexcept V_CMP_LG_F16_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_LG_F16_e64_]] ; WAVE32-LABEL: name: fcmp_one_s16_vv ; WAVE32: liveins: $vgpr0, $vgpr1 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE32-NEXT: %4:sreg_32_xm0_xexec = nofpexcept V_CMP_LG_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE32-NEXT: S_ENDPGM 0, implicit %4 + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE32-NEXT: [[V_CMP_LG_F16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_LG_F16_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_LG_F16_e64_]] ; GFX11-LABEL: name: fcmp_one_s16_vv ; GFX11: liveins: $vgpr0, $vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: %4:sreg_32_xm0_xexec = nofpexcept V_CMP_LG_F16_t16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; GFX11-NEXT: S_ENDPGM 0, implicit %4 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[V_CMP_LG_F16_t16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_LG_F16_t16_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_LG_F16_t16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s16) = G_TRUNC %0 @@ -278,24 +278,24 @@ ; WAVE64-LABEL: name: fcmp_ord_s16_vv ; WAVE64: liveins: $vgpr0, $vgpr1 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE64-NEXT: %4:sreg_64_xexec = nofpexcept V_CMP_LG_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE64-NEXT: S_ENDPGM 0, implicit %4 + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE64-NEXT: [[V_CMP_LG_F16_e64_:%[0-9]+]]:sreg_64_xexec = nofpexcept V_CMP_LG_F16_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_LG_F16_e64_]] ; WAVE32-LABEL: name: fcmp_ord_s16_vv ; WAVE32: liveins: $vgpr0, $vgpr1 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE32-NEXT: %4:sreg_32_xm0_xexec = nofpexcept V_CMP_LG_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE32-NEXT: S_ENDPGM 0, implicit %4 + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; 
WAVE32-NEXT: [[V_CMP_LG_F16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_LG_F16_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_LG_F16_e64_]] ; GFX11-LABEL: name: fcmp_ord_s16_vv ; GFX11: liveins: $vgpr0, $vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: %4:sreg_32_xm0_xexec = nofpexcept V_CMP_LG_F16_t16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; GFX11-NEXT: S_ENDPGM 0, implicit %4 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[V_CMP_LG_F16_t16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_LG_F16_t16_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_LG_F16_t16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s16) = G_TRUNC %0 @@ -315,24 +315,24 @@ ; WAVE64-LABEL: name: fcmp_uno_s16_vv ; WAVE64: liveins: $vgpr0, $vgpr1 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE64-NEXT: %4:sreg_64_xexec = nofpexcept V_CMP_U_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE64-NEXT: S_ENDPGM 0, implicit %4 + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE64-NEXT: [[V_CMP_U_F16_e64_:%[0-9]+]]:sreg_64_xexec = nofpexcept V_CMP_U_F16_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_U_F16_e64_]] ; WAVE32-LABEL: name: fcmp_uno_s16_vv ; WAVE32: liveins: $vgpr0, $vgpr1 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE32-NEXT: %4:sreg_32_xm0_xexec = nofpexcept V_CMP_U_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE32-NEXT: S_ENDPGM 0, implicit %4 + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE32-NEXT: [[V_CMP_U_F16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_U_F16_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_U_F16_e64_]] ; GFX11-LABEL: name: fcmp_uno_s16_vv ; GFX11: liveins: $vgpr0, $vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: %4:sreg_32_xm0_xexec = nofpexcept V_CMP_U_F16_t16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; GFX11-NEXT: S_ENDPGM 0, implicit %4 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[V_CMP_U_F16_t16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_U_F16_t16_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_U_F16_t16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s16) = G_TRUNC %0 @@ -352,24 +352,24 @@ ; WAVE64-LABEL: name: fcmp_ueq_s16_vv ; WAVE64: liveins: $vgpr0, $vgpr1 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = 
COPY $vgpr1 - ; WAVE64-NEXT: %4:sreg_64_xexec = nofpexcept V_CMP_NLG_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE64-NEXT: S_ENDPGM 0, implicit %4 + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE64-NEXT: [[V_CMP_NLG_F16_e64_:%[0-9]+]]:sreg_64_xexec = nofpexcept V_CMP_NLG_F16_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_NLG_F16_e64_]] ; WAVE32-LABEL: name: fcmp_ueq_s16_vv ; WAVE32: liveins: $vgpr0, $vgpr1 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE32-NEXT: %4:sreg_32_xm0_xexec = nofpexcept V_CMP_NLG_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE32-NEXT: S_ENDPGM 0, implicit %4 + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE32-NEXT: [[V_CMP_NLG_F16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_NLG_F16_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_NLG_F16_e64_]] ; GFX11-LABEL: name: fcmp_ueq_s16_vv ; GFX11: liveins: $vgpr0, $vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: %4:sreg_32_xm0_xexec = nofpexcept V_CMP_NLG_F16_t16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; GFX11-NEXT: S_ENDPGM 0, implicit %4 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[V_CMP_NLG_F16_t16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_NLG_F16_t16_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_NLG_F16_t16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s16) = G_TRUNC %0 @@ -389,24 +389,24 @@ ; WAVE64-LABEL: name: fcmp_ugt_s16_vv ; WAVE64: liveins: $vgpr0, $vgpr1 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE64-NEXT: %4:sreg_64_xexec = nofpexcept V_CMP_NLE_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE64-NEXT: S_ENDPGM 0, implicit %4 + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE64-NEXT: [[V_CMP_NLE_F16_e64_:%[0-9]+]]:sreg_64_xexec = nofpexcept V_CMP_NLE_F16_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_NLE_F16_e64_]] ; WAVE32-LABEL: name: fcmp_ugt_s16_vv ; WAVE32: liveins: $vgpr0, $vgpr1 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE32-NEXT: %4:sreg_32_xm0_xexec = nofpexcept V_CMP_NLE_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE32-NEXT: S_ENDPGM 0, implicit %4 + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE32-NEXT: [[V_CMP_NLE_F16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_NLE_F16_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; WAVE32-NEXT: 
S_ENDPGM 0, implicit [[V_CMP_NLE_F16_e64_]] ; GFX11-LABEL: name: fcmp_ugt_s16_vv ; GFX11: liveins: $vgpr0, $vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: %4:sreg_32_xm0_xexec = nofpexcept V_CMP_NLE_F16_t16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; GFX11-NEXT: S_ENDPGM 0, implicit %4 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[V_CMP_NLE_F16_t16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_NLE_F16_t16_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_NLE_F16_t16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s16) = G_TRUNC %0 @@ -426,24 +426,24 @@ ; WAVE64-LABEL: name: fcmp_uge_s16_vv ; WAVE64: liveins: $vgpr0, $vgpr1 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE64-NEXT: %4:sreg_64_xexec = nofpexcept V_CMP_NLT_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE64-NEXT: S_ENDPGM 0, implicit %4 + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE64-NEXT: [[V_CMP_NLT_F16_e64_:%[0-9]+]]:sreg_64_xexec = nofpexcept V_CMP_NLT_F16_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_NLT_F16_e64_]] ; WAVE32-LABEL: name: fcmp_uge_s16_vv ; WAVE32: liveins: $vgpr0, $vgpr1 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE32-NEXT: %4:sreg_32_xm0_xexec = nofpexcept V_CMP_NLT_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE32-NEXT: S_ENDPGM 0, implicit %4 + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE32-NEXT: [[V_CMP_NLT_F16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_NLT_F16_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_NLT_F16_e64_]] ; GFX11-LABEL: name: fcmp_uge_s16_vv ; GFX11: liveins: $vgpr0, $vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: %4:sreg_32_xm0_xexec = nofpexcept V_CMP_NLT_F16_t16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; GFX11-NEXT: S_ENDPGM 0, implicit %4 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[V_CMP_NLT_F16_t16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_NLT_F16_t16_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_NLT_F16_t16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s16) = G_TRUNC %0 @@ -463,24 +463,24 @@ ; WAVE64-LABEL: name: fcmp_ult_s16_vv ; WAVE64: liveins: $vgpr0, $vgpr1 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE64-NEXT: %4:sreg_64_xexec = nofpexcept V_CMP_NGE_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; 
WAVE64-NEXT: S_ENDPGM 0, implicit %4 + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE64-NEXT: [[V_CMP_NGE_F16_e64_:%[0-9]+]]:sreg_64_xexec = nofpexcept V_CMP_NGE_F16_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_NGE_F16_e64_]] ; WAVE32-LABEL: name: fcmp_ult_s16_vv ; WAVE32: liveins: $vgpr0, $vgpr1 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE32-NEXT: %4:sreg_32_xm0_xexec = nofpexcept V_CMP_NGE_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE32-NEXT: S_ENDPGM 0, implicit %4 + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE32-NEXT: [[V_CMP_NGE_F16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_NGE_F16_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_NGE_F16_e64_]] ; GFX11-LABEL: name: fcmp_ult_s16_vv ; GFX11: liveins: $vgpr0, $vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: %4:sreg_32_xm0_xexec = nofpexcept V_CMP_NGE_F16_t16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; GFX11-NEXT: S_ENDPGM 0, implicit %4 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[V_CMP_NGE_F16_t16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_NGE_F16_t16_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_NGE_F16_t16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s16) = G_TRUNC %0 @@ -500,24 +500,24 @@ ; WAVE64-LABEL: name: fcmp_ule_s16_vv ; WAVE64: liveins: $vgpr0, $vgpr1 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE64-NEXT: %4:sreg_64_xexec = nofpexcept V_CMP_NGT_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE64-NEXT: S_ENDPGM 0, implicit %4 + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE64-NEXT: [[V_CMP_NGT_F16_e64_:%[0-9]+]]:sreg_64_xexec = nofpexcept V_CMP_NGT_F16_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_NGT_F16_e64_]] ; WAVE32-LABEL: name: fcmp_ule_s16_vv ; WAVE32: liveins: $vgpr0, $vgpr1 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE32-NEXT: %4:sreg_32_xm0_xexec = nofpexcept V_CMP_NGT_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE32-NEXT: S_ENDPGM 0, implicit %4 + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE32-NEXT: [[V_CMP_NGT_F16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_NGT_F16_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_NGT_F16_e64_]] ; GFX11-LABEL: name: fcmp_ule_s16_vv ; GFX11: liveins: $vgpr0, $vgpr1 ; GFX11-NEXT: {{ $}} - ; 
GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: %4:sreg_32_xm0_xexec = nofpexcept V_CMP_NGT_F16_t16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; GFX11-NEXT: S_ENDPGM 0, implicit %4 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[V_CMP_NGT_F16_t16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_NGT_F16_t16_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_NGT_F16_t16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s16) = G_TRUNC %0 @@ -537,24 +537,24 @@ ; WAVE64-LABEL: name: fcmp_une_s16_vv ; WAVE64: liveins: $vgpr0, $vgpr1 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE64-NEXT: %4:sreg_64_xexec = nofpexcept V_CMP_NEQ_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE64-NEXT: S_ENDPGM 0, implicit %4 + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE64-NEXT: [[V_CMP_NEQ_F16_e64_:%[0-9]+]]:sreg_64_xexec = nofpexcept V_CMP_NEQ_F16_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_NEQ_F16_e64_]] ; WAVE32-LABEL: name: fcmp_une_s16_vv ; WAVE32: liveins: $vgpr0, $vgpr1 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE32-NEXT: %4:sreg_32_xm0_xexec = nofpexcept V_CMP_NEQ_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE32-NEXT: S_ENDPGM 0, implicit %4 + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE32-NEXT: [[V_CMP_NEQ_F16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_NEQ_F16_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_NEQ_F16_e64_]] ; GFX11-LABEL: name: fcmp_une_s16_vv ; GFX11: liveins: $vgpr0, $vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: %4:sreg_32_xm0_xexec = nofpexcept V_CMP_NEQ_F16_t16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; GFX11-NEXT: S_ENDPGM 0, implicit %4 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[V_CMP_NEQ_F16_t16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_NEQ_F16_t16_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_NEQ_F16_t16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s16) = G_TRUNC %0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fconstant.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fconstant.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fconstant.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fconstant.mir @@ -14,8 +14,8 @@ ; GCN-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1090519040, implicit $exec ; GCN-NEXT: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1065353216, implicit $exec ; GCN-NEXT: 
[[V_MOV_B32_e32_3:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1090519040, implicit $exec - ; GCN-NEXT: $vgpr0 = COPY [[V_MOV_B32_e32_]] - ; GCN-NEXT: $vgpr1 = COPY [[V_MOV_B32_e32_1]] + ; GCN-NEXT: $vgpr0 = PRED_COPY [[V_MOV_B32_e32_]] + ; GCN-NEXT: $vgpr1 = PRED_COPY [[V_MOV_B32_e32_1]] ; GCN-NEXT: S_ENDPGM 0, implicit [[V_MOV_B32_e32_2]], implicit [[V_MOV_B32_e32_3]] %0:vgpr(s32) = G_FCONSTANT float 1.0 %1:vgpr(s32) = G_FCONSTANT float 8.0 @@ -39,8 +39,8 @@ ; GCN-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 1090519040 ; GCN-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 3212836864 ; GCN-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 3238002688 - ; GCN-NEXT: $sgpr0 = COPY [[S_MOV_B32_]] - ; GCN-NEXT: $sgpr1 = COPY [[S_MOV_B32_1]] + ; GCN-NEXT: $sgpr0 = PRED_COPY [[S_MOV_B32_]] + ; GCN-NEXT: $sgpr1 = PRED_COPY [[S_MOV_B32_1]] ; GCN-NEXT: S_ENDPGM 0, implicit [[S_MOV_B32_2]], implicit [[S_MOV_B32_3]] %0:sgpr(s32) = G_FCONSTANT float 1.0 %1:sgpr(s32) = G_FCONSTANT float 8.0 @@ -73,8 +73,8 @@ ; GCN-NEXT: [[V_MOV_B32_e32_6:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GCN-NEXT: [[V_MOV_B32_e32_7:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1076101120, implicit $exec ; GCN-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_6]], %subreg.sub0, [[V_MOV_B32_e32_7]], %subreg.sub1 - ; GCN-NEXT: $vgpr0_vgpr1 = COPY [[REG_SEQUENCE]] - ; GCN-NEXT: $vgpr2_vgpr3 = COPY [[REG_SEQUENCE1]] + ; GCN-NEXT: $vgpr0_vgpr1 = PRED_COPY [[REG_SEQUENCE]] + ; GCN-NEXT: $vgpr2_vgpr3 = PRED_COPY [[REG_SEQUENCE1]] ; GCN-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE2]], implicit [[REG_SEQUENCE3]] %0:vgpr(s64) = G_FCONSTANT double 1.0 %1:vgpr(s64) = G_FCONSTANT double 8.0 @@ -103,8 +103,8 @@ ; GCN-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GCN-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 -1071382528 ; GCN-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_2]], %subreg.sub0, [[S_MOV_B32_3]], %subreg.sub1 - ; GCN-NEXT: $sgpr0_sgpr1 = COPY [[S_MOV_B64_]] - ; GCN-NEXT: $sgpr2_sgpr3 = COPY [[REG_SEQUENCE]] + ; GCN-NEXT: $sgpr0_sgpr1 = PRED_COPY [[S_MOV_B64_]] + ; GCN-NEXT: $sgpr2_sgpr3 = PRED_COPY [[REG_SEQUENCE]] ; GCN-NEXT: S_ENDPGM 0, implicit [[S_MOV_B64_]], implicit [[REG_SEQUENCE]], implicit [[S_MOV_B64_1]], implicit [[REG_SEQUENCE1]] %0:sgpr(s64) = G_FCONSTANT double 1.0 %1:sgpr(s64) = G_FCONSTANT double 8.0 @@ -128,8 +128,8 @@ ; GCN-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 18432, implicit $exec ; GCN-NEXT: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 15360, implicit $exec ; GCN-NEXT: [[V_MOV_B32_e32_3:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 18432, implicit $exec - ; GCN-NEXT: $vgpr0 = COPY [[V_MOV_B32_e32_]] - ; GCN-NEXT: $vgpr1 = COPY [[V_MOV_B32_e32_1]] + ; GCN-NEXT: $vgpr0 = PRED_COPY [[V_MOV_B32_e32_]] + ; GCN-NEXT: $vgpr1 = PRED_COPY [[V_MOV_B32_e32_1]] ; GCN-NEXT: S_ENDPGM 0, implicit [[V_MOV_B32_e32_2]], implicit [[V_MOV_B32_e32_3]] %0:vgpr(s16) = G_FCONSTANT half 1.0 %1:vgpr(s16) = G_FCONSTANT half 8.0 @@ -156,12 +156,12 @@ ; GCN-LABEL: name: fconstant_s_s16 ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 15360 ; GCN-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 18432 - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_1]] ; GCN-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 15360 ; GCN-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = 
S_MOV_B32 18432 - ; GCN-NEXT: $sgpr0 = COPY [[COPY]] - ; GCN-NEXT: $sgpr1 = COPY [[COPY1]] + ; GCN-NEXT: $sgpr0 = PRED_COPY [[PRED_COPY]] + ; GCN-NEXT: $sgpr1 = PRED_COPY [[PRED_COPY1]] ; GCN-NEXT: S_ENDPGM 0, implicit [[S_MOV_B32_2]], implicit [[S_MOV_B32_3]] %0:sgpr(s16) = G_FCONSTANT half 1.0 %1:sgpr(s16) = G_FCONSTANT half 8.0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fexp2.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fexp2.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fexp2.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fexp2.mir @@ -14,9 +14,9 @@ ; CHECK-LABEL: name: fexp2_s32_vs ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; CHECK-NEXT: %1:vgpr_32 = nofpexcept V_EXP_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: S_ENDPGM 0, implicit %1 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; CHECK-NEXT: [[V_EXP_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_EXP_F32_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_EXP_F32_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = G_FEXP2 %0 S_ENDPGM 0, implicit %1 @@ -35,9 +35,9 @@ ; CHECK-LABEL: name: fexp2_s32_vv ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: %1:vgpr_32 = nofpexcept V_EXP_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: S_ENDPGM 0, implicit %1 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[V_EXP_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_EXP_F32_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_EXP_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = G_FEXP2 %0 S_ENDPGM 0, implicit %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ffloor.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ffloor.s16.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ffloor.s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ffloor.s16.mir @@ -17,8 +17,8 @@ ; VI-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; VI-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) ; VI-NEXT: [[FFLOOR:%[0-9]+]]:sreg_32(s16) = G_FFLOOR [[TRUNC]] - ; VI-NEXT: [[COPY1:%[0-9]+]]:sreg_32(s32) = COPY [[FFLOOR]](s16) - ; VI-NEXT: $sgpr0 = COPY [[COPY1]](s32) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32(s32) = PRED_COPY [[FFLOOR]](s16) + ; VI-NEXT: $sgpr0 = PRED_COPY [[PRED_COPY]](s32) %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s16) = G_TRUNC %0 %2:sgpr(s16) = G_FFLOOR %1 @@ -39,9 +39,9 @@ ; VI-LABEL: name: ffloor_s16_vv ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; VI-NEXT: %2:vgpr_32 = nofpexcept V_FLOOR_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; VI-NEXT: $vgpr0 = COPY %2 + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; VI-NEXT: [[V_FLOOR_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_FLOOR_F16_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; VI-NEXT: $vgpr0 = PRED_COPY [[V_FLOOR_F16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s16) = G_TRUNC %0 %2:vgpr(s16) = G_FFLOOR %1 @@ -62,9 +62,9 @@ ; VI-LABEL: name: ffloor_s16_vs ; VI: liveins: $sgpr0 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; VI-NEXT: %2:vgpr_32 = nofpexcept V_FLOOR_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; VI-NEXT: $vgpr0 = COPY %2 + ; VI-NEXT: 
[[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; VI-NEXT: [[V_FLOOR_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_FLOOR_F16_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; VI-NEXT: $vgpr0 = PRED_COPY [[V_FLOOR_F16_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s16) = G_TRUNC %0 %2:vgpr(s16) = G_FFLOOR %1 @@ -93,9 +93,9 @@ ; VI-LABEL: name: ffloor_fneg_s16_vv ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; VI-NEXT: %3:vgpr_32 = nofpexcept V_FLOOR_F16_e64 1, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; VI-NEXT: $vgpr0 = COPY %3 + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; VI-NEXT: [[V_FLOOR_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_FLOOR_F16_e64 1, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; VI-NEXT: $vgpr0 = PRED_COPY [[V_FLOOR_F16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s16) = G_TRUNC %0 %2:vgpr(s16) = G_FNEG %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ffloor.s32.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ffloor.s32.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ffloor.s32.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ffloor.s32.mir @@ -14,9 +14,9 @@ ; CHECK-LABEL: name: ffloor_s32_vv ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: %1:vgpr_32 = nofpexcept V_FLOOR_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: $vgpr0 = COPY %1 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[V_FLOOR_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_FLOOR_F32_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[V_FLOOR_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = G_FFLOOR %0 $vgpr0 = COPY %1 @@ -35,9 +35,9 @@ ; CHECK-LABEL: name: ffloor_s32_vs ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; CHECK-NEXT: %1:vgpr_32 = nofpexcept V_FLOOR_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: $vgpr0 = COPY %1 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; CHECK-NEXT: [[V_FLOOR_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_FLOOR_F32_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[V_FLOOR_F32_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = G_FFLOOR %0 $vgpr0 = COPY %1 @@ -56,9 +56,9 @@ ; CHECK-LABEL: name: ffloor_fneg_s32_vs ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; CHECK-NEXT: %2:vgpr_32 = nofpexcept V_FLOOR_F32_e64 1, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: $vgpr0 = COPY %2 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; CHECK-NEXT: [[V_FLOOR_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_FLOOR_F32_e64 1, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[V_FLOOR_F32_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = G_FNEG %0 %2:vgpr(s32) = G_FFLOOR %1 @@ -77,9 +77,9 @@ ; CHECK-LABEL: name: ffloor_fneg_s32_vv ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: %2:vgpr_32 = nofpexcept V_FLOOR_F32_e64 1, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: $vgpr0 = COPY %2 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[V_FLOOR_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_FLOOR_F32_e64 1, [[PRED_COPY]], 0, 0, 
implicit $mode, implicit $exec + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[V_FLOOR_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = G_FNEG %0 %2:vgpr(s32) = G_FFLOOR %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ffloor.s64.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ffloor.s64.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ffloor.s64.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ffloor.s64.mir @@ -14,9 +14,9 @@ ; CHECK-LABEL: name: ffloor_s64_vv ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: %1:vreg_64 = nofpexcept V_FLOOR_F64_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY %1 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[V_FLOOR_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_FLOOR_F64_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: $vgpr0_vgpr1 = PRED_COPY [[V_FLOOR_F64_e64_]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_FFLOOR %0 $vgpr0_vgpr1 = COPY %1 @@ -51,9 +51,9 @@ ; CHECK-LABEL: name: ffloor_fneg_s64_vv ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: %2:vreg_64 = nofpexcept V_FLOOR_F64_e64 1, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY %2 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[V_FLOOR_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_FLOOR_F64_e64 1, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: $vgpr0_vgpr1 = PRED_COPY [[V_FLOOR_F64_e64_]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_FNEG %0 %2:vgpr(s64) = G_FFLOOR %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fma.s32.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fma.s32.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fma.s32.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fma.s32.mir @@ -17,27 +17,27 @@ ; GFX6-LABEL: name: fma_f32 ; GFX6: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX6-NEXT: %3:vgpr_32 = nofpexcept V_FMA_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec - ; GFX6-NEXT: S_ENDPGM 0, implicit %3 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX6-NEXT: [[V_FMA_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_FMA_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, [[PRED_COPY2]], 0, 0, implicit $mode, implicit $exec + ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_FMA_F32_e64_]] ; GFX9-DL-LABEL: name: fma_f32 ; GFX9-DL: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9-DL-NEXT: {{ $}} - ; GFX9-DL-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-DL-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-DL-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-DL-NEXT: %3:vgpr_32 = nofpexcept V_FMAC_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec - ; GFX9-DL-NEXT: S_ENDPGM 0, implicit %3 + ; GFX9-DL-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-DL-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX9-DL-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY 
$vgpr2 + ; GFX9-DL-NEXT: [[V_FMAC_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_FMAC_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, [[PRED_COPY2]], 0, 0, implicit $mode, implicit $exec + ; GFX9-DL-NEXT: S_ENDPGM 0, implicit [[V_FMAC_F32_e64_]] ; GFX10-LABEL: name: fma_f32 ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10-NEXT: %3:vgpr_32 = nofpexcept V_FMAC_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec - ; GFX10-NEXT: S_ENDPGM 0, implicit %3 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[V_FMAC_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_FMAC_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, [[PRED_COPY2]], 0, 0, implicit $mode, implicit $exec + ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_FMAC_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s32) = COPY $vgpr2 @@ -59,27 +59,27 @@ ; GFX6-LABEL: name: fma_f32_fneg_src0 ; GFX6: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX6-NEXT: %4:vgpr_32 = nofpexcept V_FMA_F32_e64 1, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec - ; GFX6-NEXT: S_ENDPGM 0, implicit %4 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX6-NEXT: [[V_FMA_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_FMA_F32_e64 1, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, [[PRED_COPY2]], 0, 0, implicit $mode, implicit $exec + ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_FMA_F32_e64_]] ; GFX9-DL-LABEL: name: fma_f32_fneg_src0 ; GFX9-DL: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9-DL-NEXT: {{ $}} - ; GFX9-DL-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-DL-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-DL-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-DL-NEXT: %4:vgpr_32 = nofpexcept V_FMA_F32_e64 1, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec - ; GFX9-DL-NEXT: S_ENDPGM 0, implicit %4 + ; GFX9-DL-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-DL-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX9-DL-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX9-DL-NEXT: [[V_FMA_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_FMA_F32_e64 1, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, [[PRED_COPY2]], 0, 0, implicit $mode, implicit $exec + ; GFX9-DL-NEXT: S_ENDPGM 0, implicit [[V_FMA_F32_e64_]] ; GFX10-LABEL: name: fma_f32_fneg_src0 ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10-NEXT: %4:vgpr_32 = nofpexcept V_FMA_F32_e64 1, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec - ; GFX10-NEXT: S_ENDPGM 0, implicit %4 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX10-NEXT: 
[[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[V_FMA_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_FMA_F32_e64 1, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, [[PRED_COPY2]], 0, 0, implicit $mode, implicit $exec + ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_FMA_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s32) = COPY $vgpr2 @@ -102,27 +102,27 @@ ; GFX6-LABEL: name: fma_f32_fneg_src1 ; GFX6: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX6-NEXT: %4:vgpr_32 = nofpexcept V_FMA_F32_e64 0, [[COPY]], 1, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec - ; GFX6-NEXT: S_ENDPGM 0, implicit %4 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX6-NEXT: [[V_FMA_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_FMA_F32_e64 0, [[PRED_COPY]], 1, [[PRED_COPY1]], 0, [[PRED_COPY2]], 0, 0, implicit $mode, implicit $exec + ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_FMA_F32_e64_]] ; GFX9-DL-LABEL: name: fma_f32_fneg_src1 ; GFX9-DL: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9-DL-NEXT: {{ $}} - ; GFX9-DL-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-DL-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-DL-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-DL-NEXT: %4:vgpr_32 = nofpexcept V_FMA_F32_e64 0, [[COPY]], 1, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec - ; GFX9-DL-NEXT: S_ENDPGM 0, implicit %4 + ; GFX9-DL-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-DL-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX9-DL-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX9-DL-NEXT: [[V_FMA_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_FMA_F32_e64 0, [[PRED_COPY]], 1, [[PRED_COPY1]], 0, [[PRED_COPY2]], 0, 0, implicit $mode, implicit $exec + ; GFX9-DL-NEXT: S_ENDPGM 0, implicit [[V_FMA_F32_e64_]] ; GFX10-LABEL: name: fma_f32_fneg_src1 ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10-NEXT: %4:vgpr_32 = nofpexcept V_FMA_F32_e64 0, [[COPY]], 1, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec - ; GFX10-NEXT: S_ENDPGM 0, implicit %4 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[V_FMA_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_FMA_F32_e64 0, [[PRED_COPY]], 1, [[PRED_COPY1]], 0, [[PRED_COPY2]], 0, 0, implicit $mode, implicit $exec + ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_FMA_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s32) = COPY $vgpr2 @@ -145,27 +145,27 @@ ; GFX6-LABEL: name: fma_f32_fneg_src2 ; GFX6: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX6-NEXT: %4:vgpr_32 = nofpexcept V_FMA_F32_e64 0, [[COPY]], 0, [[COPY1]], 1, [[COPY2]], 0, 0, implicit $mode, implicit $exec - ; GFX6-NEXT: S_ENDPGM 0, implicit %4 + ; GFX6-NEXT: 
[[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX6-NEXT: [[V_FMA_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_FMA_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 1, [[PRED_COPY2]], 0, 0, implicit $mode, implicit $exec + ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_FMA_F32_e64_]] ; GFX9-DL-LABEL: name: fma_f32_fneg_src2 ; GFX9-DL: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9-DL-NEXT: {{ $}} - ; GFX9-DL-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-DL-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-DL-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-DL-NEXT: %4:vgpr_32 = nofpexcept V_FMA_F32_e64 0, [[COPY]], 0, [[COPY1]], 1, [[COPY2]], 0, 0, implicit $mode, implicit $exec - ; GFX9-DL-NEXT: S_ENDPGM 0, implicit %4 + ; GFX9-DL-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-DL-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX9-DL-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX9-DL-NEXT: [[V_FMA_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_FMA_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 1, [[PRED_COPY2]], 0, 0, implicit $mode, implicit $exec + ; GFX9-DL-NEXT: S_ENDPGM 0, implicit [[V_FMA_F32_e64_]] ; GFX10-LABEL: name: fma_f32_fneg_src2 ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10-NEXT: %4:vgpr_32 = nofpexcept V_FMA_F32_e64 0, [[COPY]], 0, [[COPY1]], 1, [[COPY2]], 0, 0, implicit $mode, implicit $exec - ; GFX10-NEXT: S_ENDPGM 0, implicit %4 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[V_FMA_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_FMA_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 1, [[PRED_COPY2]], 0, 0, implicit $mode, implicit $exec + ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_FMA_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s32) = COPY $vgpr2 @@ -188,27 +188,27 @@ ; GFX6-LABEL: name: fma_f32_fabs_src2 ; GFX6: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX6-NEXT: %4:vgpr_32 = nofpexcept V_FMA_F32_e64 0, [[COPY]], 0, [[COPY1]], 2, [[COPY2]], 0, 0, implicit $mode, implicit $exec - ; GFX6-NEXT: S_ENDPGM 0, implicit %4 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX6-NEXT: [[V_FMA_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_FMA_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 2, [[PRED_COPY2]], 0, 0, implicit $mode, implicit $exec + ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_FMA_F32_e64_]] ; GFX9-DL-LABEL: name: fma_f32_fabs_src2 ; GFX9-DL: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9-DL-NEXT: {{ $}} - ; GFX9-DL-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-DL-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-DL-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-DL-NEXT: %4:vgpr_32 = nofpexcept V_FMA_F32_e64 0, [[COPY]], 0, [[COPY1]], 2, [[COPY2]], 0, 0, implicit $mode, implicit $exec - ; GFX9-DL-NEXT: S_ENDPGM 
0, implicit %4 + ; GFX9-DL-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-DL-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX9-DL-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX9-DL-NEXT: [[V_FMA_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_FMA_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 2, [[PRED_COPY2]], 0, 0, implicit $mode, implicit $exec + ; GFX9-DL-NEXT: S_ENDPGM 0, implicit [[V_FMA_F32_e64_]] ; GFX10-LABEL: name: fma_f32_fabs_src2 ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10-NEXT: %4:vgpr_32 = nofpexcept V_FMA_F32_e64 0, [[COPY]], 0, [[COPY1]], 2, [[COPY2]], 0, 0, implicit $mode, implicit $exec - ; GFX10-NEXT: S_ENDPGM 0, implicit %4 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[V_FMA_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_FMA_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 2, [[PRED_COPY2]], 0, 0, implicit $mode, implicit $exec + ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_FMA_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s32) = COPY $vgpr2 @@ -231,27 +231,27 @@ ; GFX6-LABEL: name: fma_f32_copy_fneg_src2 ; GFX6: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX6-NEXT: %5:vgpr_32 = nofpexcept V_FMA_F32_e64 0, [[COPY]], 0, [[COPY1]], 1, [[COPY2]], 0, 0, implicit $mode, implicit $exec - ; GFX6-NEXT: S_ENDPGM 0, implicit %5 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX6-NEXT: [[V_FMA_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_FMA_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 1, [[PRED_COPY2]], 0, 0, implicit $mode, implicit $exec + ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_FMA_F32_e64_]] ; GFX9-DL-LABEL: name: fma_f32_copy_fneg_src2 ; GFX9-DL: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9-DL-NEXT: {{ $}} - ; GFX9-DL-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-DL-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-DL-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-DL-NEXT: %5:vgpr_32 = nofpexcept V_FMA_F32_e64 0, [[COPY]], 0, [[COPY1]], 1, [[COPY2]], 0, 0, implicit $mode, implicit $exec - ; GFX9-DL-NEXT: S_ENDPGM 0, implicit %5 + ; GFX9-DL-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-DL-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX9-DL-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX9-DL-NEXT: [[V_FMA_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_FMA_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 1, [[PRED_COPY2]], 0, 0, implicit $mode, implicit $exec + ; GFX9-DL-NEXT: S_ENDPGM 0, implicit [[V_FMA_F32_e64_]] ; GFX10-LABEL: name: fma_f32_copy_fneg_src2 ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10-NEXT: %5:vgpr_32 = nofpexcept V_FMA_F32_e64 0, [[COPY]], 0, [[COPY1]], 1, [[COPY2]], 0, 0, implicit 
$mode, implicit $exec - ; GFX10-NEXT: S_ENDPGM 0, implicit %5 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[V_FMA_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_FMA_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 1, [[PRED_COPY2]], 0, 0, implicit $mode, implicit $exec + ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_FMA_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s32) = COPY $vgpr2 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmad.s32.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmad.s32.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmad.s32.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmad.s32.mir @@ -15,18 +15,18 @@ ; GFX6-LABEL: name: fmad_f32 ; GFX6: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX6-NEXT: [[V_MAC_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAC_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX6-NEXT: [[V_MAC_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAC_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, [[PRED_COPY2]], 0, 0, implicit $mode, implicit $exec ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_MAC_F32_e64_]] ; GFX10-LABEL: name: fmad_f32 ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10-NEXT: [[V_MAC_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAC_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[V_MAC_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAC_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, [[PRED_COPY2]], 0, 0, implicit $mode, implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_MAC_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -49,18 +49,18 @@ ; GFX6-LABEL: name: fmad_f32_fneg_src0 ; GFX6: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX6-NEXT: [[V_MAD_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAD_F32_e64 1, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX6-NEXT: [[V_MAD_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAD_F32_e64 1, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, [[PRED_COPY2]], 0, 0, implicit $mode, implicit $exec ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_MAD_F32_e64_]] ; GFX10-LABEL: name: fmad_f32_fneg_src0 ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: 
[[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10-NEXT: [[V_MAD_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAD_F32_e64 1, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[V_MAD_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAD_F32_e64 1, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, [[PRED_COPY2]], 0, 0, implicit $mode, implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_MAD_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -84,18 +84,18 @@ ; GFX6-LABEL: name: fmad_f32_fneg_src1 ; GFX6: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX6-NEXT: [[V_MAD_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAD_F32_e64 0, [[COPY]], 1, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX6-NEXT: [[V_MAD_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAD_F32_e64 0, [[PRED_COPY]], 1, [[PRED_COPY1]], 0, [[PRED_COPY2]], 0, 0, implicit $mode, implicit $exec ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_MAD_F32_e64_]] ; GFX10-LABEL: name: fmad_f32_fneg_src1 ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10-NEXT: [[V_MAD_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAD_F32_e64 0, [[COPY]], 1, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[V_MAD_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAD_F32_e64 0, [[PRED_COPY]], 1, [[PRED_COPY1]], 0, [[PRED_COPY2]], 0, 0, implicit $mode, implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_MAD_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -119,18 +119,18 @@ ; GFX6-LABEL: name: fmad_f32_fneg_src2 ; GFX6: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX6-NEXT: [[V_MAD_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAD_F32_e64 0, [[COPY]], 0, [[COPY1]], 1, [[COPY2]], 0, 0, implicit $mode, implicit $exec + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX6-NEXT: [[V_MAD_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAD_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 1, [[PRED_COPY2]], 0, 0, implicit $mode, implicit $exec ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_MAD_F32_e64_]] ; GFX10-LABEL: name: fmad_f32_fneg_src2 ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10-NEXT: 
[[V_MAD_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAD_F32_e64 0, [[COPY]], 0, [[COPY1]], 1, [[COPY2]], 0, 0, implicit $mode, implicit $exec + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[V_MAD_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAD_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 1, [[PRED_COPY2]], 0, 0, implicit $mode, implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_MAD_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -154,18 +154,18 @@ ; GFX6-LABEL: name: fmad_f32_fabs_src2 ; GFX6: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX6-NEXT: [[V_MAD_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAD_F32_e64 0, [[COPY]], 0, [[COPY1]], 2, [[COPY2]], 0, 0, implicit $mode, implicit $exec + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX6-NEXT: [[V_MAD_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAD_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 2, [[PRED_COPY2]], 0, 0, implicit $mode, implicit $exec ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_MAD_F32_e64_]] ; GFX10-LABEL: name: fmad_f32_fabs_src2 ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10-NEXT: [[V_MAD_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAD_F32_e64 0, [[COPY]], 0, [[COPY1]], 2, [[COPY2]], 0, 0, implicit $mode, implicit $exec + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[V_MAD_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAD_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 2, [[PRED_COPY2]], 0, 0, implicit $mode, implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_MAD_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -189,18 +189,18 @@ ; GFX6-LABEL: name: fmad_f32_copy_fneg_src2 ; GFX6: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX6-NEXT: [[V_MAD_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAD_F32_e64 0, [[COPY]], 0, [[COPY1]], 1, [[COPY2]], 0, 0, implicit $mode, implicit $exec + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX6-NEXT: [[V_MAD_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAD_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 1, [[PRED_COPY2]], 0, 0, implicit $mode, implicit $exec ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_MAD_F32_e64_]] ; GFX10-LABEL: name: fmad_f32_copy_fneg_src2 ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10-NEXT: [[V_MAD_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAD_F32_e64 0, [[COPY]], 0, [[COPY1]], 1, [[COPY2]], 0, 0, implicit $mode, 
implicit $exec + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[V_MAD_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAD_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 1, [[PRED_COPY2]], 0, 0, implicit $mode, implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_MAD_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmaxnum-ieee.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmaxnum-ieee.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmaxnum-ieee.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmaxnum-ieee.mir @@ -16,23 +16,23 @@ ; GFX7-LABEL: name: fmaxnum_ieee_f32_f64_ieee_mode_on ; GFX7: liveins: $sgpr0, $vgpr0, $vgpr1, $vgpr3_vgpr4, $sgpr10_sgpr11, $vgpr10_vgpr11, $vgpr12_vgpr13 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:sreg_64 = COPY $sgpr10_sgpr11 - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY $vgpr10_vgpr11 - ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY $vgpr12_vgpr13 - ; GFX7-NEXT: %7:vgpr_32 = nofpexcept V_MAX_F32_e64 0, [[COPY1]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GFX7-NEXT: %8:vgpr_32 = nofpexcept V_MAX_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; GFX7-NEXT: %9:vgpr_32 = nofpexcept V_MAX_F32_e64 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec - ; GFX7-NEXT: FLAT_STORE_DWORD [[COPY3]], %7, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) - ; GFX7-NEXT: FLAT_STORE_DWORD [[COPY3]], %8, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) - ; GFX7-NEXT: FLAT_STORE_DWORD [[COPY3]], %9, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) - ; GFX7-NEXT: %10:vreg_64 = nofpexcept V_MAX_F64_e64 0, [[COPY4]], 0, [[COPY5]], 0, 0, implicit $mode, implicit $exec - ; GFX7-NEXT: %11:vreg_64 = nofpexcept V_MAX_F64_e64 0, [[COPY5]], 0, [[COPY4]], 0, 0, implicit $mode, implicit $exec - ; GFX7-NEXT: %12:vreg_64 = nofpexcept V_MAX_F64_e64 0, [[COPY5]], 0, [[COPY6]], 0, 0, implicit $mode, implicit $exec - ; GFX7-NEXT: S_ENDPGM 0, implicit %10, implicit %11, implicit %12 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr3_vgpr4 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr10_sgpr11 + ; GFX7-NEXT: [[PRED_COPY5:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr10_vgpr11 + ; GFX7-NEXT: [[PRED_COPY6:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr12_vgpr13 + ; GFX7-NEXT: [[V_MAX_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F32_e64 0, [[PRED_COPY1]], 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX7-NEXT: [[V_MAX_F32_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, 0, implicit $mode, implicit $exec + ; GFX7-NEXT: [[V_MAX_F32_e64_2:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F32_e64 0, [[PRED_COPY1]], 0, [[PRED_COPY2]], 0, 0, implicit $mode, implicit $exec + ; GFX7-NEXT: FLAT_STORE_DWORD [[PRED_COPY3]], [[V_MAX_F32_e64_]], 0, 0, implicit $exec, implicit 
$flat_scr :: (store (s32), addrspace 1) + ; GFX7-NEXT: FLAT_STORE_DWORD [[PRED_COPY3]], [[V_MAX_F32_e64_1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) + ; GFX7-NEXT: FLAT_STORE_DWORD [[PRED_COPY3]], [[V_MAX_F32_e64_2]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) + ; GFX7-NEXT: [[V_MAX_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_MAX_F64_e64 0, [[PRED_COPY4]], 0, [[PRED_COPY5]], 0, 0, implicit $mode, implicit $exec + ; GFX7-NEXT: [[V_MAX_F64_e64_1:%[0-9]+]]:vreg_64 = nofpexcept V_MAX_F64_e64 0, [[PRED_COPY5]], 0, [[PRED_COPY4]], 0, 0, implicit $mode, implicit $exec + ; GFX7-NEXT: [[V_MAX_F64_e64_2:%[0-9]+]]:vreg_64 = nofpexcept V_MAX_F64_e64 0, [[PRED_COPY5]], 0, [[PRED_COPY6]], 0, 0, implicit $mode, implicit $exec + ; GFX7-NEXT: S_ENDPGM 0, implicit [[V_MAX_F64_e64_]], implicit [[V_MAX_F64_e64_1]], implicit [[V_MAX_F64_e64_2]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = COPY $vgpr0 %2:vgpr(s32) = COPY $vgpr1 @@ -86,23 +86,23 @@ ; GFX7-LABEL: name: fmaxnum_ieee_f32_f64_ieee_mode_off ; GFX7: liveins: $sgpr0, $vgpr0, $vgpr1, $vgpr3_vgpr4, $sgpr10_sgpr11, $vgpr10_vgpr11, $vgpr12_vgpr13 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:sreg_64 = COPY $sgpr10_sgpr11 - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY $vgpr10_vgpr11 - ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY $vgpr12_vgpr13 - ; GFX7-NEXT: %7:vgpr_32 = nofpexcept V_MAX_F32_e64 0, [[COPY1]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GFX7-NEXT: %8:vgpr_32 = nofpexcept V_MAX_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; GFX7-NEXT: %9:vgpr_32 = nofpexcept V_MAX_F32_e64 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec - ; GFX7-NEXT: FLAT_STORE_DWORD [[COPY3]], %7, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) - ; GFX7-NEXT: FLAT_STORE_DWORD [[COPY3]], %8, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) - ; GFX7-NEXT: FLAT_STORE_DWORD [[COPY3]], %9, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) - ; GFX7-NEXT: %10:vreg_64 = nofpexcept V_MAX_F64_e64 0, [[COPY4]], 0, [[COPY5]], 0, 0, implicit $mode, implicit $exec - ; GFX7-NEXT: %11:vreg_64 = nofpexcept V_MAX_F64_e64 0, [[COPY5]], 0, [[COPY4]], 0, 0, implicit $mode, implicit $exec - ; GFX7-NEXT: %12:vreg_64 = nofpexcept V_MAX_F64_e64 0, [[COPY5]], 0, [[COPY6]], 0, 0, implicit $mode, implicit $exec - ; GFX7-NEXT: S_ENDPGM 0, implicit %10, implicit %11, implicit %12 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr3_vgpr4 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr10_sgpr11 + ; GFX7-NEXT: [[PRED_COPY5:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr10_vgpr11 + ; GFX7-NEXT: [[PRED_COPY6:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr12_vgpr13 + ; GFX7-NEXT: [[V_MAX_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F32_e64 0, [[PRED_COPY1]], 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX7-NEXT: [[V_MAX_F32_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, 0, implicit $mode, implicit $exec + ; GFX7-NEXT: [[V_MAX_F32_e64_2:%[0-9]+]]:vgpr_32 
= nofpexcept V_MAX_F32_e64 0, [[PRED_COPY1]], 0, [[PRED_COPY2]], 0, 0, implicit $mode, implicit $exec + ; GFX7-NEXT: FLAT_STORE_DWORD [[PRED_COPY3]], [[V_MAX_F32_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) + ; GFX7-NEXT: FLAT_STORE_DWORD [[PRED_COPY3]], [[V_MAX_F32_e64_1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) + ; GFX7-NEXT: FLAT_STORE_DWORD [[PRED_COPY3]], [[V_MAX_F32_e64_2]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) + ; GFX7-NEXT: [[V_MAX_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_MAX_F64_e64 0, [[PRED_COPY4]], 0, [[PRED_COPY5]], 0, 0, implicit $mode, implicit $exec + ; GFX7-NEXT: [[V_MAX_F64_e64_1:%[0-9]+]]:vreg_64 = nofpexcept V_MAX_F64_e64 0, [[PRED_COPY5]], 0, [[PRED_COPY4]], 0, 0, implicit $mode, implicit $exec + ; GFX7-NEXT: [[V_MAX_F64_e64_2:%[0-9]+]]:vreg_64 = nofpexcept V_MAX_F64_e64 0, [[PRED_COPY5]], 0, [[PRED_COPY6]], 0, 0, implicit $mode, implicit $exec + ; GFX7-NEXT: S_ENDPGM 0, implicit [[V_MAX_F64_e64_]], implicit [[V_MAX_F64_e64_1]], implicit [[V_MAX_F64_e64_2]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = COPY $vgpr0 %2:vgpr(s32) = COPY $vgpr1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmaxnum-ieee.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmaxnum-ieee.s16.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmaxnum-ieee.s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmaxnum-ieee.s16.mir @@ -16,17 +16,17 @@ ; CHECK-LABEL: name: fmaxnum_ieee_f16_vv ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: %4:vgpr_32 = nofpexcept V_MAX_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: S_ENDPGM 0, implicit %4 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[V_MAX_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F16_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_MAX_F16_e64_]] ; GFX11-LABEL: name: fmaxnum_ieee_f16_vv ; GFX11: liveins: $vgpr0, $vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: %4:vgpr_32 = nofpexcept V_MAX_F16_t16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; GFX11-NEXT: S_ENDPGM 0, implicit %4 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[V_MAX_F16_t16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F16_t16_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, 0, implicit $mode, implicit $exec + ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MAX_F16_t16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s16) = G_TRUNC %0 @@ -47,17 +47,17 @@ ; CHECK-LABEL: name: fmaxnum_ieee_f16_v_fneg_v ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: %5:vgpr_32 = nofpexcept V_MAX_F16_e64 0, [[COPY]], 1, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: S_ENDPGM 0, implicit %5 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: 
[[V_MAX_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F16_e64 0, [[PRED_COPY]], 1, [[PRED_COPY1]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_MAX_F16_e64_]] ; GFX11-LABEL: name: fmaxnum_ieee_f16_v_fneg_v ; GFX11: liveins: $vgpr0, $vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: %5:vgpr_32 = nofpexcept V_MAX_F16_t16_e64 0, [[COPY]], 1, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; GFX11-NEXT: S_ENDPGM 0, implicit %5 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[V_MAX_F16_t16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F16_t16_e64 0, [[PRED_COPY]], 1, [[PRED_COPY1]], 0, 0, implicit $mode, implicit $exec + ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MAX_F16_t16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s16) = G_TRUNC %0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmaxnum-ieee.v2s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmaxnum-ieee.v2s16.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmaxnum-ieee.v2s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmaxnum-ieee.v2s16.mir @@ -15,10 +15,10 @@ ; GFX9-LABEL: name: fmaxnum_ieee_v2f16_vv ; GFX9: liveins: $sgpr0, $sgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: %2:vgpr_32 = nofpexcept V_PK_MAX_F16 8, [[COPY]], 8, [[COPY1]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec - ; GFX9-NEXT: S_ENDPGM 0, implicit %2 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[V_PK_MAX_F16_:%[0-9]+]]:vgpr_32 = nofpexcept V_PK_MAX_F16 8, [[PRED_COPY]], 8, [[PRED_COPY1]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec + ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_PK_MAX_F16_]] %0:vgpr(<2 x s16>) = COPY $vgpr0 %1:vgpr(<2 x s16>) = COPY $vgpr1 %2:vgpr(<2 x s16>) = G_FMAXNUM_IEEE %0, %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmaxnum.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmaxnum.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmaxnum.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmaxnum.mir @@ -17,23 +17,23 @@ ; GFX7-LABEL: name: fmaxnum_f32_f64_ieee_mode_on ; GFX7: liveins: $sgpr0, $vgpr0, $vgpr1, $vgpr3_vgpr4, $sgpr10_sgpr11, $vgpr10_vgpr11, $vgpr12_vgpr13 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:sreg_64 = COPY $sgpr10_sgpr11 - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY $vgpr10_vgpr11 - ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY $vgpr12_vgpr13 - ; GFX7-NEXT: %7:vgpr_32 = nofpexcept V_MAX_F32_e64 0, [[COPY1]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GFX7-NEXT: %8:vgpr_32 = nofpexcept V_MAX_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; GFX7-NEXT: %9:vgpr_32 = nofpexcept V_MAX_F32_e64 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec - ; GFX7-NEXT: FLAT_STORE_DWORD [[COPY3]], %7, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) - ; GFX7-NEXT: FLAT_STORE_DWORD [[COPY3]], %8, 0, 0, implicit 
$exec, implicit $flat_scr :: (store (s32), addrspace 1) - ; GFX7-NEXT: FLAT_STORE_DWORD [[COPY3]], %9, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) - ; GFX7-NEXT: %10:vreg_64 = nofpexcept V_MAX_F64_e64 0, [[COPY4]], 0, [[COPY5]], 0, 0, implicit $mode, implicit $exec - ; GFX7-NEXT: %11:vreg_64 = nofpexcept V_MAX_F64_e64 0, [[COPY5]], 0, [[COPY4]], 0, 0, implicit $mode, implicit $exec - ; GFX7-NEXT: %12:vreg_64 = nofpexcept V_MAX_F64_e64 0, [[COPY5]], 0, [[COPY6]], 0, 0, implicit $mode, implicit $exec - ; GFX7-NEXT: S_ENDPGM 0, implicit %10, implicit %11, implicit %12 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr3_vgpr4 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr10_sgpr11 + ; GFX7-NEXT: [[PRED_COPY5:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr10_vgpr11 + ; GFX7-NEXT: [[PRED_COPY6:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr12_vgpr13 + ; GFX7-NEXT: [[V_MAX_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F32_e64 0, [[PRED_COPY1]], 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX7-NEXT: [[V_MAX_F32_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, 0, implicit $mode, implicit $exec + ; GFX7-NEXT: [[V_MAX_F32_e64_2:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F32_e64 0, [[PRED_COPY1]], 0, [[PRED_COPY2]], 0, 0, implicit $mode, implicit $exec + ; GFX7-NEXT: FLAT_STORE_DWORD [[PRED_COPY3]], [[V_MAX_F32_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) + ; GFX7-NEXT: FLAT_STORE_DWORD [[PRED_COPY3]], [[V_MAX_F32_e64_1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) + ; GFX7-NEXT: FLAT_STORE_DWORD [[PRED_COPY3]], [[V_MAX_F32_e64_2]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) + ; GFX7-NEXT: [[V_MAX_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_MAX_F64_e64 0, [[PRED_COPY4]], 0, [[PRED_COPY5]], 0, 0, implicit $mode, implicit $exec + ; GFX7-NEXT: [[V_MAX_F64_e64_1:%[0-9]+]]:vreg_64 = nofpexcept V_MAX_F64_e64 0, [[PRED_COPY5]], 0, [[PRED_COPY4]], 0, 0, implicit $mode, implicit $exec + ; GFX7-NEXT: [[V_MAX_F64_e64_2:%[0-9]+]]:vreg_64 = nofpexcept V_MAX_F64_e64 0, [[PRED_COPY5]], 0, [[PRED_COPY6]], 0, 0, implicit $mode, implicit $exec + ; GFX7-NEXT: S_ENDPGM 0, implicit [[V_MAX_F64_e64_]], implicit [[V_MAX_F64_e64_1]], implicit [[V_MAX_F64_e64_2]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = COPY $vgpr0 %2:vgpr(s32) = COPY $vgpr1 @@ -85,23 +85,23 @@ ; GFX7-LABEL: name: fmaxnum_f32_f64_ieee_mode_off ; GFX7: liveins: $sgpr0, $vgpr0, $vgpr1, $vgpr3_vgpr4, $sgpr10_sgpr11, $vgpr10_vgpr11, $vgpr12_vgpr13 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:sreg_64 = COPY $sgpr10_sgpr11 - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY $vgpr10_vgpr11 - ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY $vgpr12_vgpr13 - ; GFX7-NEXT: %7:vgpr_32 = nofpexcept V_MAX_F32_e64 0, [[COPY1]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GFX7-NEXT: %8:vgpr_32 = nofpexcept V_MAX_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; GFX7-NEXT: %9:vgpr_32 = nofpexcept V_MAX_F32_e64 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, 
implicit $exec - ; GFX7-NEXT: FLAT_STORE_DWORD [[COPY3]], %7, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) - ; GFX7-NEXT: FLAT_STORE_DWORD [[COPY3]], %8, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) - ; GFX7-NEXT: FLAT_STORE_DWORD [[COPY3]], %9, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) - ; GFX7-NEXT: %10:vreg_64 = nofpexcept V_MAX_F64_e64 0, [[COPY4]], 0, [[COPY5]], 0, 0, implicit $mode, implicit $exec - ; GFX7-NEXT: %11:vreg_64 = nofpexcept V_MAX_F64_e64 0, [[COPY5]], 0, [[COPY4]], 0, 0, implicit $mode, implicit $exec - ; GFX7-NEXT: %12:vreg_64 = nofpexcept V_MAX_F64_e64 0, [[COPY5]], 0, [[COPY6]], 0, 0, implicit $mode, implicit $exec - ; GFX7-NEXT: S_ENDPGM 0, implicit %10, implicit %11, implicit %12 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr3_vgpr4 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr10_sgpr11 + ; GFX7-NEXT: [[PRED_COPY5:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr10_vgpr11 + ; GFX7-NEXT: [[PRED_COPY6:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr12_vgpr13 + ; GFX7-NEXT: [[V_MAX_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F32_e64 0, [[PRED_COPY1]], 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX7-NEXT: [[V_MAX_F32_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, 0, implicit $mode, implicit $exec + ; GFX7-NEXT: [[V_MAX_F32_e64_2:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F32_e64 0, [[PRED_COPY1]], 0, [[PRED_COPY2]], 0, 0, implicit $mode, implicit $exec + ; GFX7-NEXT: FLAT_STORE_DWORD [[PRED_COPY3]], [[V_MAX_F32_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) + ; GFX7-NEXT: FLAT_STORE_DWORD [[PRED_COPY3]], [[V_MAX_F32_e64_1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) + ; GFX7-NEXT: FLAT_STORE_DWORD [[PRED_COPY3]], [[V_MAX_F32_e64_2]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) + ; GFX7-NEXT: [[V_MAX_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_MAX_F64_e64 0, [[PRED_COPY4]], 0, [[PRED_COPY5]], 0, 0, implicit $mode, implicit $exec + ; GFX7-NEXT: [[V_MAX_F64_e64_1:%[0-9]+]]:vreg_64 = nofpexcept V_MAX_F64_e64 0, [[PRED_COPY5]], 0, [[PRED_COPY4]], 0, 0, implicit $mode, implicit $exec + ; GFX7-NEXT: [[V_MAX_F64_e64_2:%[0-9]+]]:vreg_64 = nofpexcept V_MAX_F64_e64 0, [[PRED_COPY5]], 0, [[PRED_COPY6]], 0, 0, implicit $mode, implicit $exec + ; GFX7-NEXT: S_ENDPGM 0, implicit [[V_MAX_F64_e64_]], implicit [[V_MAX_F64_e64_1]], implicit [[V_MAX_F64_e64_2]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = COPY $vgpr0 %2:vgpr(s32) = COPY $vgpr1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmaxnum.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmaxnum.s16.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmaxnum.s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmaxnum.s16.mir @@ -16,17 +16,17 @@ ; CHECK-LABEL: name: fmaxnum_f16_vv ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: %4:vgpr_32 = nofpexcept V_MAX_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: S_ENDPGM 0, implicit %4 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: 
[[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[V_MAX_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F16_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_MAX_F16_e64_]] ; GFX11-LABEL: name: fmaxnum_f16_vv ; GFX11: liveins: $vgpr0, $vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: %4:vgpr_32 = nofpexcept V_MAX_F16_t16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; GFX11-NEXT: S_ENDPGM 0, implicit %4 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[V_MAX_F16_t16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F16_t16_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, 0, implicit $mode, implicit $exec + ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MAX_F16_t16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s16) = G_TRUNC %0 @@ -47,17 +47,17 @@ ; CHECK-LABEL: name: fmaxnum_f16_v_fneg_v ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: %5:vgpr_32 = nofpexcept V_MAX_F16_e64 0, [[COPY]], 1, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: S_ENDPGM 0, implicit %5 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[V_MAX_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F16_e64 0, [[PRED_COPY]], 1, [[PRED_COPY1]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_MAX_F16_e64_]] ; GFX11-LABEL: name: fmaxnum_f16_v_fneg_v ; GFX11: liveins: $vgpr0, $vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: %5:vgpr_32 = nofpexcept V_MAX_F16_t16_e64 0, [[COPY]], 1, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; GFX11-NEXT: S_ENDPGM 0, implicit %5 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[V_MAX_F16_t16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F16_t16_e64 0, [[PRED_COPY]], 1, [[PRED_COPY1]], 0, 0, implicit $mode, implicit $exec + ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MAX_F16_t16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s16) = G_TRUNC %0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmaxnum.v2s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmaxnum.v2s16.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmaxnum.v2s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmaxnum.v2s16.mir @@ -17,10 +17,10 @@ ; GFX9-LABEL: name: fmaxnum_v2f16_vv ; GFX9: liveins: $sgpr0, $sgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: %2:vgpr_32 = nofpexcept V_PK_MAX_F16 8, [[COPY]], 8, [[COPY1]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec - ; GFX9-NEXT: S_ENDPGM 0, implicit %2 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[V_PK_MAX_F16_:%[0-9]+]]:vgpr_32 = nofpexcept V_PK_MAX_F16 8, [[PRED_COPY]], 8, [[PRED_COPY1]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec + ; GFX9-NEXT: 
S_ENDPGM 0, implicit [[V_PK_MAX_F16_]] %0:vgpr(<2 x s16>) = COPY $vgpr0 %1:vgpr(<2 x s16>) = COPY $vgpr1 %2:vgpr(<2 x s16>) = G_FMAXNUM %0, %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fminnum-ieee.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fminnum-ieee.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fminnum-ieee.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fminnum-ieee.mir @@ -16,23 +16,23 @@ ; GFX7-LABEL: name: fminnum_ieee_f32_f64_ieee_mode_on ; GFX7: liveins: $sgpr0, $vgpr0, $vgpr1, $vgpr3_vgpr4, $sgpr10_sgpr11, $vgpr10_vgpr11, $vgpr12_vgpr13 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:sreg_64 = COPY $sgpr10_sgpr11 - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY $vgpr10_vgpr11 - ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY $vgpr12_vgpr13 - ; GFX7-NEXT: %7:vgpr_32 = nofpexcept V_MIN_F32_e64 0, [[COPY1]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GFX7-NEXT: %8:vgpr_32 = nofpexcept V_MIN_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; GFX7-NEXT: %9:vgpr_32 = nofpexcept V_MIN_F32_e64 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec - ; GFX7-NEXT: FLAT_STORE_DWORD [[COPY3]], %7, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) - ; GFX7-NEXT: FLAT_STORE_DWORD [[COPY3]], %8, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) - ; GFX7-NEXT: FLAT_STORE_DWORD [[COPY3]], %9, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) - ; GFX7-NEXT: %10:vreg_64 = nofpexcept V_MIN_F64_e64 0, [[COPY4]], 0, [[COPY5]], 0, 0, implicit $mode, implicit $exec - ; GFX7-NEXT: %11:vreg_64 = nofpexcept V_MIN_F64_e64 0, [[COPY5]], 0, [[COPY4]], 0, 0, implicit $mode, implicit $exec - ; GFX7-NEXT: %12:vreg_64 = nofpexcept V_MIN_F64_e64 0, [[COPY5]], 0, [[COPY6]], 0, 0, implicit $mode, implicit $exec - ; GFX7-NEXT: S_ENDPGM 0, implicit %10, implicit %11, implicit %12 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr3_vgpr4 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr10_sgpr11 + ; GFX7-NEXT: [[PRED_COPY5:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr10_vgpr11 + ; GFX7-NEXT: [[PRED_COPY6:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr12_vgpr13 + ; GFX7-NEXT: [[V_MIN_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MIN_F32_e64 0, [[PRED_COPY1]], 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX7-NEXT: [[V_MIN_F32_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_MIN_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, 0, implicit $mode, implicit $exec + ; GFX7-NEXT: [[V_MIN_F32_e64_2:%[0-9]+]]:vgpr_32 = nofpexcept V_MIN_F32_e64 0, [[PRED_COPY1]], 0, [[PRED_COPY2]], 0, 0, implicit $mode, implicit $exec + ; GFX7-NEXT: FLAT_STORE_DWORD [[PRED_COPY3]], [[V_MIN_F32_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) + ; GFX7-NEXT: FLAT_STORE_DWORD [[PRED_COPY3]], [[V_MIN_F32_e64_1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) + ; GFX7-NEXT: FLAT_STORE_DWORD [[PRED_COPY3]], [[V_MIN_F32_e64_2]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) + ; GFX7-NEXT: 
[[V_MIN_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_MIN_F64_e64 0, [[PRED_COPY4]], 0, [[PRED_COPY5]], 0, 0, implicit $mode, implicit $exec + ; GFX7-NEXT: [[V_MIN_F64_e64_1:%[0-9]+]]:vreg_64 = nofpexcept V_MIN_F64_e64 0, [[PRED_COPY5]], 0, [[PRED_COPY4]], 0, 0, implicit $mode, implicit $exec + ; GFX7-NEXT: [[V_MIN_F64_e64_2:%[0-9]+]]:vreg_64 = nofpexcept V_MIN_F64_e64 0, [[PRED_COPY5]], 0, [[PRED_COPY6]], 0, 0, implicit $mode, implicit $exec + ; GFX7-NEXT: S_ENDPGM 0, implicit [[V_MIN_F64_e64_]], implicit [[V_MIN_F64_e64_1]], implicit [[V_MIN_F64_e64_2]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = COPY $vgpr0 %2:vgpr(s32) = COPY $vgpr1 @@ -86,23 +86,23 @@ ; GFX7-LABEL: name: fminnum_ieee_f32_f64_ieee_mode_off ; GFX7: liveins: $sgpr0, $vgpr0, $vgpr1, $vgpr3_vgpr4, $sgpr10_sgpr11, $vgpr10_vgpr11, $vgpr12_vgpr13 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:sreg_64 = COPY $sgpr10_sgpr11 - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY $vgpr10_vgpr11 - ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY $vgpr12_vgpr13 - ; GFX7-NEXT: %7:vgpr_32 = nofpexcept V_MIN_F32_e64 0, [[COPY1]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GFX7-NEXT: %8:vgpr_32 = nofpexcept V_MIN_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; GFX7-NEXT: %9:vgpr_32 = nofpexcept V_MIN_F32_e64 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec - ; GFX7-NEXT: FLAT_STORE_DWORD [[COPY3]], %7, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) - ; GFX7-NEXT: FLAT_STORE_DWORD [[COPY3]], %8, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) - ; GFX7-NEXT: FLAT_STORE_DWORD [[COPY3]], %9, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) - ; GFX7-NEXT: %10:vreg_64 = nofpexcept V_MIN_F64_e64 0, [[COPY4]], 0, [[COPY5]], 0, 0, implicit $mode, implicit $exec - ; GFX7-NEXT: %11:vreg_64 = nofpexcept V_MIN_F64_e64 0, [[COPY5]], 0, [[COPY4]], 0, 0, implicit $mode, implicit $exec - ; GFX7-NEXT: %12:vreg_64 = nofpexcept V_MIN_F64_e64 0, [[COPY5]], 0, [[COPY6]], 0, 0, implicit $mode, implicit $exec - ; GFX7-NEXT: S_ENDPGM 0, implicit %10, implicit %11, implicit %12 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr3_vgpr4 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr10_sgpr11 + ; GFX7-NEXT: [[PRED_COPY5:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr10_vgpr11 + ; GFX7-NEXT: [[PRED_COPY6:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr12_vgpr13 + ; GFX7-NEXT: [[V_MIN_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MIN_F32_e64 0, [[PRED_COPY1]], 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX7-NEXT: [[V_MIN_F32_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_MIN_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, 0, implicit $mode, implicit $exec + ; GFX7-NEXT: [[V_MIN_F32_e64_2:%[0-9]+]]:vgpr_32 = nofpexcept V_MIN_F32_e64 0, [[PRED_COPY1]], 0, [[PRED_COPY2]], 0, 0, implicit $mode, implicit $exec + ; GFX7-NEXT: FLAT_STORE_DWORD [[PRED_COPY3]], [[V_MIN_F32_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) + ; GFX7-NEXT: FLAT_STORE_DWORD [[PRED_COPY3]], [[V_MIN_F32_e64_1]], 0, 0, implicit $exec, 
implicit $flat_scr :: (store (s32), addrspace 1) + ; GFX7-NEXT: FLAT_STORE_DWORD [[PRED_COPY3]], [[V_MIN_F32_e64_2]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) + ; GFX7-NEXT: [[V_MIN_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_MIN_F64_e64 0, [[PRED_COPY4]], 0, [[PRED_COPY5]], 0, 0, implicit $mode, implicit $exec + ; GFX7-NEXT: [[V_MIN_F64_e64_1:%[0-9]+]]:vreg_64 = nofpexcept V_MIN_F64_e64 0, [[PRED_COPY5]], 0, [[PRED_COPY4]], 0, 0, implicit $mode, implicit $exec + ; GFX7-NEXT: [[V_MIN_F64_e64_2:%[0-9]+]]:vreg_64 = nofpexcept V_MIN_F64_e64 0, [[PRED_COPY5]], 0, [[PRED_COPY6]], 0, 0, implicit $mode, implicit $exec + ; GFX7-NEXT: S_ENDPGM 0, implicit [[V_MIN_F64_e64_]], implicit [[V_MIN_F64_e64_1]], implicit [[V_MIN_F64_e64_2]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = COPY $vgpr0 %2:vgpr(s32) = COPY $vgpr1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fminnum-ieee.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fminnum-ieee.s16.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fminnum-ieee.s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fminnum-ieee.s16.mir @@ -16,17 +16,17 @@ ; CHECK-LABEL: name: fminnum_ieee_f16_vv ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: %4:vgpr_32 = nofpexcept V_MIN_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: S_ENDPGM 0, implicit %4 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[V_MIN_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MIN_F16_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_MIN_F16_e64_]] ; GFX11-LABEL: name: fminnum_ieee_f16_vv ; GFX11: liveins: $vgpr0, $vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: %4:vgpr_32 = nofpexcept V_MIN_F16_t16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; GFX11-NEXT: S_ENDPGM 0, implicit %4 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[V_MIN_F16_t16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MIN_F16_t16_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, 0, implicit $mode, implicit $exec + ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MIN_F16_t16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s16) = G_TRUNC %0 @@ -47,17 +47,17 @@ ; CHECK-LABEL: name: fminnum_ieee_f16_v_fneg_v ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: %5:vgpr_32 = nofpexcept V_MIN_F16_e64 0, [[COPY]], 1, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: S_ENDPGM 0, implicit %5 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[V_MIN_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MIN_F16_e64 0, [[PRED_COPY]], 1, [[PRED_COPY1]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_MIN_F16_e64_]] ; GFX11-LABEL: name: fminnum_ieee_f16_v_fneg_v ; GFX11: liveins: $vgpr0, $vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - 
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: %5:vgpr_32 = nofpexcept V_MIN_F16_t16_e64 0, [[COPY]], 1, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; GFX11-NEXT: S_ENDPGM 0, implicit %5 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[V_MIN_F16_t16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MIN_F16_t16_e64 0, [[PRED_COPY]], 1, [[PRED_COPY1]], 0, 0, implicit $mode, implicit $exec + ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MIN_F16_t16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s16) = G_TRUNC %0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fminnum-ieee.v2s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fminnum-ieee.v2s16.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fminnum-ieee.v2s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fminnum-ieee.v2s16.mir @@ -15,10 +15,10 @@ ; GFX9-LABEL: name: fminnum_ieee_v2f16_vv ; GFX9: liveins: $sgpr0, $sgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: %2:vgpr_32 = nofpexcept V_PK_MIN_F16 8, [[COPY]], 8, [[COPY1]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec - ; GFX9-NEXT: S_ENDPGM 0, implicit %2 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[V_PK_MIN_F16_:%[0-9]+]]:vgpr_32 = nofpexcept V_PK_MIN_F16 8, [[PRED_COPY]], 8, [[PRED_COPY1]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec + ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_PK_MIN_F16_]] %0:vgpr(<2 x s16>) = COPY $vgpr0 %1:vgpr(<2 x s16>) = COPY $vgpr1 %2:vgpr(<2 x s16>) = G_FMINNUM_IEEE %0, %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fminnum.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fminnum.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fminnum.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fminnum.mir @@ -17,23 +17,23 @@ ; GFX7-LABEL: name: fminnum_f32_f64_ieee_mode_on ; GFX7: liveins: $sgpr0, $vgpr0, $vgpr1, $vgpr3_vgpr4, $sgpr10_sgpr11, $vgpr10_vgpr11, $vgpr12_vgpr13 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:sreg_64 = COPY $sgpr10_sgpr11 - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY $vgpr10_vgpr11 - ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY $vgpr12_vgpr13 - ; GFX7-NEXT: %7:vgpr_32 = nofpexcept V_MIN_F32_e64 0, [[COPY1]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GFX7-NEXT: %8:vgpr_32 = nofpexcept V_MIN_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; GFX7-NEXT: %9:vgpr_32 = nofpexcept V_MIN_F32_e64 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec - ; GFX7-NEXT: FLAT_STORE_DWORD [[COPY3]], %7, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) - ; GFX7-NEXT: FLAT_STORE_DWORD [[COPY3]], %8, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) - ; GFX7-NEXT: FLAT_STORE_DWORD [[COPY3]], %9, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) - ; GFX7-NEXT: %10:vreg_64 = nofpexcept V_MIN_F64_e64 0, [[COPY4]], 0, [[COPY5]], 0, 0, implicit $mode, implicit $exec - ; GFX7-NEXT: %11:vreg_64 = nofpexcept V_MIN_F64_e64 0, 
[[COPY5]], 0, [[COPY4]], 0, 0, implicit $mode, implicit $exec - ; GFX7-NEXT: %12:vreg_64 = nofpexcept V_MIN_F64_e64 0, [[COPY5]], 0, [[COPY6]], 0, 0, implicit $mode, implicit $exec - ; GFX7-NEXT: S_ENDPGM 0, implicit %10, implicit %11, implicit %12 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr3_vgpr4 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr10_sgpr11 + ; GFX7-NEXT: [[PRED_COPY5:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr10_vgpr11 + ; GFX7-NEXT: [[PRED_COPY6:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr12_vgpr13 + ; GFX7-NEXT: [[V_MIN_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MIN_F32_e64 0, [[PRED_COPY1]], 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX7-NEXT: [[V_MIN_F32_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_MIN_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, 0, implicit $mode, implicit $exec + ; GFX7-NEXT: [[V_MIN_F32_e64_2:%[0-9]+]]:vgpr_32 = nofpexcept V_MIN_F32_e64 0, [[PRED_COPY1]], 0, [[PRED_COPY2]], 0, 0, implicit $mode, implicit $exec + ; GFX7-NEXT: FLAT_STORE_DWORD [[PRED_COPY3]], [[V_MIN_F32_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) + ; GFX7-NEXT: FLAT_STORE_DWORD [[PRED_COPY3]], [[V_MIN_F32_e64_1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) + ; GFX7-NEXT: FLAT_STORE_DWORD [[PRED_COPY3]], [[V_MIN_F32_e64_2]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) + ; GFX7-NEXT: [[V_MIN_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_MIN_F64_e64 0, [[PRED_COPY4]], 0, [[PRED_COPY5]], 0, 0, implicit $mode, implicit $exec + ; GFX7-NEXT: [[V_MIN_F64_e64_1:%[0-9]+]]:vreg_64 = nofpexcept V_MIN_F64_e64 0, [[PRED_COPY5]], 0, [[PRED_COPY4]], 0, 0, implicit $mode, implicit $exec + ; GFX7-NEXT: [[V_MIN_F64_e64_2:%[0-9]+]]:vreg_64 = nofpexcept V_MIN_F64_e64 0, [[PRED_COPY5]], 0, [[PRED_COPY6]], 0, 0, implicit $mode, implicit $exec + ; GFX7-NEXT: S_ENDPGM 0, implicit [[V_MIN_F64_e64_]], implicit [[V_MIN_F64_e64_1]], implicit [[V_MIN_F64_e64_2]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = COPY $vgpr0 %2:vgpr(s32) = COPY $vgpr1 @@ -85,23 +85,23 @@ ; GFX7-LABEL: name: fminnum_f32_f64_ieee_mode_off ; GFX7: liveins: $sgpr0, $vgpr0, $vgpr1, $vgpr3_vgpr4, $sgpr10_sgpr11, $vgpr10_vgpr11, $vgpr12_vgpr13 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:sreg_64 = COPY $sgpr10_sgpr11 - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY $vgpr10_vgpr11 - ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY $vgpr12_vgpr13 - ; GFX7-NEXT: %7:vgpr_32 = nofpexcept V_MIN_F32_e64 0, [[COPY1]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GFX7-NEXT: %8:vgpr_32 = nofpexcept V_MIN_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; GFX7-NEXT: %9:vgpr_32 = nofpexcept V_MIN_F32_e64 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec - ; GFX7-NEXT: FLAT_STORE_DWORD [[COPY3]], %7, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) - ; GFX7-NEXT: FLAT_STORE_DWORD [[COPY3]], %8, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) - ; GFX7-NEXT: FLAT_STORE_DWORD [[COPY3]], %9, 0, 0, implicit $exec, implicit $flat_scr :: (store 
(s32), addrspace 1) - ; GFX7-NEXT: %10:vreg_64 = nofpexcept V_MIN_F64_e64 0, [[COPY4]], 0, [[COPY5]], 0, 0, implicit $mode, implicit $exec - ; GFX7-NEXT: %11:vreg_64 = nofpexcept V_MIN_F64_e64 0, [[COPY5]], 0, [[COPY4]], 0, 0, implicit $mode, implicit $exec - ; GFX7-NEXT: %12:vreg_64 = nofpexcept V_MIN_F64_e64 0, [[COPY5]], 0, [[COPY6]], 0, 0, implicit $mode, implicit $exec - ; GFX7-NEXT: S_ENDPGM 0, implicit %10, implicit %11, implicit %12 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr3_vgpr4 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr10_sgpr11 + ; GFX7-NEXT: [[PRED_COPY5:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr10_vgpr11 + ; GFX7-NEXT: [[PRED_COPY6:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr12_vgpr13 + ; GFX7-NEXT: [[V_MIN_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MIN_F32_e64 0, [[PRED_COPY1]], 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX7-NEXT: [[V_MIN_F32_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_MIN_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, 0, implicit $mode, implicit $exec + ; GFX7-NEXT: [[V_MIN_F32_e64_2:%[0-9]+]]:vgpr_32 = nofpexcept V_MIN_F32_e64 0, [[PRED_COPY1]], 0, [[PRED_COPY2]], 0, 0, implicit $mode, implicit $exec + ; GFX7-NEXT: FLAT_STORE_DWORD [[PRED_COPY3]], [[V_MIN_F32_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) + ; GFX7-NEXT: FLAT_STORE_DWORD [[PRED_COPY3]], [[V_MIN_F32_e64_1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) + ; GFX7-NEXT: FLAT_STORE_DWORD [[PRED_COPY3]], [[V_MIN_F32_e64_2]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) + ; GFX7-NEXT: [[V_MIN_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_MIN_F64_e64 0, [[PRED_COPY4]], 0, [[PRED_COPY5]], 0, 0, implicit $mode, implicit $exec + ; GFX7-NEXT: [[V_MIN_F64_e64_1:%[0-9]+]]:vreg_64 = nofpexcept V_MIN_F64_e64 0, [[PRED_COPY5]], 0, [[PRED_COPY4]], 0, 0, implicit $mode, implicit $exec + ; GFX7-NEXT: [[V_MIN_F64_e64_2:%[0-9]+]]:vreg_64 = nofpexcept V_MIN_F64_e64 0, [[PRED_COPY5]], 0, [[PRED_COPY6]], 0, 0, implicit $mode, implicit $exec + ; GFX7-NEXT: S_ENDPGM 0, implicit [[V_MIN_F64_e64_]], implicit [[V_MIN_F64_e64_1]], implicit [[V_MIN_F64_e64_2]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = COPY $vgpr0 %2:vgpr(s32) = COPY $vgpr1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fminnum.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fminnum.s16.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fminnum.s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fminnum.s16.mir @@ -16,17 +16,17 @@ ; CHECK-LABEL: name: fminnum_f16_vv ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: %4:vgpr_32 = nofpexcept V_MIN_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: S_ENDPGM 0, implicit %4 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[V_MIN_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MIN_F16_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_MIN_F16_e64_]] ; GFX11-LABEL: name: fminnum_f16_vv ; GFX11: liveins: $vgpr0, $vgpr1 ; GFX11-NEXT: {{ $}} - ; 
GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: %4:vgpr_32 = nofpexcept V_MIN_F16_t16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; GFX11-NEXT: S_ENDPGM 0, implicit %4 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[V_MIN_F16_t16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MIN_F16_t16_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, 0, implicit $mode, implicit $exec + ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MIN_F16_t16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s16) = G_TRUNC %0 @@ -47,17 +47,17 @@ ; CHECK-LABEL: name: fminnum_f16_v_fneg_v ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: %5:vgpr_32 = nofpexcept V_MIN_F16_e64 0, [[COPY]], 1, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: S_ENDPGM 0, implicit %5 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[V_MIN_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MIN_F16_e64 0, [[PRED_COPY]], 1, [[PRED_COPY1]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_MIN_F16_e64_]] ; GFX11-LABEL: name: fminnum_f16_v_fneg_v ; GFX11: liveins: $vgpr0, $vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: %5:vgpr_32 = nofpexcept V_MIN_F16_t16_e64 0, [[COPY]], 1, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; GFX11-NEXT: S_ENDPGM 0, implicit %5 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[V_MIN_F16_t16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MIN_F16_t16_e64 0, [[PRED_COPY]], 1, [[PRED_COPY1]], 0, 0, implicit $mode, implicit $exec + ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MIN_F16_t16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s16) = G_TRUNC %0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fminnum.v2s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fminnum.v2s16.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fminnum.v2s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fminnum.v2s16.mir @@ -15,10 +15,10 @@ ; GFX9-LABEL: name: fminnum_v2f16_vv ; GFX9: liveins: $sgpr0, $sgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: %2:vgpr_32 = nofpexcept V_PK_MIN_F16 8, [[COPY]], 8, [[COPY1]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec - ; GFX9-NEXT: S_ENDPGM 0, implicit %2 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[V_PK_MIN_F16_:%[0-9]+]]:vgpr_32 = nofpexcept V_PK_MIN_F16 8, [[PRED_COPY]], 8, [[PRED_COPY1]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec + ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_PK_MIN_F16_]] %0:vgpr(<2 x s16>) = COPY $vgpr0 %1:vgpr(<2 x s16>) = COPY $vgpr1 %2:vgpr(<2 x s16>) = G_FMINNUM %0, %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmul.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmul.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmul.mir +++ 
b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmul.mir @@ -13,16 +13,16 @@ ; GCN-LABEL: name: fmul_f32 ; GCN: liveins: $sgpr0, $vgpr0, $vgpr1, $vgpr3_vgpr4 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4 - ; GCN-NEXT: %4:vgpr_32 = nofpexcept V_MUL_F32_e64 0, [[COPY1]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: %5:vgpr_32 = nofpexcept V_MUL_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: %6:vgpr_32 = nofpexcept V_MUL_F32_e64 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: FLAT_STORE_DWORD [[COPY3]], %4, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) - ; GCN-NEXT: FLAT_STORE_DWORD [[COPY3]], %5, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) - ; GCN-NEXT: FLAT_STORE_DWORD [[COPY3]], %6, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr3_vgpr4 + ; GCN-NEXT: [[V_MUL_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MUL_F32_e64 0, [[PRED_COPY1]], 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[V_MUL_F32_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_MUL_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[V_MUL_F32_e64_2:%[0-9]+]]:vgpr_32 = nofpexcept V_MUL_F32_e64 0, [[PRED_COPY1]], 0, [[PRED_COPY2]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: FLAT_STORE_DWORD [[PRED_COPY3]], [[V_MUL_F32_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) + ; GCN-NEXT: FLAT_STORE_DWORD [[PRED_COPY3]], [[V_MUL_F32_e64_1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) + ; GCN-NEXT: FLAT_STORE_DWORD [[PRED_COPY3]], [[V_MUL_F32_e64_2]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = COPY $vgpr0 %2:vgpr(s32) = COPY $vgpr1 @@ -54,13 +54,13 @@ ; GCN-LABEL: name: fmul_f64 ; GCN: liveins: $sgpr0_sgpr1, $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GCN-NEXT: %4:vreg_64 = nofpexcept V_MUL_F64_e64 0, [[COPY1]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: %5:vreg_64 = nofpexcept V_MUL_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: %6:vreg_64 = nofpexcept V_MUL_F64_e64 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: S_ENDPGM 0, implicit %4, implicit %5, implicit %6 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GCN-NEXT: [[V_MUL_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_MUL_F64_e64 0, [[PRED_COPY1]], 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[V_MUL_F64_e64_1:%[0-9]+]]:vreg_64 = nofpexcept V_MUL_F64_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, 0, implicit $mode, implicit $exec + ; 
GCN-NEXT: [[V_MUL_F64_e64_2:%[0-9]+]]:vreg_64 = nofpexcept V_MUL_F64_e64 0, [[PRED_COPY1]], 0, [[PRED_COPY2]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: S_ENDPGM 0, implicit [[V_MUL_F64_e64_]], implicit [[V_MUL_F64_e64_1]], implicit [[V_MUL_F64_e64_2]] %0:sgpr(s64) = COPY $sgpr0_sgpr1 %1:vgpr(s64) = COPY $vgpr0_vgpr1 %2:vgpr(s64) = COPY $vgpr2_vgpr3 @@ -90,12 +90,12 @@ ; GCN-LABEL: name: fmul_f16 ; GCN: liveins: $sgpr0, $vgpr0, $vgpr1, $vgpr3_vgpr4 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: %7:vgpr_32 = nofpexcept V_MUL_F16_e64 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: %8:vgpr_32 = nofpexcept V_MUL_F16_e64 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: %9:vgpr_32 = nofpexcept V_MUL_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: S_ENDPGM 0, implicit %7, implicit %8, implicit %9 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[V_MUL_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MUL_F16_e64 0, [[PRED_COPY]], 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[V_MUL_F16_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_MUL_F16_e64 0, [[PRED_COPY]], 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[V_MUL_F16_e64_2:%[0-9]+]]:vgpr_32 = nofpexcept V_MUL_F16_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: S_ENDPGM 0, implicit [[V_MUL_F16_e64_]], implicit [[V_MUL_F16_e64_1]], implicit [[V_MUL_F16_e64_2]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = COPY $vgpr0 %2:vgpr(s32) = COPY $vgpr1 @@ -129,28 +129,28 @@ ; GCN-LABEL: name: fmul_modifiers_f32 ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GCN-NEXT: %6:vgpr_32 = nofpexcept V_MUL_F32_e64 2, [[COPY]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: %7:vgpr_32 = nofpexcept V_MUL_F32_e64 0, [[COPY]], 2, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: %8:vgpr_32 = nofpexcept V_MUL_F32_e64 2, [[COPY]], 2, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: %9:vgpr_32 = nofpexcept V_MUL_F32_e64 1, [[COPY]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: %10:vgpr_32 = nofpexcept V_MUL_F32_e64 0, [[COPY]], 1, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: %11:vgpr_32 = nofpexcept V_MUL_F32_e64 1, [[COPY]], 1, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: %12:vgpr_32 = nofpexcept V_MUL_F32_e64 3, [[COPY]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: %13:vgpr_32 = nofpexcept V_MUL_F32_e64 0, [[COPY]], 3, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: %14:vgpr_32 = nofpexcept V_MUL_F32_e64 3, [[COPY]], 3, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: %15:vgpr_32 = nofpexcept V_MUL_F32_e64 3, [[COPY]], 1, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: FLAT_STORE_DWORD [[COPY1]], %6, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) - ; GCN-NEXT: FLAT_STORE_DWORD [[COPY1]], %7, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) - ; GCN-NEXT: FLAT_STORE_DWORD [[COPY1]], %8, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) - ; GCN-NEXT: FLAT_STORE_DWORD 
[[COPY1]], %9, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) - ; GCN-NEXT: FLAT_STORE_DWORD [[COPY1]], %10, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) - ; GCN-NEXT: FLAT_STORE_DWORD [[COPY1]], %11, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) - ; GCN-NEXT: FLAT_STORE_DWORD [[COPY1]], %12, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) - ; GCN-NEXT: FLAT_STORE_DWORD [[COPY1]], %13, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) - ; GCN-NEXT: FLAT_STORE_DWORD [[COPY1]], %14, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) - ; GCN-NEXT: FLAT_STORE_DWORD [[COPY1]], %15, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GCN-NEXT: [[V_MUL_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MUL_F32_e64 2, [[PRED_COPY]], 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[V_MUL_F32_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_MUL_F32_e64 0, [[PRED_COPY]], 2, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[V_MUL_F32_e64_2:%[0-9]+]]:vgpr_32 = nofpexcept V_MUL_F32_e64 2, [[PRED_COPY]], 2, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[V_MUL_F32_e64_3:%[0-9]+]]:vgpr_32 = nofpexcept V_MUL_F32_e64 1, [[PRED_COPY]], 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[V_MUL_F32_e64_4:%[0-9]+]]:vgpr_32 = nofpexcept V_MUL_F32_e64 0, [[PRED_COPY]], 1, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[V_MUL_F32_e64_5:%[0-9]+]]:vgpr_32 = nofpexcept V_MUL_F32_e64 1, [[PRED_COPY]], 1, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[V_MUL_F32_e64_6:%[0-9]+]]:vgpr_32 = nofpexcept V_MUL_F32_e64 3, [[PRED_COPY]], 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[V_MUL_F32_e64_7:%[0-9]+]]:vgpr_32 = nofpexcept V_MUL_F32_e64 0, [[PRED_COPY]], 3, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[V_MUL_F32_e64_8:%[0-9]+]]:vgpr_32 = nofpexcept V_MUL_F32_e64 3, [[PRED_COPY]], 3, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[V_MUL_F32_e64_9:%[0-9]+]]:vgpr_32 = nofpexcept V_MUL_F32_e64 3, [[PRED_COPY]], 1, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: FLAT_STORE_DWORD [[PRED_COPY1]], [[V_MUL_F32_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) + ; GCN-NEXT: FLAT_STORE_DWORD [[PRED_COPY1]], [[V_MUL_F32_e64_1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) + ; GCN-NEXT: FLAT_STORE_DWORD [[PRED_COPY1]], [[V_MUL_F32_e64_2]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) + ; GCN-NEXT: FLAT_STORE_DWORD [[PRED_COPY1]], [[V_MUL_F32_e64_3]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) + ; GCN-NEXT: FLAT_STORE_DWORD [[PRED_COPY1]], [[V_MUL_F32_e64_4]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) + ; GCN-NEXT: FLAT_STORE_DWORD [[PRED_COPY1]], [[V_MUL_F32_e64_5]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) + ; GCN-NEXT: FLAT_STORE_DWORD [[PRED_COPY1]], [[V_MUL_F32_e64_6]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) + ; GCN-NEXT: FLAT_STORE_DWORD [[PRED_COPY1]], [[V_MUL_F32_e64_7]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) + ; GCN-NEXT: 
FLAT_STORE_DWORD [[PRED_COPY1]], [[V_MUL_F32_e64_8]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) + ; GCN-NEXT: FLAT_STORE_DWORD [[PRED_COPY1]], [[V_MUL_F32_e64_9]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(p1) = COPY $vgpr2_vgpr3 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmul.v2s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmul.v2s16.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmul.v2s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmul.v2s16.mir @@ -15,10 +15,10 @@ ; GFX9-LABEL: name: fmul_v2f16_vv ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: %2:vgpr_32 = nofpexcept V_PK_MUL_F16 8, [[COPY]], 8, [[COPY1]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec - ; GFX9-NEXT: S_ENDPGM 0, implicit %2 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[V_PK_MUL_F16_:%[0-9]+]]:vgpr_32 = nofpexcept V_PK_MUL_F16 8, [[PRED_COPY]], 8, [[PRED_COPY1]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec + ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_PK_MUL_F16_]] %0:vgpr(<2 x s16>) = COPY $vgpr0 %1:vgpr(<2 x s16>) = COPY $vgpr1 %2:vgpr(<2 x s16>) = G_FMUL %0, %1 @@ -37,10 +37,10 @@ ; GFX9-LABEL: name: fmul_v2f16_fneg_v_fneg_v ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: %4:vgpr_32 = nofpexcept V_PK_MUL_F16 11, [[COPY]], 11, [[COPY1]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec - ; GFX9-NEXT: S_ENDPGM 0, implicit %4 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[V_PK_MUL_F16_:%[0-9]+]]:vgpr_32 = nofpexcept V_PK_MUL_F16 11, [[PRED_COPY]], 11, [[PRED_COPY1]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec + ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_PK_MUL_F16_]] %0:vgpr(<2 x s16>) = COPY $vgpr0 %1:vgpr(<2 x s16>) = COPY $vgpr1 %2:vgpr(<2 x s16>) = G_FNEG %0 @@ -61,15 +61,15 @@ ; GFX9-LABEL: name: fmul_v2f16_fneg_lo_v_v ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768 - ; GFX9-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY1]], implicit $exec + ; GFX9-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[PRED_COPY1]], implicit $exec ; GFX9-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 65535, [[V_XOR_B32_e64_]], implicit $exec - ; GFX9-NEXT: [[V_LSHL_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 [[COPY2]], 16, [[V_AND_B32_e32_]], implicit $exec - ; GFX9-NEXT: %7:vgpr_32 = nofpexcept V_PK_MUL_F16 8, [[V_LSHL_OR_B32_e64_]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec - ; GFX9-NEXT: S_ENDPGM 0, implicit %7 + ; GFX9-NEXT: [[V_LSHL_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 [[PRED_COPY2]], 16, 
[[V_AND_B32_e32_]], implicit $exec + ; GFX9-NEXT: [[V_PK_MUL_F16_:%[0-9]+]]:vgpr_32 = nofpexcept V_PK_MUL_F16 8, [[V_LSHL_OR_B32_e64_]], 8, [[PRED_COPY]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec + ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_PK_MUL_F16_]] %0:vgpr(<2 x s16>) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s32) = COPY $vgpr2 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fneg.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fneg.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fneg.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fneg.mir @@ -23,31 +23,31 @@ ; SI-LABEL: name: fneg_s32_ss ; SI: liveins: $sgpr0 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 ; SI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 - ; SI-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc - ; SI-NEXT: $sgpr0 = COPY [[S_XOR_B32_]] + ; SI-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[PRED_COPY]], [[S_MOV_B32_]], implicit-def $scc + ; SI-NEXT: $sgpr0 = PRED_COPY [[S_XOR_B32_]] ; VI-LABEL: name: fneg_s32_ss ; VI: liveins: $sgpr0 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 ; VI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 - ; VI-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc - ; VI-NEXT: $sgpr0 = COPY [[S_XOR_B32_]] + ; VI-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[PRED_COPY]], [[S_MOV_B32_]], implicit-def $scc + ; VI-NEXT: $sgpr0 = PRED_COPY [[S_XOR_B32_]] ; GFX9-LABEL: name: fneg_s32_ss ; GFX9: liveins: $sgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 - ; GFX9-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc - ; GFX9-NEXT: $sgpr0 = COPY [[S_XOR_B32_]] + ; GFX9-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[PRED_COPY]], [[S_MOV_B32_]], implicit-def $scc + ; GFX9-NEXT: $sgpr0 = PRED_COPY [[S_XOR_B32_]] ; GFX10-LABEL: name: fneg_s32_ss ; GFX10: liveins: $sgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 - ; GFX10-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc - ; GFX10-NEXT: $sgpr0 = COPY [[S_XOR_B32_]] + ; GFX10-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[PRED_COPY]], [[S_MOV_B32_]], implicit-def $scc + ; GFX10-NEXT: $sgpr0 = PRED_COPY [[S_XOR_B32_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = G_FNEG %0 $sgpr0 = COPY %1 @@ -71,31 +71,31 @@ ; SI-LABEL: name: fneg_s32_vv ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; SI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 - ; SI-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec - ; SI-NEXT: $vgpr0 = COPY [[V_XOR_B32_e64_]] + ; SI-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[PRED_COPY]], implicit $exec + ; SI-NEXT: $vgpr0 = PRED_COPY [[V_XOR_B32_e64_]] ; VI-LABEL: name: fneg_s32_vv ; VI: liveins: $vgpr0 ; VI-NEXT: {{ 
$}} - ; VI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; VI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 - ; VI-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec - ; VI-NEXT: $vgpr0 = COPY [[V_XOR_B32_e64_]] + ; VI-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[PRED_COPY]], implicit $exec + ; VI-NEXT: $vgpr0 = PRED_COPY [[V_XOR_B32_e64_]] ; GFX9-LABEL: name: fneg_s32_vv ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 - ; GFX9-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec - ; GFX9-NEXT: $vgpr0 = COPY [[V_XOR_B32_e64_]] + ; GFX9-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[PRED_COPY]], implicit $exec + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[V_XOR_B32_e64_]] ; GFX10-LABEL: name: fneg_s32_vv ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 - ; GFX10-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec - ; GFX10-NEXT: $vgpr0 = COPY [[V_XOR_B32_e64_]] + ; GFX10-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[PRED_COPY]], implicit $exec + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[V_XOR_B32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = G_FNEG %0 $vgpr0 = COPY %1 @@ -120,25 +120,25 @@ ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; SI-NEXT: [[FNEG:%[0-9]+]]:vgpr_32(s32) = G_FNEG [[COPY]] - ; SI-NEXT: $vgpr0 = COPY [[FNEG]](s32) + ; SI-NEXT: $vgpr0 = PRED_COPY [[FNEG]](s32) ; VI-LABEL: name: fneg_s32_vs ; VI: liveins: $sgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; VI-NEXT: [[FNEG:%[0-9]+]]:vgpr_32(s32) = G_FNEG [[COPY]] - ; VI-NEXT: $vgpr0 = COPY [[FNEG]](s32) + ; VI-NEXT: $vgpr0 = PRED_COPY [[FNEG]](s32) ; GFX9-LABEL: name: fneg_s32_vs ; GFX9: liveins: $sgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; GFX9-NEXT: [[FNEG:%[0-9]+]]:vgpr_32(s32) = G_FNEG [[COPY]] - ; GFX9-NEXT: $vgpr0 = COPY [[FNEG]](s32) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[FNEG]](s32) ; GFX10-LABEL: name: fneg_s32_vs ; GFX10: liveins: $sgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; GFX10-NEXT: [[FNEG:%[0-9]+]]:vgpr_32(s32) = G_FNEG [[COPY]] - ; GFX10-NEXT: $vgpr0 = COPY [[FNEG]](s32) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[FNEG]](s32) %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = G_FNEG %0 $vgpr0 = COPY %1 @@ -162,31 +162,31 @@ ; SI-LABEL: name: fneg_s16_ss ; SI: liveins: $sgpr0 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 ; SI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768 - ; SI-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc - ; SI-NEXT: $sgpr0 = COPY [[S_XOR_B32_]] + ; SI-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[PRED_COPY]], [[S_MOV_B32_]], implicit-def $scc + ; SI-NEXT: $sgpr0 = PRED_COPY [[S_XOR_B32_]] ; VI-LABEL: name: fneg_s16_ss ; VI: liveins: $sgpr0 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:sreg_32 = 
COPY $sgpr0 + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 ; VI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768 - ; VI-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc - ; VI-NEXT: $sgpr0 = COPY [[S_XOR_B32_]] + ; VI-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[PRED_COPY]], [[S_MOV_B32_]], implicit-def $scc + ; VI-NEXT: $sgpr0 = PRED_COPY [[S_XOR_B32_]] ; GFX9-LABEL: name: fneg_s16_ss ; GFX9: liveins: $sgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768 - ; GFX9-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc - ; GFX9-NEXT: $sgpr0 = COPY [[S_XOR_B32_]] + ; GFX9-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[PRED_COPY]], [[S_MOV_B32_]], implicit-def $scc + ; GFX9-NEXT: $sgpr0 = PRED_COPY [[S_XOR_B32_]] ; GFX10-LABEL: name: fneg_s16_ss ; GFX10: liveins: $sgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768 - ; GFX10-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc - ; GFX10-NEXT: $sgpr0 = COPY [[S_XOR_B32_]] + ; GFX10-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[PRED_COPY]], [[S_MOV_B32_]], implicit-def $scc + ; GFX10-NEXT: $sgpr0 = PRED_COPY [[S_XOR_B32_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s16) = G_TRUNC %0 %2:sgpr(s16) = G_FNEG %1 @@ -212,31 +212,31 @@ ; SI-LABEL: name: fneg_s16_vv ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; SI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768 - ; SI-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec - ; SI-NEXT: $vgpr0 = COPY [[V_XOR_B32_e64_]] + ; SI-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[PRED_COPY]], implicit $exec + ; SI-NEXT: $vgpr0 = PRED_COPY [[V_XOR_B32_e64_]] ; VI-LABEL: name: fneg_s16_vv ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; VI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768 - ; VI-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec - ; VI-NEXT: $vgpr0 = COPY [[V_XOR_B32_e64_]] + ; VI-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[PRED_COPY]], implicit $exec + ; VI-NEXT: $vgpr0 = PRED_COPY [[V_XOR_B32_e64_]] ; GFX9-LABEL: name: fneg_s16_vv ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768 - ; GFX9-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec - ; GFX9-NEXT: $vgpr0 = COPY [[V_XOR_B32_e64_]] + ; GFX9-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[PRED_COPY]], implicit $exec + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[V_XOR_B32_e64_]] ; GFX10-LABEL: name: fneg_s16_vv ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX10-NEXT: 
[[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768 - ; GFX10-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec - ; GFX10-NEXT: $vgpr0 = COPY [[V_XOR_B32_e64_]] + ; GFX10-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[PRED_COPY]], implicit $exec + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[V_XOR_B32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s16) = G_TRUNC %0 %2:vgpr(s16) = G_FNEG %1 @@ -267,32 +267,32 @@ ; SI-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; SI-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) ; SI-NEXT: [[FNEG:%[0-9]+]]:vgpr_32(s16) = G_FNEG [[TRUNC]] - ; SI-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY [[FNEG]](s16) - ; SI-NEXT: $vgpr0 = COPY [[COPY1]](s32) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY [[FNEG]](s16) + ; SI-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY]](s32) ; VI-LABEL: name: fneg_s16_vs ; VI: liveins: $sgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; VI-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) ; VI-NEXT: [[FNEG:%[0-9]+]]:vgpr_32(s16) = G_FNEG [[TRUNC]] - ; VI-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY [[FNEG]](s16) - ; VI-NEXT: $vgpr0 = COPY [[COPY1]](s32) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY [[FNEG]](s16) + ; VI-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY]](s32) ; GFX9-LABEL: name: fneg_s16_vs ; GFX9: liveins: $sgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) ; GFX9-NEXT: [[FNEG:%[0-9]+]]:vgpr_32(s16) = G_FNEG [[TRUNC]] - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY [[FNEG]](s16) - ; GFX9-NEXT: $vgpr0 = COPY [[COPY1]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY [[FNEG]](s16) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY]](s32) ; GFX10-LABEL: name: fneg_s16_vs ; GFX10: liveins: $sgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) ; GFX10-NEXT: [[FNEG:%[0-9]+]]:vgpr_32(s16) = G_FNEG [[TRUNC]] - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY [[FNEG]](s16) - ; GFX10-NEXT: $vgpr0 = COPY [[COPY1]](s32) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY [[FNEG]](s16) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY]](s32) %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s16) = G_TRUNC %0 %2:vgpr(s16) = G_FNEG %1 @@ -318,31 +318,31 @@ ; SI-LABEL: name: fneg_v2s16_ss ; SI: liveins: $sgpr0_sgpr1 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 ; SI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147516416 - ; SI-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc - ; SI-NEXT: $sgpr0 = COPY [[S_XOR_B32_]] + ; SI-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[PRED_COPY]], [[S_MOV_B32_]], implicit-def $scc + ; SI-NEXT: $sgpr0 = PRED_COPY [[S_XOR_B32_]] ; VI-LABEL: name: fneg_v2s16_ss ; VI: liveins: $sgpr0_sgpr1 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 ; VI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147516416 - ; VI-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc - ; VI-NEXT: $sgpr0 = COPY [[S_XOR_B32_]] + ; VI-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[PRED_COPY]], [[S_MOV_B32_]], implicit-def $scc + ; VI-NEXT: $sgpr0 = PRED_COPY 
[[S_XOR_B32_]] ; GFX9-LABEL: name: fneg_v2s16_ss ; GFX9: liveins: $sgpr0_sgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147516416 - ; GFX9-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc - ; GFX9-NEXT: $sgpr0 = COPY [[S_XOR_B32_]] + ; GFX9-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[PRED_COPY]], [[S_MOV_B32_]], implicit-def $scc + ; GFX9-NEXT: $sgpr0 = PRED_COPY [[S_XOR_B32_]] ; GFX10-LABEL: name: fneg_v2s16_ss ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147516416 - ; GFX10-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc - ; GFX10-NEXT: $sgpr0 = COPY [[S_XOR_B32_]] + ; GFX10-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[PRED_COPY]], [[S_MOV_B32_]], implicit-def $scc + ; GFX10-NEXT: $sgpr0 = PRED_COPY [[S_XOR_B32_]] %0:sgpr(<2 x s16>) = COPY $sgpr0 %1:sgpr(<2 x s16>) = G_FNEG %0 $sgpr0 = COPY %1 @@ -366,31 +366,31 @@ ; SI-LABEL: name: fneg_v2s16_vv ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; SI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147516416 - ; SI-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec - ; SI-NEXT: $vgpr0 = COPY [[V_XOR_B32_e64_]] + ; SI-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[PRED_COPY]], implicit $exec + ; SI-NEXT: $vgpr0 = PRED_COPY [[V_XOR_B32_e64_]] ; VI-LABEL: name: fneg_v2s16_vv ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; VI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147516416 - ; VI-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec - ; VI-NEXT: $vgpr0 = COPY [[V_XOR_B32_e64_]] + ; VI-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[PRED_COPY]], implicit $exec + ; VI-NEXT: $vgpr0 = PRED_COPY [[V_XOR_B32_e64_]] ; GFX9-LABEL: name: fneg_v2s16_vv ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147516416 - ; GFX9-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec - ; GFX9-NEXT: $vgpr0 = COPY [[V_XOR_B32_e64_]] + ; GFX9-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[PRED_COPY]], implicit $exec + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[V_XOR_B32_e64_]] ; GFX10-LABEL: name: fneg_v2s16_vv ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147516416 - ; GFX10-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec - ; GFX10-NEXT: $vgpr0 = COPY [[V_XOR_B32_e64_]] + ; GFX10-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[PRED_COPY]], implicit $exec + ; GFX10-NEXT: $vgpr0 = PRED_COPY 
[[V_XOR_B32_e64_]] %0:vgpr(<2 x s16>) = COPY $vgpr0 %1:vgpr(<2 x s16>) = G_FNEG %0 $vgpr0 = COPY %1 @@ -415,25 +415,25 @@ ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 ; SI-NEXT: [[FNEG:%[0-9]+]]:vgpr_32(<2 x s16>) = G_FNEG [[COPY]] - ; SI-NEXT: $vgpr0 = COPY [[FNEG]](<2 x s16>) + ; SI-NEXT: $vgpr0 = PRED_COPY [[FNEG]](<2 x s16>) ; VI-LABEL: name: fneg_v2s16_vs ; VI: liveins: $sgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 ; VI-NEXT: [[FNEG:%[0-9]+]]:vgpr_32(<2 x s16>) = G_FNEG [[COPY]] - ; VI-NEXT: $vgpr0 = COPY [[FNEG]](<2 x s16>) + ; VI-NEXT: $vgpr0 = PRED_COPY [[FNEG]](<2 x s16>) ; GFX9-LABEL: name: fneg_v2s16_vs ; GFX9: liveins: $sgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 ; GFX9-NEXT: [[FNEG:%[0-9]+]]:vgpr_32(<2 x s16>) = G_FNEG [[COPY]] - ; GFX9-NEXT: $vgpr0 = COPY [[FNEG]](<2 x s16>) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[FNEG]](<2 x s16>) ; GFX10-LABEL: name: fneg_v2s16_vs ; GFX10: liveins: $sgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 ; GFX10-NEXT: [[FNEG:%[0-9]+]]:vgpr_32(<2 x s16>) = G_FNEG [[COPY]] - ; GFX10-NEXT: $vgpr0 = COPY [[FNEG]](<2 x s16>) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[FNEG]](<2 x s16>) %0:sgpr(<2 x s16>) = COPY $sgpr0 %1:vgpr(<2 x s16>) = G_FNEG %0 $vgpr0 = COPY %1 @@ -460,42 +460,42 @@ ; SI-LABEL: name: fneg_s64_ss ; SI: liveins: $sgpr0_sgpr1 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 - ; SI-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; SI-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub0 + ; SI-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub1 ; SI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 - ; SI-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc - ; SI-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[S_XOR_B32_]], %subreg.sub1 + ; SI-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[PRED_COPY2]], [[S_MOV_B32_]], implicit-def $scc + ; SI-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[S_XOR_B32_]], %subreg.sub1 ; SI-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] ; VI-LABEL: name: fneg_s64_ss ; VI: liveins: $sgpr0_sgpr1 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 - ; VI-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; VI-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub0 + ; VI-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub1 ; VI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 - ; VI-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc - ; VI-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[S_XOR_B32_]], %subreg.sub1 + ; VI-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[PRED_COPY2]], [[S_MOV_B32_]], implicit-def $scc + ; VI-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[S_XOR_B32_]], %subreg.sub1 ; VI-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] ; GFX9-LABEL: name: fneg_s64_ss ; GFX9: liveins: $sgpr0_sgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: 
[[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub1 ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 - ; GFX9-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc - ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[S_XOR_B32_]], %subreg.sub1 + ; GFX9-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[PRED_COPY2]], [[S_MOV_B32_]], implicit-def $scc + ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[S_XOR_B32_]], %subreg.sub1 ; GFX9-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] ; GFX10-LABEL: name: fneg_s64_ss ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub1 ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 - ; GFX10-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc - ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[S_XOR_B32_]], %subreg.sub1 + ; GFX10-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[PRED_COPY2]], [[S_MOV_B32_]], implicit-def $scc + ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[S_XOR_B32_]], %subreg.sub1 ; GFX10-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] %0:sgpr(s64) = COPY $sgpr0_sgpr1 %1:sgpr(s64) = G_FNEG %0 @@ -523,42 +523,42 @@ ; SI-LABEL: name: fneg_s64_vv ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; SI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[PRED_COPY]].sub1 ; SI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 - ; SI-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY1]], implicit $exec - ; SI-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; SI-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[V_XOR_B32_e64_]], %subreg.sub1 + ; SI-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec + ; SI-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[PRED_COPY]].sub0 + ; SI-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[V_XOR_B32_e64_]], %subreg.sub1 ; SI-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] ; VI-LABEL: name: fneg_s64_vv ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; VI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[PRED_COPY]].sub1 ; VI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 - ; VI-NEXT: 
[[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY1]], implicit $exec - ; VI-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; VI-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[V_XOR_B32_e64_]], %subreg.sub1 + ; VI-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec + ; VI-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[PRED_COPY]].sub0 + ; VI-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[V_XOR_B32_e64_]], %subreg.sub1 ; VI-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] ; GFX9-LABEL: name: fneg_s64_vv ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[PRED_COPY]].sub1 ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 - ; GFX9-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY1]], implicit $exec - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[V_XOR_B32_e64_]], %subreg.sub1 + ; GFX9-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[PRED_COPY]].sub0 + ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[V_XOR_B32_e64_]], %subreg.sub1 ; GFX9-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] ; GFX10-LABEL: name: fneg_s64_vv ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[PRED_COPY]].sub1 ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 - ; GFX10-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY1]], implicit $exec - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[V_XOR_B32_e64_]], %subreg.sub1 + ; GFX10-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[PRED_COPY]].sub0 + ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[V_XOR_B32_e64_]], %subreg.sub1 ; GFX10-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_FNEG %0 @@ -627,30 +627,30 @@ ; SI-LABEL: name: fneg_fabs_s32_ss ; SI: liveins: $sgpr0_sgpr1 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 ; SI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 - ; SI-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc + ; SI-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[PRED_COPY]], [[S_MOV_B32_]], implicit-def $scc ; SI-NEXT: S_ENDPGM 0, implicit [[S_OR_B32_]] ; VI-LABEL: name: fneg_fabs_s32_ss ; VI: liveins: $sgpr0_sgpr1 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 ; VI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 
2147483648 - ; VI-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc + ; VI-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[PRED_COPY]], [[S_MOV_B32_]], implicit-def $scc ; VI-NEXT: S_ENDPGM 0, implicit [[S_OR_B32_]] ; GFX9-LABEL: name: fneg_fabs_s32_ss ; GFX9: liveins: $sgpr0_sgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 - ; GFX9-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc + ; GFX9-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[PRED_COPY]], [[S_MOV_B32_]], implicit-def $scc ; GFX9-NEXT: S_ENDPGM 0, implicit [[S_OR_B32_]] ; GFX10-LABEL: name: fneg_fabs_s32_ss ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 - ; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc + ; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[PRED_COPY]], [[S_MOV_B32_]], implicit-def $scc ; GFX10-NEXT: S_ENDPGM 0, implicit [[S_OR_B32_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = G_FABS %0 @@ -676,30 +676,30 @@ ; SI-LABEL: name: fneg_fabs_s32_vv ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; SI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 - ; SI-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec + ; SI-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[PRED_COPY]], implicit $exec ; SI-NEXT: S_ENDPGM 0, implicit [[V_XOR_B32_e64_]] ; VI-LABEL: name: fneg_fabs_s32_vv ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; VI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 - ; VI-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec + ; VI-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[PRED_COPY]], implicit $exec ; VI-NEXT: S_ENDPGM 0, implicit [[V_XOR_B32_e64_]] ; GFX9-LABEL: name: fneg_fabs_s32_vv ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 - ; GFX9-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec + ; GFX9-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[PRED_COPY]], implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_XOR_B32_e64_]] ; GFX10-LABEL: name: fneg_fabs_s32_vv ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 - ; GFX10-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec + ; GFX10-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[PRED_COPY]], implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_XOR_B32_e64_]] %0:vgpr(s32) = COPY $vgpr0 
%1:vgpr(s32) = G_FABS %0 @@ -779,31 +779,31 @@ ; SI-LABEL: name: fneg_fabs_s16_ss ; SI: liveins: $sgpr0 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 ; SI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768 - ; SI-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc - ; SI-NEXT: $sgpr0 = COPY [[S_OR_B32_]] + ; SI-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[PRED_COPY]], [[S_MOV_B32_]], implicit-def $scc + ; SI-NEXT: $sgpr0 = PRED_COPY [[S_OR_B32_]] ; VI-LABEL: name: fneg_fabs_s16_ss ; VI: liveins: $sgpr0 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 ; VI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768 - ; VI-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc - ; VI-NEXT: $sgpr0 = COPY [[S_OR_B32_]] + ; VI-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[PRED_COPY]], [[S_MOV_B32_]], implicit-def $scc + ; VI-NEXT: $sgpr0 = PRED_COPY [[S_OR_B32_]] ; GFX9-LABEL: name: fneg_fabs_s16_ss ; GFX9: liveins: $sgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768 - ; GFX9-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc - ; GFX9-NEXT: $sgpr0 = COPY [[S_OR_B32_]] + ; GFX9-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[PRED_COPY]], [[S_MOV_B32_]], implicit-def $scc + ; GFX9-NEXT: $sgpr0 = PRED_COPY [[S_OR_B32_]] ; GFX10-LABEL: name: fneg_fabs_s16_ss ; GFX10: liveins: $sgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768 - ; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc - ; GFX10-NEXT: $sgpr0 = COPY [[S_OR_B32_]] + ; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[PRED_COPY]], [[S_MOV_B32_]], implicit-def $scc + ; GFX10-NEXT: $sgpr0 = PRED_COPY [[S_OR_B32_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s16) = G_TRUNC %0 %2:sgpr(s16) = G_FABS %1 @@ -831,35 +831,35 @@ ; SI-LABEL: name: fneg_fabs_s16_vv ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; SI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768 - ; SI-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec - ; SI-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[V_OR_B32_e64_]] - ; SI-NEXT: $vgpr0 = COPY [[COPY1]] + ; SI-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_MOV_B32_]], [[PRED_COPY]], implicit $exec + ; SI-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY [[V_OR_B32_e64_]] + ; SI-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY1]] ; VI-LABEL: name: fneg_fabs_s16_vv ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; VI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768 - ; VI-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec - ; VI-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[V_OR_B32_e64_]] - ; VI-NEXT: $vgpr0 = COPY [[COPY1]] + ; VI-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 
[[S_MOV_B32_]], [[PRED_COPY]], implicit $exec + ; VI-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY [[V_OR_B32_e64_]] + ; VI-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY1]] ; GFX9-LABEL: name: fneg_fabs_s16_vv ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768 - ; GFX9-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[V_OR_B32_e64_]] - ; GFX9-NEXT: $vgpr0 = COPY [[COPY1]] + ; GFX9-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_MOV_B32_]], [[PRED_COPY]], implicit $exec + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY [[V_OR_B32_e64_]] + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY1]] ; GFX10-LABEL: name: fneg_fabs_s16_vv ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768 - ; GFX10-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[V_OR_B32_e64_]] - ; GFX10-NEXT: $vgpr0 = COPY [[COPY1]] + ; GFX10-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_MOV_B32_]], [[PRED_COPY]], implicit $exec + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY [[V_OR_B32_e64_]] + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY1]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s16) = G_TRUNC %0 %2:vgpr(s16) = G_FABS %1 @@ -893,8 +893,8 @@ ; SI-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) ; SI-NEXT: [[FNEG:%[0-9]+]]:sgpr(s16) = G_FNEG [[TRUNC]] ; SI-NEXT: [[FNEG1:%[0-9]+]]:vgpr_32(s16) = G_FNEG [[FNEG]] - ; SI-NEXT: [[COPY1:%[0-9]+]]:sreg_32(s32) = COPY [[FNEG1]](s16) - ; SI-NEXT: $vgpr0 = COPY [[COPY1]](s32) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32(s32) = PRED_COPY [[FNEG1]](s16) + ; SI-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY]](s32) ; VI-LABEL: name: fneg_fabs_s16_vs ; VI: liveins: $sgpr0 ; VI-NEXT: {{ $}} @@ -902,8 +902,8 @@ ; VI-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) ; VI-NEXT: [[FNEG:%[0-9]+]]:sgpr(s16) = G_FNEG [[TRUNC]] ; VI-NEXT: [[FNEG1:%[0-9]+]]:vgpr_32(s16) = G_FNEG [[FNEG]] - ; VI-NEXT: [[COPY1:%[0-9]+]]:sreg_32(s32) = COPY [[FNEG1]](s16) - ; VI-NEXT: $vgpr0 = COPY [[COPY1]](s32) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32(s32) = PRED_COPY [[FNEG1]](s16) + ; VI-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY]](s32) ; GFX9-LABEL: name: fneg_fabs_s16_vs ; GFX9: liveins: $sgpr0 ; GFX9-NEXT: {{ $}} @@ -911,8 +911,8 @@ ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) ; GFX9-NEXT: [[FNEG:%[0-9]+]]:sgpr(s16) = G_FNEG [[TRUNC]] ; GFX9-NEXT: [[FNEG1:%[0-9]+]]:vgpr_32(s16) = G_FNEG [[FNEG]] - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32(s32) = COPY [[FNEG1]](s16) - ; GFX9-NEXT: $vgpr0 = COPY [[COPY1]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32(s32) = PRED_COPY [[FNEG1]](s16) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY]](s32) ; GFX10-LABEL: name: fneg_fabs_s16_vs ; GFX10: liveins: $sgpr0 ; GFX10-NEXT: {{ $}} @@ -920,8 +920,8 @@ ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) ; GFX10-NEXT: [[FNEG:%[0-9]+]]:sgpr(s16) = G_FNEG [[TRUNC]] ; GFX10-NEXT: [[FNEG1:%[0-9]+]]:vgpr_32(s16) = G_FNEG [[FNEG]] - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32(s32) = COPY [[FNEG1]](s16) - ; GFX10-NEXT: $vgpr0 = COPY [[COPY1]](s32) + ; 
GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32(s32) = PRED_COPY [[FNEG1]](s16) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY]](s32) %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s16) = G_TRUNC %0 %2:sgpr(s16) = G_FNEG %1 @@ -948,31 +948,31 @@ ; SI-LABEL: name: fneg_fabs_v2s16_ss ; SI: liveins: $sgpr0_sgpr1 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 ; SI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147516416 - ; SI-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc - ; SI-NEXT: $sgpr0 = COPY [[S_OR_B32_]] + ; SI-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[PRED_COPY]], [[S_MOV_B32_]], implicit-def $scc + ; SI-NEXT: $sgpr0 = PRED_COPY [[S_OR_B32_]] ; VI-LABEL: name: fneg_fabs_v2s16_ss ; VI: liveins: $sgpr0_sgpr1 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 ; VI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147516416 - ; VI-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc - ; VI-NEXT: $sgpr0 = COPY [[S_OR_B32_]] + ; VI-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[PRED_COPY]], [[S_MOV_B32_]], implicit-def $scc + ; VI-NEXT: $sgpr0 = PRED_COPY [[S_OR_B32_]] ; GFX9-LABEL: name: fneg_fabs_v2s16_ss ; GFX9: liveins: $sgpr0_sgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147516416 - ; GFX9-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc - ; GFX9-NEXT: $sgpr0 = COPY [[S_OR_B32_]] + ; GFX9-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[PRED_COPY]], [[S_MOV_B32_]], implicit-def $scc + ; GFX9-NEXT: $sgpr0 = PRED_COPY [[S_OR_B32_]] ; GFX10-LABEL: name: fneg_fabs_v2s16_ss ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147516416 - ; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc - ; GFX10-NEXT: $sgpr0 = COPY [[S_OR_B32_]] + ; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[PRED_COPY]], [[S_MOV_B32_]], implicit-def $scc + ; GFX10-NEXT: $sgpr0 = PRED_COPY [[S_OR_B32_]] %0:sgpr(<2 x s16>) = COPY $sgpr0 %1:sgpr(<2 x s16>) = G_FABS %0 %2:sgpr(<2 x s16>) = G_FNEG %1 @@ -997,31 +997,31 @@ ; SI-LABEL: name: fneg_fabs_v2s16_vv ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; SI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147516416 - ; SI-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec - ; SI-NEXT: $vgpr0 = COPY [[V_XOR_B32_e64_]] + ; SI-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[PRED_COPY]], implicit $exec + ; SI-NEXT: $vgpr0 = PRED_COPY [[V_XOR_B32_e64_]] ; VI-LABEL: name: fneg_fabs_v2s16_vv ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; VI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147516416 - ; VI-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec - ; VI-NEXT: $vgpr0 = COPY 
[[V_XOR_B32_e64_]] + ; VI-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[PRED_COPY]], implicit $exec + ; VI-NEXT: $vgpr0 = PRED_COPY [[V_XOR_B32_e64_]] ; GFX9-LABEL: name: fneg_fabs_v2s16_vv ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147516416 - ; GFX9-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec - ; GFX9-NEXT: $vgpr0 = COPY [[V_XOR_B32_e64_]] + ; GFX9-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[PRED_COPY]], implicit $exec + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[V_XOR_B32_e64_]] ; GFX10-LABEL: name: fneg_fabs_v2s16_vv ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147516416 - ; GFX10-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec - ; GFX10-NEXT: $vgpr0 = COPY [[V_XOR_B32_e64_]] + ; GFX10-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[PRED_COPY]], implicit $exec + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[V_XOR_B32_e64_]] %0:vgpr(<2 x s16>) = COPY $vgpr0 %1:vgpr(<2 x s16>) = G_FABS %0 %2:vgpr(<2 x s16>) = G_FNEG %0 @@ -1051,7 +1051,7 @@ ; SI-NEXT: [[FABS:%[0-9]+]]:vgpr_32(<2 x s16>) = G_FABS [[COPY]] ; SI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32(s16) = S_MOV_B32 2147516416 ; SI-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32(<2 x s16>) = V_XOR_B32_e64 [[S_MOV_B32_]](s16), [[FABS]](<2 x s16>), implicit $exec - ; SI-NEXT: $vgpr0 = COPY [[V_XOR_B32_e64_]](<2 x s16>) + ; SI-NEXT: $vgpr0 = PRED_COPY [[V_XOR_B32_e64_]](<2 x s16>) ; VI-LABEL: name: fneg_fabs_v2s16_vs ; VI: liveins: $sgpr0 ; VI-NEXT: {{ $}} @@ -1059,7 +1059,7 @@ ; VI-NEXT: [[FABS:%[0-9]+]]:vgpr_32(<2 x s16>) = G_FABS [[COPY]] ; VI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32(s16) = S_MOV_B32 2147516416 ; VI-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32(<2 x s16>) = V_XOR_B32_e64 [[S_MOV_B32_]](s16), [[FABS]](<2 x s16>), implicit $exec - ; VI-NEXT: $vgpr0 = COPY [[V_XOR_B32_e64_]](<2 x s16>) + ; VI-NEXT: $vgpr0 = PRED_COPY [[V_XOR_B32_e64_]](<2 x s16>) ; GFX9-LABEL: name: fneg_fabs_v2s16_vs ; GFX9: liveins: $sgpr0 ; GFX9-NEXT: {{ $}} @@ -1067,7 +1067,7 @@ ; GFX9-NEXT: [[FABS:%[0-9]+]]:vgpr_32(<2 x s16>) = G_FABS [[COPY]] ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32(s16) = S_MOV_B32 2147516416 ; GFX9-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32(<2 x s16>) = V_XOR_B32_e64 [[S_MOV_B32_]](s16), [[FABS]](<2 x s16>), implicit $exec - ; GFX9-NEXT: $vgpr0 = COPY [[V_XOR_B32_e64_]](<2 x s16>) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[V_XOR_B32_e64_]](<2 x s16>) ; GFX10-LABEL: name: fneg_fabs_v2s16_vs ; GFX10: liveins: $sgpr0 ; GFX10-NEXT: {{ $}} @@ -1075,7 +1075,7 @@ ; GFX10-NEXT: [[FABS:%[0-9]+]]:vgpr_32(<2 x s16>) = G_FABS [[COPY]] ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32(s16) = S_MOV_B32 2147516416 ; GFX10-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32(<2 x s16>) = V_XOR_B32_e64 [[S_MOV_B32_]](s16), [[FABS]](<2 x s16>), implicit $exec - ; GFX10-NEXT: $vgpr0 = COPY [[V_XOR_B32_e64_]](<2 x s16>) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[V_XOR_B32_e64_]](<2 x s16>) %0:sgpr(<2 x s16>) = COPY $sgpr0 %1:vgpr(<2 x s16>) = G_FABS %0 %2:vgpr(<2 x s16>) = G_FNEG %1 @@ -1103,42 +1103,42 @@ ; SI-LABEL: name: fneg_fabs_s64_ss ; SI: liveins: $sgpr0_sgpr1 ; SI-NEXT: {{ 
$}} - ; SI-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 - ; SI-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; SI-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub0 + ; SI-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub1 ; SI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 - ; SI-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc - ; SI-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[S_OR_B32_]], %subreg.sub1 + ; SI-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[PRED_COPY2]], [[S_MOV_B32_]], implicit-def $scc + ; SI-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[S_OR_B32_]], %subreg.sub1 ; SI-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] ; VI-LABEL: name: fneg_fabs_s64_ss ; VI: liveins: $sgpr0_sgpr1 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 - ; VI-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; VI-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub0 + ; VI-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub1 ; VI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 - ; VI-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc - ; VI-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[S_OR_B32_]], %subreg.sub1 + ; VI-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[PRED_COPY2]], [[S_MOV_B32_]], implicit-def $scc + ; VI-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[S_OR_B32_]], %subreg.sub1 ; VI-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] ; GFX9-LABEL: name: fneg_fabs_s64_ss ; GFX9: liveins: $sgpr0_sgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub1 ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 - ; GFX9-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc - ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[S_OR_B32_]], %subreg.sub1 + ; GFX9-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[PRED_COPY2]], [[S_MOV_B32_]], implicit-def $scc + ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[S_OR_B32_]], %subreg.sub1 ; GFX9-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] ; GFX10-LABEL: name: fneg_fabs_s64_ss ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub1 ; GFX10-NEXT: 
[[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 - ; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc - ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[S_OR_B32_]], %subreg.sub1 + ; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[PRED_COPY2]], [[S_MOV_B32_]], implicit-def $scc + ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[S_OR_B32_]], %subreg.sub1 ; GFX10-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] %0:sgpr(s64) = COPY $sgpr0_sgpr1 %1:sgpr(s64) = G_FABS %0 @@ -1167,42 +1167,42 @@ ; SI-LABEL: name: fneg_fabs_s64_vv ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; SI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[PRED_COPY]].sub1 ; SI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 - ; SI-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_MOV_B32_]], [[COPY1]], implicit $exec - ; SI-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; SI-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[V_OR_B32_e64_]], %subreg.sub1 + ; SI-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec + ; SI-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[PRED_COPY]].sub0 + ; SI-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[V_OR_B32_e64_]], %subreg.sub1 ; SI-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] ; VI-LABEL: name: fneg_fabs_s64_vv ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; VI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[PRED_COPY]].sub1 ; VI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 - ; VI-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_MOV_B32_]], [[COPY1]], implicit $exec - ; VI-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; VI-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[V_OR_B32_e64_]], %subreg.sub1 + ; VI-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec + ; VI-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[PRED_COPY]].sub0 + ; VI-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[V_OR_B32_e64_]], %subreg.sub1 ; VI-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] ; GFX9-LABEL: name: fneg_fabs_s64_vv ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[PRED_COPY]].sub1 ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 - ; GFX9-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_MOV_B32_]], [[COPY1]], implicit $exec - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[V_OR_B32_e64_]], %subreg.sub1 + ; GFX9-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[PRED_COPY]].sub0 + ; GFX9-NEXT: 
[[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[V_OR_B32_e64_]], %subreg.sub1 ; GFX9-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] ; GFX10-LABEL: name: fneg_fabs_s64_vv ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[PRED_COPY]].sub1 ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 - ; GFX10-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_MOV_B32_]], [[COPY1]], implicit $exec - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[V_OR_B32_e64_]], %subreg.sub1 + ; GFX10-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[PRED_COPY]].sub0 + ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[V_OR_B32_e64_]], %subreg.sub1 ; GFX10-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_FABS %0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fptosi.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fptosi.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fptosi.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fptosi.mir @@ -16,21 +16,21 @@ ; GCN-LABEL: name: fptosi_s32_to_s32_vv ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: %1:vgpr_32 = nofpexcept V_CVT_I32_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: $vgpr0 = COPY %1 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[V_CVT_I32_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr0 = PRED_COPY [[V_CVT_I32_F32_e64_]] ; VI-LABEL: name: fptosi_s32_to_s32_vv ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; VI-NEXT: %1:vgpr_32 = nofpexcept V_CVT_I32_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; VI-NEXT: $vgpr0 = COPY %1 + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; VI-NEXT: [[V_CVT_I32_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; VI-NEXT: $vgpr0 = PRED_COPY [[V_CVT_I32_F32_e64_]] ; GFX11-LABEL: name: fptosi_s32_to_s32_vv ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: %1:vgpr_32 = nofpexcept V_CVT_I32_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GFX11-NEXT: $vgpr0 = COPY %1 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[V_CVT_I32_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[V_CVT_I32_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = G_FPTOSI %0 $vgpr0 = COPY %1 @@ -49,21 +49,21 @@ ; GCN-LABEL: name: fptosi_s32_to_s32_vs ; GCN: liveins: $sgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: %1:vgpr_32 = nofpexcept V_CVT_I32_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: $vgpr0 = COPY %1 + ; GCN-NEXT: 
[[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[V_CVT_I32_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr0 = PRED_COPY [[V_CVT_I32_F32_e64_]] ; VI-LABEL: name: fptosi_s32_to_s32_vs ; VI: liveins: $sgpr0 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; VI-NEXT: %1:vgpr_32 = nofpexcept V_CVT_I32_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; VI-NEXT: $vgpr0 = COPY %1 + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; VI-NEXT: [[V_CVT_I32_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; VI-NEXT: $vgpr0 = PRED_COPY [[V_CVT_I32_F32_e64_]] ; GFX11-LABEL: name: fptosi_s32_to_s32_vs ; GFX11: liveins: $sgpr0 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX11-NEXT: %1:vgpr_32 = nofpexcept V_CVT_I32_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GFX11-NEXT: $vgpr0 = COPY %1 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX11-NEXT: [[V_CVT_I32_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[V_CVT_I32_F32_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = G_FPTOSI %0 $vgpr0 = COPY %1 @@ -82,21 +82,21 @@ ; GCN-LABEL: name: fptosi_s32_to_s32_fneg_vv ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: %2:vgpr_32 = nofpexcept V_CVT_I32_F32_e64 1, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: $vgpr0 = COPY %2 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[V_CVT_I32_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e64 1, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr0 = PRED_COPY [[V_CVT_I32_F32_e64_]] ; VI-LABEL: name: fptosi_s32_to_s32_fneg_vv ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; VI-NEXT: %2:vgpr_32 = nofpexcept V_CVT_I32_F32_e64 1, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; VI-NEXT: $vgpr0 = COPY %2 + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; VI-NEXT: [[V_CVT_I32_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e64 1, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; VI-NEXT: $vgpr0 = PRED_COPY [[V_CVT_I32_F32_e64_]] ; GFX11-LABEL: name: fptosi_s32_to_s32_fneg_vv ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: %2:vgpr_32 = nofpexcept V_CVT_I32_F32_e64 1, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GFX11-NEXT: $vgpr0 = COPY %2 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[V_CVT_I32_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e64 1, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[V_CVT_I32_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = G_FNEG %0 %2:vgpr(s32) = G_FPTOSI %1 @@ -116,24 +116,24 @@ ; GCN-LABEL: name: fptosi_s16_to_s32_vv ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: %3:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: %2:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %3, implicit $mode, implicit $exec - ; GCN-NEXT: $vgpr0 = COPY %2 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; 
GCN-NEXT: [[V_CVT_F32_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[V_CVT_I32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[V_CVT_F32_F16_e64_]], implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr0 = PRED_COPY [[V_CVT_I32_F32_e32_]] ; VI-LABEL: name: fptosi_s16_to_s32_vv ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; VI-NEXT: %3:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; VI-NEXT: %2:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %3, implicit $mode, implicit $exec - ; VI-NEXT: $vgpr0 = COPY %2 + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; VI-NEXT: [[V_CVT_F32_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; VI-NEXT: [[V_CVT_I32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[V_CVT_F32_F16_e64_]], implicit $mode, implicit $exec + ; VI-NEXT: $vgpr0 = PRED_COPY [[V_CVT_I32_F32_e32_]] ; GFX11-LABEL: name: fptosi_s16_to_s32_vv ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: %3:vgpr_32 = nofpexcept V_CVT_F32_F16_t16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GFX11-NEXT: %2:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %3, implicit $mode, implicit $exec - ; GFX11-NEXT: $vgpr0 = COPY %2 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[V_CVT_F32_F16_t16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_t16_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX11-NEXT: [[V_CVT_I32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[V_CVT_F32_F16_t16_e64_]], implicit $mode, implicit $exec + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[V_CVT_I32_F32_e32_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s16) = G_TRUNC %0 %2:vgpr(s32) = G_FPTOSI %1 @@ -153,24 +153,24 @@ ; GCN-LABEL: name: fptosi_s16_to_s32_vs ; GCN: liveins: $sgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: %3:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: %2:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %3, implicit $mode, implicit $exec - ; GCN-NEXT: $vgpr0 = COPY %2 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[V_CVT_F32_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[V_CVT_I32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[V_CVT_F32_F16_e64_]], implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr0 = PRED_COPY [[V_CVT_I32_F32_e32_]] ; VI-LABEL: name: fptosi_s16_to_s32_vs ; VI: liveins: $sgpr0 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; VI-NEXT: %3:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; VI-NEXT: %2:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %3, implicit $mode, implicit $exec - ; VI-NEXT: $vgpr0 = COPY %2 + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; VI-NEXT: [[V_CVT_F32_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; VI-NEXT: [[V_CVT_I32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[V_CVT_F32_F16_e64_]], implicit $mode, implicit $exec + ; VI-NEXT: $vgpr0 = PRED_COPY [[V_CVT_I32_F32_e32_]] ; GFX11-LABEL: name: fptosi_s16_to_s32_vs ; GFX11: liveins: $sgpr0 
; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX11-NEXT: %3:vgpr_32 = nofpexcept V_CVT_F32_F16_t16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GFX11-NEXT: %2:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %3, implicit $mode, implicit $exec - ; GFX11-NEXT: $vgpr0 = COPY %2 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX11-NEXT: [[V_CVT_F32_F16_t16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_t16_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX11-NEXT: [[V_CVT_I32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[V_CVT_F32_F16_t16_e64_]], implicit $mode, implicit $exec + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[V_CVT_I32_F32_e32_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s16) = G_TRUNC %0 %2:vgpr(s32) = G_FPTOSI %1 @@ -190,30 +190,30 @@ ; GCN-LABEL: name: fptosi_s16_to_s32_fneg_vv ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768 - ; GCN-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec - ; GCN-NEXT: %4:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[V_XOR_B32_e64_]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: %3:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %4, implicit $mode, implicit $exec - ; GCN-NEXT: $vgpr0 = COPY %3 + ; GCN-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[PRED_COPY]], implicit $exec + ; GCN-NEXT: [[V_CVT_F32_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[V_XOR_B32_e64_]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[V_CVT_I32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[V_CVT_F32_F16_e64_]], implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr0 = PRED_COPY [[V_CVT_I32_F32_e32_]] ; VI-LABEL: name: fptosi_s16_to_s32_fneg_vv ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; VI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768 - ; VI-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec - ; VI-NEXT: %4:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[V_XOR_B32_e64_]], 0, 0, implicit $mode, implicit $exec - ; VI-NEXT: %3:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %4, implicit $mode, implicit $exec - ; VI-NEXT: $vgpr0 = COPY %3 + ; VI-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[PRED_COPY]], implicit $exec + ; VI-NEXT: [[V_CVT_F32_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[V_XOR_B32_e64_]], 0, 0, implicit $mode, implicit $exec + ; VI-NEXT: [[V_CVT_I32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[V_CVT_F32_F16_e64_]], implicit $mode, implicit $exec + ; VI-NEXT: $vgpr0 = PRED_COPY [[V_CVT_I32_F32_e32_]] ; GFX11-LABEL: name: fptosi_s16_to_s32_fneg_vv ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768 - ; GFX11-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec - ; GFX11-NEXT: %4:vgpr_32 = nofpexcept V_CVT_F32_F16_t16_e64 0, [[V_XOR_B32_e64_]], 0, 0, implicit $mode, implicit $exec - ; GFX11-NEXT: %3:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %4, implicit $mode, implicit $exec - ; GFX11-NEXT: 
$vgpr0 = COPY %3 + ; GFX11-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[PRED_COPY]], implicit $exec + ; GFX11-NEXT: [[V_CVT_F32_F16_t16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_t16_e64 0, [[V_XOR_B32_e64_]], 0, 0, implicit $mode, implicit $exec + ; GFX11-NEXT: [[V_CVT_I32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[V_CVT_F32_F16_t16_e64_]], implicit $mode, implicit $exec + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[V_CVT_I32_F32_e32_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s16) = G_TRUNC %0 %2:vgpr(s16) = G_FNEG %1 @@ -234,24 +234,24 @@ ; GCN-LABEL: name: fptosi_s16_to_s1_vv ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: %4:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: %2:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %4, implicit $mode, implicit $exec - ; GCN-NEXT: S_ENDPGM 0, implicit %2 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[V_CVT_F32_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[V_CVT_I32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[V_CVT_F32_F16_e64_]], implicit $mode, implicit $exec + ; GCN-NEXT: S_ENDPGM 0, implicit [[V_CVT_I32_F32_e32_]] ; VI-LABEL: name: fptosi_s16_to_s1_vv ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; VI-NEXT: %4:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; VI-NEXT: %2:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %4, implicit $mode, implicit $exec - ; VI-NEXT: S_ENDPGM 0, implicit %2 + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; VI-NEXT: [[V_CVT_F32_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; VI-NEXT: [[V_CVT_I32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[V_CVT_F32_F16_e64_]], implicit $mode, implicit $exec + ; VI-NEXT: S_ENDPGM 0, implicit [[V_CVT_I32_F32_e32_]] ; GFX11-LABEL: name: fptosi_s16_to_s1_vv ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: %4:vgpr_32 = nofpexcept V_CVT_F32_F16_t16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GFX11-NEXT: %2:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %4, implicit $mode, implicit $exec - ; GFX11-NEXT: S_ENDPGM 0, implicit %2 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[V_CVT_F32_F16_t16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_t16_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX11-NEXT: [[V_CVT_I32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[V_CVT_F32_F16_t16_e64_]], implicit $mode, implicit $exec + ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CVT_I32_F32_e32_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s16) = G_TRUNC %0 %2:vgpr(s32) = G_FPTOSI %1 @@ -272,24 +272,24 @@ ; GCN-LABEL: name: fptosi_s16_to_s1_vs ; GCN: liveins: $sgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: %4:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: %2:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %4, implicit $mode, implicit $exec - ; GCN-NEXT: S_ENDPGM 0, implicit %2 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[V_CVT_F32_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[PRED_COPY]], 
0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[V_CVT_I32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[V_CVT_F32_F16_e64_]], implicit $mode, implicit $exec + ; GCN-NEXT: S_ENDPGM 0, implicit [[V_CVT_I32_F32_e32_]] ; VI-LABEL: name: fptosi_s16_to_s1_vs ; VI: liveins: $sgpr0 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; VI-NEXT: %4:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; VI-NEXT: %2:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %4, implicit $mode, implicit $exec - ; VI-NEXT: S_ENDPGM 0, implicit %2 + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; VI-NEXT: [[V_CVT_F32_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; VI-NEXT: [[V_CVT_I32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[V_CVT_F32_F16_e64_]], implicit $mode, implicit $exec + ; VI-NEXT: S_ENDPGM 0, implicit [[V_CVT_I32_F32_e32_]] ; GFX11-LABEL: name: fptosi_s16_to_s1_vs ; GFX11: liveins: $sgpr0 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX11-NEXT: %4:vgpr_32 = nofpexcept V_CVT_F32_F16_t16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GFX11-NEXT: %2:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %4, implicit $mode, implicit $exec - ; GFX11-NEXT: S_ENDPGM 0, implicit %2 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX11-NEXT: [[V_CVT_F32_F16_t16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_t16_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX11-NEXT: [[V_CVT_I32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[V_CVT_F32_F16_t16_e64_]], implicit $mode, implicit $exec + ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CVT_I32_F32_e32_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s16) = G_TRUNC %0 %2:vgpr(s32) = G_FPTOSI %1 @@ -310,30 +310,30 @@ ; GCN-LABEL: name: fptosi_s16_to_s1_fneg_vv ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768 - ; GCN-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec - ; GCN-NEXT: %5:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[V_XOR_B32_e64_]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: %3:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %5, implicit $mode, implicit $exec - ; GCN-NEXT: S_ENDPGM 0, implicit %3 + ; GCN-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[PRED_COPY]], implicit $exec + ; GCN-NEXT: [[V_CVT_F32_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[V_XOR_B32_e64_]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[V_CVT_I32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[V_CVT_F32_F16_e64_]], implicit $mode, implicit $exec + ; GCN-NEXT: S_ENDPGM 0, implicit [[V_CVT_I32_F32_e32_]] ; VI-LABEL: name: fptosi_s16_to_s1_fneg_vv ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; VI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768 - ; VI-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec - ; VI-NEXT: %5:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[V_XOR_B32_e64_]], 0, 0, implicit $mode, implicit $exec - ; VI-NEXT: %3:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %5, implicit $mode, implicit $exec - ; VI-NEXT: S_ENDPGM 
0, implicit %3 + ; VI-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[PRED_COPY]], implicit $exec + ; VI-NEXT: [[V_CVT_F32_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[V_XOR_B32_e64_]], 0, 0, implicit $mode, implicit $exec + ; VI-NEXT: [[V_CVT_I32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[V_CVT_F32_F16_e64_]], implicit $mode, implicit $exec + ; VI-NEXT: S_ENDPGM 0, implicit [[V_CVT_I32_F32_e32_]] ; GFX11-LABEL: name: fptosi_s16_to_s1_fneg_vv ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768 - ; GFX11-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec - ; GFX11-NEXT: %5:vgpr_32 = nofpexcept V_CVT_F32_F16_t16_e64 0, [[V_XOR_B32_e64_]], 0, 0, implicit $mode, implicit $exec - ; GFX11-NEXT: %3:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %5, implicit $mode, implicit $exec - ; GFX11-NEXT: S_ENDPGM 0, implicit %3 + ; GFX11-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[PRED_COPY]], implicit $exec + ; GFX11-NEXT: [[V_CVT_F32_F16_t16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_t16_e64 0, [[V_XOR_B32_e64_]], 0, 0, implicit $mode, implicit $exec + ; GFX11-NEXT: [[V_CVT_I32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[V_CVT_F32_F16_t16_e64_]], implicit $mode, implicit $exec + ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CVT_I32_F32_e32_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s16) = G_TRUNC %0 %2:vgpr(s16) = G_FNEG %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fptoui.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fptoui.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fptoui.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fptoui.mir @@ -16,33 +16,33 @@ ; GCN-LABEL: name: fptoui ; GCN: liveins: $sgpr0, $vgpr0, $vgpr3_vgpr4 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4 - ; GCN-NEXT: %3:vgpr_32 = nofpexcept V_CVT_U32_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: %4:vgpr_32 = nofpexcept V_CVT_U32_F32_e64 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: FLAT_STORE_DWORD [[COPY2]], %3, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) - ; GCN-NEXT: FLAT_STORE_DWORD [[COPY2]], %4, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr3_vgpr4 + ; GCN-NEXT: [[V_CVT_U32_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_U32_F32_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[V_CVT_U32_F32_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_U32_F32_e64 0, [[PRED_COPY1]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: FLAT_STORE_DWORD [[PRED_COPY2]], [[V_CVT_U32_F32_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) + ; GCN-NEXT: FLAT_STORE_DWORD [[PRED_COPY2]], [[V_CVT_U32_F32_e64_1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) ; VI-LABEL: name: fptoui ; VI: liveins: $sgpr0, $vgpr0, $vgpr3_vgpr4 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; VI-NEXT: 
[[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; VI-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4 - ; VI-NEXT: %3:vgpr_32 = nofpexcept V_CVT_U32_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; VI-NEXT: %4:vgpr_32 = nofpexcept V_CVT_U32_F32_e64 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; VI-NEXT: FLAT_STORE_DWORD [[COPY2]], %3, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) - ; VI-NEXT: FLAT_STORE_DWORD [[COPY2]], %4, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; VI-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; VI-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr3_vgpr4 + ; VI-NEXT: [[V_CVT_U32_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_U32_F32_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; VI-NEXT: [[V_CVT_U32_F32_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_U32_F32_e64 0, [[PRED_COPY1]], 0, 0, implicit $mode, implicit $exec + ; VI-NEXT: FLAT_STORE_DWORD [[PRED_COPY2]], [[V_CVT_U32_F32_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) + ; VI-NEXT: FLAT_STORE_DWORD [[PRED_COPY2]], [[V_CVT_U32_F32_e64_1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) ; GFX11-LABEL: name: fptoui ; GFX11: liveins: $sgpr0, $vgpr0, $vgpr3_vgpr4 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4 - ; GFX11-NEXT: %3:vgpr_32 = nofpexcept V_CVT_U32_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GFX11-NEXT: %4:vgpr_32 = nofpexcept V_CVT_U32_F32_e64 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; GFX11-NEXT: GLOBAL_STORE_DWORD [[COPY2]], %3, 0, 0, implicit $exec :: (store (s32), addrspace 1) - ; GFX11-NEXT: GLOBAL_STORE_DWORD [[COPY2]], %4, 0, 0, implicit $exec :: (store (s32), addrspace 1) + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr3_vgpr4 + ; GFX11-NEXT: [[V_CVT_U32_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_U32_F32_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX11-NEXT: [[V_CVT_U32_F32_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_U32_F32_e64 0, [[PRED_COPY1]], 0, 0, implicit $mode, implicit $exec + ; GFX11-NEXT: GLOBAL_STORE_DWORD [[PRED_COPY2]], [[V_CVT_U32_F32_e64_]], 0, 0, implicit $exec :: (store (s32), addrspace 1) + ; GFX11-NEXT: GLOBAL_STORE_DWORD [[PRED_COPY2]], [[V_CVT_U32_F32_e64_1]], 0, 0, implicit $exec :: (store (s32), addrspace 1) %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = COPY $vgpr0 @@ -72,24 +72,24 @@ ; GCN-LABEL: name: fptoui_s16_to_s32_vv ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: %3:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: %2:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 %3, implicit $mode, implicit $exec - ; GCN-NEXT: $vgpr0 = COPY %2 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[V_CVT_F32_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[V_CVT_U32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 [[V_CVT_F32_F16_e64_]], implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr0 = PRED_COPY 
[[V_CVT_U32_F32_e32_]] ; VI-LABEL: name: fptoui_s16_to_s32_vv ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; VI-NEXT: %3:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; VI-NEXT: %2:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 %3, implicit $mode, implicit $exec - ; VI-NEXT: $vgpr0 = COPY %2 + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; VI-NEXT: [[V_CVT_F32_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; VI-NEXT: [[V_CVT_U32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 [[V_CVT_F32_F16_e64_]], implicit $mode, implicit $exec + ; VI-NEXT: $vgpr0 = PRED_COPY [[V_CVT_U32_F32_e32_]] ; GFX11-LABEL: name: fptoui_s16_to_s32_vv ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: %3:vgpr_32 = nofpexcept V_CVT_F32_F16_t16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GFX11-NEXT: %2:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 %3, implicit $mode, implicit $exec - ; GFX11-NEXT: $vgpr0 = COPY %2 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[V_CVT_F32_F16_t16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_t16_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX11-NEXT: [[V_CVT_U32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 [[V_CVT_F32_F16_t16_e64_]], implicit $mode, implicit $exec + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[V_CVT_U32_F32_e32_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s16) = G_TRUNC %0 %2:vgpr(s32) = G_FPTOUI %1 @@ -109,24 +109,24 @@ ; GCN-LABEL: name: fptoui_s16_to_s32_vs ; GCN: liveins: $sgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: %3:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: %2:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 %3, implicit $mode, implicit $exec - ; GCN-NEXT: $vgpr0 = COPY %2 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[V_CVT_F32_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[V_CVT_U32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 [[V_CVT_F32_F16_e64_]], implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr0 = PRED_COPY [[V_CVT_U32_F32_e32_]] ; VI-LABEL: name: fptoui_s16_to_s32_vs ; VI: liveins: $sgpr0 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; VI-NEXT: %3:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; VI-NEXT: %2:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 %3, implicit $mode, implicit $exec - ; VI-NEXT: $vgpr0 = COPY %2 + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; VI-NEXT: [[V_CVT_F32_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; VI-NEXT: [[V_CVT_U32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 [[V_CVT_F32_F16_e64_]], implicit $mode, implicit $exec + ; VI-NEXT: $vgpr0 = PRED_COPY [[V_CVT_U32_F32_e32_]] ; GFX11-LABEL: name: fptoui_s16_to_s32_vs ; GFX11: liveins: $sgpr0 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX11-NEXT: %3:vgpr_32 = nofpexcept V_CVT_F32_F16_t16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GFX11-NEXT: %2:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 %3, implicit $mode, implicit $exec - ; GFX11-NEXT: $vgpr0 = 
COPY %2 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX11-NEXT: [[V_CVT_F32_F16_t16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_t16_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX11-NEXT: [[V_CVT_U32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 [[V_CVT_F32_F16_t16_e64_]], implicit $mode, implicit $exec + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[V_CVT_U32_F32_e32_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s16) = G_TRUNC %0 %2:vgpr(s32) = G_FPTOUI %1 @@ -146,30 +146,30 @@ ; GCN-LABEL: name: fptoui_s16_to_s32_fneg_vv ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768 - ; GCN-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec - ; GCN-NEXT: %4:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[V_XOR_B32_e64_]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: %3:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 %4, implicit $mode, implicit $exec - ; GCN-NEXT: $vgpr0 = COPY %3 + ; GCN-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[PRED_COPY]], implicit $exec + ; GCN-NEXT: [[V_CVT_F32_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[V_XOR_B32_e64_]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[V_CVT_U32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 [[V_CVT_F32_F16_e64_]], implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr0 = PRED_COPY [[V_CVT_U32_F32_e32_]] ; VI-LABEL: name: fptoui_s16_to_s32_fneg_vv ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; VI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768 - ; VI-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec - ; VI-NEXT: %4:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[V_XOR_B32_e64_]], 0, 0, implicit $mode, implicit $exec - ; VI-NEXT: %3:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 %4, implicit $mode, implicit $exec - ; VI-NEXT: $vgpr0 = COPY %3 + ; VI-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[PRED_COPY]], implicit $exec + ; VI-NEXT: [[V_CVT_F32_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[V_XOR_B32_e64_]], 0, 0, implicit $mode, implicit $exec + ; VI-NEXT: [[V_CVT_U32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 [[V_CVT_F32_F16_e64_]], implicit $mode, implicit $exec + ; VI-NEXT: $vgpr0 = PRED_COPY [[V_CVT_U32_F32_e32_]] ; GFX11-LABEL: name: fptoui_s16_to_s32_fneg_vv ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768 - ; GFX11-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec - ; GFX11-NEXT: %4:vgpr_32 = nofpexcept V_CVT_F32_F16_t16_e64 0, [[V_XOR_B32_e64_]], 0, 0, implicit $mode, implicit $exec - ; GFX11-NEXT: %3:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 %4, implicit $mode, implicit $exec - ; GFX11-NEXT: $vgpr0 = COPY %3 + ; GFX11-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[PRED_COPY]], implicit $exec + ; GFX11-NEXT: [[V_CVT_F32_F16_t16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_t16_e64 0, [[V_XOR_B32_e64_]], 0, 0, implicit $mode, implicit $exec + ; GFX11-NEXT: 
[[V_CVT_U32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 [[V_CVT_F32_F16_t16_e64_]], implicit $mode, implicit $exec + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[V_CVT_U32_F32_e32_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s16) = G_TRUNC %0 %2:vgpr(s16) = G_FNEG %1 @@ -190,24 +190,24 @@ ; GCN-LABEL: name: fptoui_s16_to_s1_vv ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: %4:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: %2:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 %4, implicit $mode, implicit $exec - ; GCN-NEXT: S_ENDPGM 0, implicit %2 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[V_CVT_F32_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[V_CVT_U32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 [[V_CVT_F32_F16_e64_]], implicit $mode, implicit $exec + ; GCN-NEXT: S_ENDPGM 0, implicit [[V_CVT_U32_F32_e32_]] ; VI-LABEL: name: fptoui_s16_to_s1_vv ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; VI-NEXT: %4:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; VI-NEXT: %2:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 %4, implicit $mode, implicit $exec - ; VI-NEXT: S_ENDPGM 0, implicit %2 + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; VI-NEXT: [[V_CVT_F32_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; VI-NEXT: [[V_CVT_U32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 [[V_CVT_F32_F16_e64_]], implicit $mode, implicit $exec + ; VI-NEXT: S_ENDPGM 0, implicit [[V_CVT_U32_F32_e32_]] ; GFX11-LABEL: name: fptoui_s16_to_s1_vv ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: %4:vgpr_32 = nofpexcept V_CVT_F32_F16_t16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GFX11-NEXT: %2:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 %4, implicit $mode, implicit $exec - ; GFX11-NEXT: S_ENDPGM 0, implicit %2 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[V_CVT_F32_F16_t16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_t16_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX11-NEXT: [[V_CVT_U32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 [[V_CVT_F32_F16_t16_e64_]], implicit $mode, implicit $exec + ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CVT_U32_F32_e32_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s16) = G_TRUNC %0 %2:vgpr(s32) = G_FPTOUI %1 @@ -228,24 +228,24 @@ ; GCN-LABEL: name: fptoui_s16_to_s1_vs ; GCN: liveins: $sgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: %4:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: %2:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 %4, implicit $mode, implicit $exec - ; GCN-NEXT: S_ENDPGM 0, implicit %2 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[V_CVT_F32_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[V_CVT_U32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 [[V_CVT_F32_F16_e64_]], implicit $mode, implicit $exec + ; GCN-NEXT: S_ENDPGM 0, implicit [[V_CVT_U32_F32_e32_]] ; VI-LABEL: name: fptoui_s16_to_s1_vs ; VI: liveins: $sgpr0 ; 
VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; VI-NEXT: %4:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; VI-NEXT: %2:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 %4, implicit $mode, implicit $exec - ; VI-NEXT: S_ENDPGM 0, implicit %2 + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; VI-NEXT: [[V_CVT_F32_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; VI-NEXT: [[V_CVT_U32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 [[V_CVT_F32_F16_e64_]], implicit $mode, implicit $exec + ; VI-NEXT: S_ENDPGM 0, implicit [[V_CVT_U32_F32_e32_]] ; GFX11-LABEL: name: fptoui_s16_to_s1_vs ; GFX11: liveins: $sgpr0 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX11-NEXT: %4:vgpr_32 = nofpexcept V_CVT_F32_F16_t16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GFX11-NEXT: %2:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 %4, implicit $mode, implicit $exec - ; GFX11-NEXT: S_ENDPGM 0, implicit %2 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX11-NEXT: [[V_CVT_F32_F16_t16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_t16_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX11-NEXT: [[V_CVT_U32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 [[V_CVT_F32_F16_t16_e64_]], implicit $mode, implicit $exec + ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CVT_U32_F32_e32_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s16) = G_TRUNC %0 %2:vgpr(s32) = G_FPTOUI %1 @@ -266,30 +266,30 @@ ; GCN-LABEL: name: fptoui_s16_to_s1_fneg_vv ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768 - ; GCN-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec - ; GCN-NEXT: %5:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[V_XOR_B32_e64_]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: %3:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 %5, implicit $mode, implicit $exec - ; GCN-NEXT: S_ENDPGM 0, implicit %3 + ; GCN-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[PRED_COPY]], implicit $exec + ; GCN-NEXT: [[V_CVT_F32_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[V_XOR_B32_e64_]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[V_CVT_U32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 [[V_CVT_F32_F16_e64_]], implicit $mode, implicit $exec + ; GCN-NEXT: S_ENDPGM 0, implicit [[V_CVT_U32_F32_e32_]] ; VI-LABEL: name: fptoui_s16_to_s1_fneg_vv ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; VI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768 - ; VI-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec - ; VI-NEXT: %5:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[V_XOR_B32_e64_]], 0, 0, implicit $mode, implicit $exec - ; VI-NEXT: %3:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 %5, implicit $mode, implicit $exec - ; VI-NEXT: S_ENDPGM 0, implicit %3 + ; VI-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[PRED_COPY]], implicit $exec + ; VI-NEXT: [[V_CVT_F32_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[V_XOR_B32_e64_]], 0, 0, implicit $mode, implicit $exec + ; VI-NEXT: 
[[V_CVT_U32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 [[V_CVT_F32_F16_e64_]], implicit $mode, implicit $exec + ; VI-NEXT: S_ENDPGM 0, implicit [[V_CVT_U32_F32_e32_]] ; GFX11-LABEL: name: fptoui_s16_to_s1_fneg_vv ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768 - ; GFX11-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec - ; GFX11-NEXT: %5:vgpr_32 = nofpexcept V_CVT_F32_F16_t16_e64 0, [[V_XOR_B32_e64_]], 0, 0, implicit $mode, implicit $exec - ; GFX11-NEXT: %3:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 %5, implicit $mode, implicit $exec - ; GFX11-NEXT: S_ENDPGM 0, implicit %3 + ; GFX11-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[PRED_COPY]], implicit $exec + ; GFX11-NEXT: [[V_CVT_F32_F16_t16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_t16_e64 0, [[V_XOR_B32_e64_]], 0, 0, implicit $mode, implicit $exec + ; GFX11-NEXT: [[V_CVT_U32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 [[V_CVT_F32_F16_t16_e64_]], implicit $mode, implicit $exec + ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CVT_U32_F32_e32_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s16) = G_TRUNC %0 %2:vgpr(s16) = G_FNEG %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fract.f64.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fract.f64.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fract.f64.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fract.f64.mir @@ -15,20 +15,20 @@ ; CHECK-LABEL: name: fract_f64_neg ; CHECK: liveins: $sgpr0_sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 36, 0 :: (dereferenceable invariant load (<2 x s64>), align 4, addrspace 4) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY [[S_LOAD_DWORDX4_IMM]].sub0_sub1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_64 = COPY [[S_LOAD_DWORDX4_IMM]].sub2_sub3 - ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY2]], 0, 0 :: (load (s64), addrspace 1) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[PRED_COPY]], 36, 0 :: (dereferenceable invariant load (<2 x s64>), align 4, addrspace 4) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64 = PRED_COPY [[S_LOAD_DWORDX4_IMM]].sub0_sub1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_64 = PRED_COPY [[S_LOAD_DWORDX4_IMM]].sub2_sub3 + ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[PRED_COPY2]], 0, 0 :: (load (s64), addrspace 1) ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648 ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]] - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vreg_64 = COPY [[S_LOAD_DWORDX2_IMM]] - ; CHECK-NEXT: %12:vreg_64 = nofpexcept V_ADD_F64_e64 0, [[COPY3]], 1, [[COPY4]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: %15:vreg_64 = nofpexcept V_FRACT_F64_e64 0, %12, 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]] + ; CHECK-NEXT: 
[[PRED_COPY4:%[0-9]+]]:vreg_64 = PRED_COPY [[S_LOAD_DWORDX2_IMM]] + ; CHECK-NEXT: [[V_ADD_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_ADD_F64_e64 0, [[PRED_COPY3]], 1, [[PRED_COPY4]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_FRACT_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_FRACT_F64_e64 0, [[V_ADD_F64_e64_]], 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; CHECK-NEXT: GLOBAL_STORE_DWORDX2_SADDR [[V_MOV_B32_e32_]], %15, [[COPY1]], 0, 0, implicit $exec :: (store (s64), addrspace 1) + ; CHECK-NEXT: GLOBAL_STORE_DWORDX2_SADDR [[V_MOV_B32_e32_]], [[V_FRACT_F64_e64_]], [[PRED_COPY1]], 0, 0, implicit $exec :: (store (s64), addrspace 1) ; CHECK-NEXT: S_ENDPGM 0 %2:sgpr(p4) = COPY $sgpr0_sgpr1 %7:sgpr(s64) = G_CONSTANT i64 36 @@ -65,20 +65,20 @@ ; CHECK-LABEL: name: fract_f64_neg_abs ; CHECK: liveins: $sgpr0_sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 36, 0 :: (dereferenceable invariant load (<2 x s64>), align 4, addrspace 4) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY [[S_LOAD_DWORDX4_IMM]].sub0_sub1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_64 = COPY [[S_LOAD_DWORDX4_IMM]].sub2_sub3 - ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY2]], 0, 0 :: (load (s64), addrspace 1) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[PRED_COPY]], 36, 0 :: (dereferenceable invariant load (<2 x s64>), align 4, addrspace 4) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64 = PRED_COPY [[S_LOAD_DWORDX4_IMM]].sub0_sub1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_64 = PRED_COPY [[S_LOAD_DWORDX4_IMM]].sub2_sub3 + ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[PRED_COPY2]], 0, 0 :: (load (s64), addrspace 1) ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648 ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]] - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vreg_64 = COPY [[S_LOAD_DWORDX2_IMM]] - ; CHECK-NEXT: %13:vreg_64 = nofpexcept V_ADD_F64_e64 0, [[COPY3]], 3, [[COPY4]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: %16:vreg_64 = nofpexcept V_FRACT_F64_e64 0, %13, 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]] + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vreg_64 = PRED_COPY [[S_LOAD_DWORDX2_IMM]] + ; CHECK-NEXT: [[V_ADD_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_ADD_F64_e64 0, [[PRED_COPY3]], 3, [[PRED_COPY4]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_FRACT_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_FRACT_F64_e64 0, [[V_ADD_F64_e64_]], 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; CHECK-NEXT: GLOBAL_STORE_DWORDX2_SADDR [[V_MOV_B32_e32_]], %16, [[COPY1]], 0, 0, implicit $exec :: (store (s64), addrspace 1) + ; CHECK-NEXT: GLOBAL_STORE_DWORDX2_SADDR [[V_MOV_B32_e32_]], [[V_FRACT_F64_e64_]], [[PRED_COPY1]], 0, 0, implicit $exec :: (store (s64), addrspace 1) ; CHECK-NEXT: S_ENDPGM 0 %2:sgpr(p4) = COPY $sgpr0_sgpr1 %7:sgpr(s64) = G_CONSTANT i64 36 diff --git 
a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-frame-index.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-frame-index.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-frame-index.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-frame-index.mir @@ -13,7 +13,7 @@ bb.0: ; GCN-LABEL: name: frame_index_s ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 %stack.0 - ; GCN-NEXT: $sgpr0 = COPY [[S_MOV_B32_]] + ; GCN-NEXT: $sgpr0 = PRED_COPY [[S_MOV_B32_]] %0:sgpr(p5) = G_FRAME_INDEX %stack.0 $sgpr0 = COPY %0 @@ -31,7 +31,7 @@ bb.0: ; GCN-LABEL: name: frame_index_v ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec - ; GCN-NEXT: $vgpr0 = COPY [[V_MOV_B32_e32_]] + ; GCN-NEXT: $vgpr0 = PRED_COPY [[V_MOV_B32_e32_]] %0:vgpr(p5) = G_FRAME_INDEX %stack.0 $vgpr0 = COPY %0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-freeze.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-freeze.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-freeze.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-freeze.mir @@ -16,13 +16,13 @@ ; GFX6-LABEL: name: test_freeze_s1_vgpr_to_vgpr ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: $vgpr0 = COPY [[COPY]] + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY]] ; GFX10-LABEL: name: test_freeze_s1_vgpr_to_vgpr ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: $vgpr0 = COPY [[COPY]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s1) = G_TRUNC %0(s32) %2:vgpr(s1) = G_FREEZE %1 @@ -43,13 +43,13 @@ ; GFX6-LABEL: name: test_freeze_s1_vgpr_to_agpr ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: $agpr0 = COPY [[COPY]] + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: $agpr0 = PRED_COPY [[PRED_COPY]] ; GFX10-LABEL: name: test_freeze_s1_vgpr_to_agpr ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: $agpr0 = COPY [[COPY]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: $agpr0 = PRED_COPY [[PRED_COPY]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s1) = G_TRUNC %0(s32) %2:vgpr(s1) = G_FREEZE %1 @@ -70,19 +70,19 @@ ; GFX6-LABEL: name: test_freeze_s1_vcc ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 [[COPY]], [[COPY1]], implicit $exec - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_64_xexec = COPY [[V_CMP_EQ_U32_e64_]] - ; GFX6-NEXT: S_ENDPGM 0, implicit [[COPY2]] + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX6-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_64_xexec = PRED_COPY [[V_CMP_EQ_U32_e64_]] + ; GFX6-NEXT: S_ENDPGM 0, implicit [[PRED_COPY2]] ; GFX10-LABEL: name: test_freeze_s1_vcc ; GFX10: liveins: $vgpr0, $vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX10-NEXT: 
[[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U32_e64 [[COPY]], [[COPY1]], implicit $exec - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:sreg_32_xm0_xexec = COPY [[V_CMP_EQ_U32_e64_]] - ; GFX10-NEXT: S_ENDPGM 0, implicit [[COPY2]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32_xm0_xexec = PRED_COPY [[V_CMP_EQ_U32_e64_]] + ; GFX10-NEXT: S_ENDPGM 0, implicit [[PRED_COPY2]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vcc(s1) = G_ICMP intpred(eq), %0(s32), %1 @@ -103,13 +103,13 @@ ; GFX6-LABEL: name: test_freeze_s16_vgpr_to_vgpr ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: $vgpr0 = COPY [[COPY]] + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY]] ; GFX10-LABEL: name: test_freeze_s16_vgpr_to_vgpr ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: $vgpr0 = COPY [[COPY]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s16) = G_TRUNC %0(s32) %2:vgpr(s16) = G_FREEZE %1 @@ -130,13 +130,13 @@ ; GFX6-LABEL: name: test_freeze_s32_vgpr_to_vgpr ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: $vgpr0 = COPY [[COPY]] + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY]] ; GFX10-LABEL: name: test_freeze_s32_vgpr_to_vgpr ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: $vgpr0 = COPY [[COPY]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = G_FREEZE %0 $vgpr0 = COPY %1(s32) @@ -155,13 +155,13 @@ ; GFX6-LABEL: name: test_freeze_s32_sgpr_to_sgpr ; GFX6: liveins: $sgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX6-NEXT: $sgpr0 = COPY [[COPY]] + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX6-NEXT: $sgpr0 = PRED_COPY [[PRED_COPY]] ; GFX10-LABEL: name: test_freeze_s32_sgpr_to_sgpr ; GFX10: liveins: $sgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX10-NEXT: $sgpr0 = COPY [[COPY]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX10-NEXT: $sgpr0 = PRED_COPY [[PRED_COPY]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = G_FREEZE %0 $sgpr0 = COPY %1(s32) @@ -180,13 +180,13 @@ ; GFX6-LABEL: name: test_freeze_s32_sgpr_to_vgpr ; GFX6: liveins: $sgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX6-NEXT: $vgpr0 = COPY [[COPY]] + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY]] ; GFX10-LABEL: name: test_freeze_s32_sgpr_to_vgpr ; GFX10: liveins: $sgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX10-NEXT: $vgpr0 = COPY [[COPY]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = G_FREEZE %0 $vgpr0 = COPY %1(s32) @@ -205,13 +205,13 @@ ; GFX6-LABEL: 
name: test_freeze_s32_vgpr_to_agpr ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: $agpr0 = COPY [[COPY]] + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: $agpr0 = PRED_COPY [[PRED_COPY]] ; GFX10-LABEL: name: test_freeze_s32_vgpr_to_agpr ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: $agpr0 = COPY [[COPY]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: $agpr0 = PRED_COPY [[PRED_COPY]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = G_FREEZE %0 $agpr0 = COPY %1(s32) @@ -230,13 +230,13 @@ ; GFX6-LABEL: name: test_freeze_s32_sgpr_to_agpr ; GFX6: liveins: $sgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX6-NEXT: $agpr0 = COPY [[COPY]] + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX6-NEXT: $agpr0 = PRED_COPY [[PRED_COPY]] ; GFX10-LABEL: name: test_freeze_s32_sgpr_to_agpr ; GFX10: liveins: $sgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX10-NEXT: $agpr0 = COPY [[COPY]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX10-NEXT: $agpr0 = PRED_COPY [[PRED_COPY]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = G_FREEZE %0 $agpr0 = COPY %1(s32) @@ -255,13 +255,13 @@ ; GFX6-LABEL: name: test_freeze_s32_agpr_to_vgpr ; GFX6: liveins: $agpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:agpr_32 = COPY $agpr0 - ; GFX6-NEXT: $vgpr0 = COPY [[COPY]] + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:agpr_32 = PRED_COPY $agpr0 + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY]] ; GFX10-LABEL: name: test_freeze_s32_agpr_to_vgpr ; GFX10: liveins: $agpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:agpr_32 = COPY $agpr0 - ; GFX10-NEXT: $vgpr0 = COPY [[COPY]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:agpr_32 = PRED_COPY $agpr0 + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY]] %0:agpr(s32) = COPY $agpr0 %1:agpr(s32) = G_FREEZE %0 $vgpr0 = COPY %1(s32) @@ -280,13 +280,13 @@ ; GFX6-LABEL: name: test_freeze_s32_agpr_to_agpr ; GFX6: liveins: $agpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:agpr_32 = COPY $agpr0 - ; GFX6-NEXT: $agpr0 = COPY [[COPY]] + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:agpr_32 = PRED_COPY $agpr0 + ; GFX6-NEXT: $agpr0 = PRED_COPY [[PRED_COPY]] ; GFX10-LABEL: name: test_freeze_s32_agpr_to_agpr ; GFX10: liveins: $agpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:agpr_32 = COPY $agpr0 - ; GFX10-NEXT: $agpr0 = COPY [[COPY]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:agpr_32 = PRED_COPY $agpr0 + ; GFX10-NEXT: $agpr0 = PRED_COPY [[PRED_COPY]] %0:agpr(s32) = COPY $agpr0 %1:agpr(s32) = G_FREEZE %0 $agpr0 = COPY %1(s32) @@ -305,13 +305,13 @@ ; GFX6-LABEL: name: test_freeze_s64 ; GFX6: liveins: $vgpr0_vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[COPY]] + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX6-NEXT: $vgpr0_vgpr1 = PRED_COPY [[PRED_COPY]] ; GFX10-LABEL: name: test_freeze_s64 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[COPY]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: $vgpr0_vgpr1 = PRED_COPY [[PRED_COPY]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_FREEZE %0 $vgpr0_vgpr1 = COPY %1(s64) @@ -330,13 +330,13 @@ ; GFX6-LABEL: name: 
test_freeze_s128 ; GFX6: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[COPY]] + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_128 = PRED_COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[PRED_COPY]] ; GFX10-LABEL: name: test_freeze_s128 ; GFX10: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[COPY]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_128 = PRED_COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[PRED_COPY]] %0:vgpr(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:vgpr(s128) = G_FREEZE %0 $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1(s128) @@ -355,13 +355,13 @@ ; GFX6-LABEL: name: test_freeze_256 ; GFX6: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[COPY]] + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_256 = PRED_COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = PRED_COPY [[PRED_COPY]] ; GFX10-LABEL: name: test_freeze_256 ; GFX10: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[COPY]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_256 = PRED_COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = PRED_COPY [[PRED_COPY]] %0:vgpr(s256) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 %1:vgpr(s256) = G_FREEZE %0 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1(s256) @@ -380,13 +380,13 @@ ; GFX6-LABEL: name: test_freeze_s512 ; GFX6: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_512 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[COPY]] + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_512 = PRED_COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = PRED_COPY [[PRED_COPY]] ; GFX10-LABEL: name: test_freeze_s512 ; GFX10: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_512 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[COPY]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_512 = PRED_COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; GFX10-NEXT: 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = PRED_COPY [[PRED_COPY]] %0:vgpr(s512) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 %1:vgpr(s512) = G_FREEZE %0 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %1(s512) @@ -405,13 +405,13 @@ ; GFX6-LABEL: name: test_freeze_v2s32 ; GFX6: liveins: $vgpr0_vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[COPY]] + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX6-NEXT: $vgpr0_vgpr1 = PRED_COPY [[PRED_COPY]] ; GFX10-LABEL: name: test_freeze_v2s32 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[COPY]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: $vgpr0_vgpr1 = PRED_COPY [[PRED_COPY]] %0:vgpr(<2 x s32>) = COPY $vgpr0_vgpr1 %1:vgpr(<2 x s32>) = G_FREEZE %0 $vgpr0_vgpr1 = COPY %1(<2 x s32>) @@ -430,13 +430,13 @@ ; GFX6-LABEL: name: test_freeze_v3s32 ; GFX6: liveins: $vgpr0_vgpr1_vgpr2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_96 = COPY $vgpr0_vgpr1_vgpr2 - ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY]] + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_96 = PRED_COPY $vgpr0_vgpr1_vgpr2 + ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2 = PRED_COPY [[PRED_COPY]] ; GFX10-LABEL: name: test_freeze_v3s32 ; GFX10: liveins: $vgpr0_vgpr1_vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_96 = COPY $vgpr0_vgpr1_vgpr2 - ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_96 = PRED_COPY $vgpr0_vgpr1_vgpr2 + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = PRED_COPY [[PRED_COPY]] %0:vgpr(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 %1:vgpr(<3 x s32>) = G_FREEZE %0 $vgpr0_vgpr1_vgpr2 = COPY %1(<3 x s32>) @@ -455,13 +455,13 @@ ; GFX6-LABEL: name: test_freeze_v4s32 ; GFX6: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[COPY]] + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_128 = PRED_COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[PRED_COPY]] ; GFX10-LABEL: name: test_freeze_v4s32 ; GFX10: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[COPY]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_128 = PRED_COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[PRED_COPY]] %0:vgpr(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:vgpr(<4 x s32>) = G_FREEZE %0 $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1(<4 x s32>) @@ -480,13 +480,13 @@ ; GFX6-LABEL: name: test_freeze_v5s32 ; GFX6: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_160 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 - ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = COPY [[COPY]] + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_160 = PRED_COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 + ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = PRED_COPY [[PRED_COPY]] ; GFX10-LABEL: name: test_freeze_v5s32 ; GFX10: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_160 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 - ; 
GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = COPY [[COPY]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_160 = PRED_COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = PRED_COPY [[PRED_COPY]] %0:vgpr(<5 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 %1:vgpr(<5 x s32>) = G_FREEZE %0 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = COPY %1(<5 x s32>) @@ -505,13 +505,13 @@ ; GFX6-LABEL: name: test_freeze_v8s32 ; GFX6: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[COPY]] + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_256 = PRED_COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = PRED_COPY [[PRED_COPY]] ; GFX10-LABEL: name: test_freeze_v8s32 ; GFX10: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[COPY]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_256 = PRED_COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = PRED_COPY [[PRED_COPY]] %0:vgpr(<8 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 %1:vgpr(<8 x s32>) = G_FREEZE %0 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1(<8 x s32>) @@ -530,13 +530,13 @@ ; GFX6-LABEL: name: test_freeze_v16s32 ; GFX6: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_512 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[COPY]] + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_512 = PRED_COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = PRED_COPY [[PRED_COPY]] ; GFX10-LABEL: name: test_freeze_v16s32 ; GFX10: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_512 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[COPY]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_512 = PRED_COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = PRED_COPY [[PRED_COPY]] %0:vgpr(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 %1:vgpr(<16 x s32>) = G_FREEZE %0 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %1(<16 x s32>) @@ -555,13 +555,13 @@ ; GFX6-LABEL: name: test_freeze_v2s16 ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: 
[[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: $vgpr0 = COPY [[COPY]] + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY]] ; GFX10-LABEL: name: test_freeze_v2s16 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: $vgpr0 = COPY [[COPY]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY]] %0:vgpr(<2 x s16>) = COPY $vgpr0 %1:vgpr(<2 x s16>) = G_FREEZE %0 $vgpr0 = COPY %1(<2 x s16>) @@ -580,13 +580,13 @@ ; GFX6-LABEL: name: test_freeze_v4s16 ; GFX6: liveins: $vgpr0_vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[COPY]] + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX6-NEXT: $vgpr0_vgpr1 = PRED_COPY [[PRED_COPY]] ; GFX10-LABEL: name: test_freeze_v4s16 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[COPY]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: $vgpr0_vgpr1 = PRED_COPY [[PRED_COPY]] %0:vgpr(<4 x s16>) = COPY $vgpr0_vgpr1 %1:vgpr(<4 x s16>) = G_FREEZE %0 $vgpr0_vgpr1 = COPY %1(<4 x s16>) @@ -605,13 +605,13 @@ ; GFX6-LABEL: name: test_freeze_v6s16 ; GFX6: liveins: $vgpr0_vgpr1_vgpr2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_96 = COPY $vgpr0_vgpr1_vgpr2 - ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY]] + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_96 = PRED_COPY $vgpr0_vgpr1_vgpr2 + ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2 = PRED_COPY [[PRED_COPY]] ; GFX10-LABEL: name: test_freeze_v6s16 ; GFX10: liveins: $vgpr0_vgpr1_vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_96 = COPY $vgpr0_vgpr1_vgpr2 - ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_96 = PRED_COPY $vgpr0_vgpr1_vgpr2 + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = PRED_COPY [[PRED_COPY]] %0:vgpr(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 %1:vgpr(<6 x s16>) = G_FREEZE %0 $vgpr0_vgpr1_vgpr2 = COPY %1(<6 x s16>) @@ -630,13 +630,13 @@ ; GFX6-LABEL: name: test_freeze_v8s16 ; GFX6: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[COPY]] + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_128 = PRED_COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[PRED_COPY]] ; GFX10-LABEL: name: test_freeze_v8s16 ; GFX10: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[COPY]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_128 = PRED_COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[PRED_COPY]] %0:vgpr(<8 x s16>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:vgpr(<8 x s16>) = G_FREEZE %0 $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1(<8 x s16>) @@ -655,13 +655,13 @@ ; GFX6-LABEL: name: test_freeze_v2s64 ; GFX6: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[COPY]] + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_128 = PRED_COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[PRED_COPY]] ; GFX10-LABEL: name: test_freeze_v2s64 ; GFX10: liveins: 
$vgpr0_vgpr1_vgpr2_vgpr3 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[COPY]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_128 = PRED_COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[PRED_COPY]] %0:vgpr(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:vgpr(<2 x s64>) = G_FREEZE %0 $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1(<2 x s64>) @@ -680,13 +680,13 @@ ; GFX6-LABEL: name: test_freeze_p0 ; GFX6: liveins: $vgpr0_vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[COPY]] + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX6-NEXT: $vgpr0_vgpr1 = PRED_COPY [[PRED_COPY]] ; GFX10-LABEL: name: test_freeze_p0 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[COPY]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: $vgpr0_vgpr1 = PRED_COPY [[PRED_COPY]] %0:vgpr(p0) = COPY $vgpr0_vgpr1 %1:vgpr(p0) = G_FREEZE %0 $vgpr0_vgpr1 = COPY %1(p0) @@ -705,13 +705,13 @@ ; GFX6-LABEL: name: test_freeze_p1 ; GFX6: liveins: $vgpr0_vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[COPY]] + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX6-NEXT: $vgpr0_vgpr1 = PRED_COPY [[PRED_COPY]] ; GFX10-LABEL: name: test_freeze_p1 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[COPY]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: $vgpr0_vgpr1 = PRED_COPY [[PRED_COPY]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(p1) = G_FREEZE %0 $vgpr0_vgpr1 = COPY %1(p1) @@ -730,13 +730,13 @@ ; GFX6-LABEL: name: test_freeze_p2 ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: $vgpr0 = COPY [[COPY]] + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY]] ; GFX10-LABEL: name: test_freeze_p2 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: $vgpr0 = COPY [[COPY]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY]] %0:vgpr(p2) = COPY $vgpr0 %1:vgpr(p2) = G_FREEZE %0 $vgpr0 = COPY %1(p2) @@ -755,13 +755,13 @@ ; GFX6-LABEL: name: test_freeze_p3 ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: $vgpr0 = COPY [[COPY]] + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY]] ; GFX10-LABEL: name: test_freeze_p3 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: $vgpr0 = COPY [[COPY]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY]] %0:vgpr(p3) = COPY $vgpr0 %1:vgpr(p3) = G_FREEZE %0 $vgpr0 = COPY %1(p3) @@ -780,13 +780,13 @@ ; GFX6-LABEL: name: test_freeze_p4 ; GFX6: liveins: $vgpr0_vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[COPY]] + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 
+ ; GFX6-NEXT: $vgpr0_vgpr1 = PRED_COPY [[PRED_COPY]] ; GFX10-LABEL: name: test_freeze_p4 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[COPY]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: $vgpr0_vgpr1 = PRED_COPY [[PRED_COPY]] %0:vgpr(p4) = COPY $vgpr0_vgpr1 %1:vgpr(p4) = G_FREEZE %0 $vgpr0_vgpr1 = COPY %1(p4) @@ -805,13 +805,13 @@ ; GFX6-LABEL: name: test_freeze_p5 ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: $vgpr0 = COPY [[COPY]] + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY]] ; GFX10-LABEL: name: test_freeze_p5 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: $vgpr0 = COPY [[COPY]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY]] %0:vgpr(p5) = COPY $vgpr0 %1:vgpr(p5) = G_FREEZE %0 $vgpr0 = COPY %1(p5) @@ -830,13 +830,13 @@ ; GFX6-LABEL: name: test_freeze_p999 ; GFX6: liveins: $vgpr0_vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[COPY]] + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX6-NEXT: $vgpr0_vgpr1 = PRED_COPY [[PRED_COPY]] ; GFX10-LABEL: name: test_freeze_p999 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[COPY]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: $vgpr0_vgpr1 = PRED_COPY [[PRED_COPY]] %0:vgpr(p999) = COPY $vgpr0_vgpr1 %1:vgpr(p999) = G_FREEZE %0 $vgpr0_vgpr1 = COPY %1(p999) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-frint.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-frint.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-frint.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-frint.mir @@ -15,9 +15,9 @@ ; GCN-LABEL: name: frint_s32_vv ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: %1:vgpr_32 = nofpexcept V_RNDNE_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: $vgpr0 = COPY %1 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[V_RNDNE_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_RNDNE_F32_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr0 = PRED_COPY [[V_RNDNE_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = G_FRINT %0 $vgpr0 = COPY %1 @@ -36,9 +36,9 @@ ; GCN-LABEL: name: frint_s32_vs ; GCN: liveins: $sgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: %1:vgpr_32 = nofpexcept V_RNDNE_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: $vgpr0 = COPY %1 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[V_RNDNE_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_RNDNE_F32_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr0 = PRED_COPY [[V_RNDNE_F32_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = G_FRINT %0 $vgpr0 = COPY %1 @@ -57,9 +57,9 @@ ; GCN-LABEL: name: frint_fneg_s32_vv ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: %2:vgpr_32 = nofpexcept V_RNDNE_F32_e64 1, 
[[COPY]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: $vgpr0 = COPY %2 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[V_RNDNE_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_RNDNE_F32_e64 1, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr0 = PRED_COPY [[V_RNDNE_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = G_FNEG %0 %2:vgpr(s32) = G_FRINT %1 @@ -79,9 +79,9 @@ ; GCN-LABEL: name: frint_s64_vv ; GCN: liveins: $vgpr0_vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GCN-NEXT: %1:vreg_64 = nofpexcept V_RNDNE_F64_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: $vgpr0_vgpr1 = COPY %1 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GCN-NEXT: [[V_RNDNE_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_RNDNE_F64_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr0_vgpr1 = PRED_COPY [[V_RNDNE_F64_e64_]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_FRINT %0 $vgpr0_vgpr1 = COPY %1 @@ -100,9 +100,9 @@ ; GCN-LABEL: name: frint_s64_fneg_vv ; GCN: liveins: $vgpr0_vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GCN-NEXT: %2:vreg_64 = nofpexcept V_RNDNE_F64_e64 1, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: $vgpr0_vgpr1 = COPY %2 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GCN-NEXT: [[V_RNDNE_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_RNDNE_F64_e64 1, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr0_vgpr1 = PRED_COPY [[V_RNDNE_F64_e64_]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_FNEG %0 %2:vgpr(s64) = G_FRINT %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-frint.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-frint.s16.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-frint.s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-frint.s16.mir @@ -17,8 +17,8 @@ ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; GCN-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) ; GCN-NEXT: [[FRINT:%[0-9]+]]:sreg_32(s16) = G_FRINT [[TRUNC]] - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32(s32) = COPY [[FRINT]](s16) - ; GCN-NEXT: $sgpr0 = COPY [[COPY1]](s32) + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32(s32) = PRED_COPY [[FRINT]](s16) + ; GCN-NEXT: $sgpr0 = PRED_COPY [[PRED_COPY]](s32) %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s16) = G_TRUNC %0 %2:sgpr(s16) = G_FRINT %1 @@ -39,9 +39,9 @@ ; GCN-LABEL: name: frint_s16_vv ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: %2:vgpr_32 = nofpexcept V_RNDNE_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: $vgpr0 = COPY %2 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[V_RNDNE_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_RNDNE_F16_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr0 = PRED_COPY [[V_RNDNE_F16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s16) = G_TRUNC %0 %2:vgpr(s16) = G_FRINT %1 @@ -62,9 +62,9 @@ ; GCN-LABEL: name: frint_s16_vs ; GCN: liveins: $sgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: %2:vgpr_32 = nofpexcept V_RNDNE_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: $vgpr0 = COPY %2 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[V_RNDNE_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_RNDNE_F16_e64 0, [[PRED_COPY]], 0, 0, 
implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr0 = PRED_COPY [[V_RNDNE_F16_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s16) = G_TRUNC %0 %2:vgpr(s16) = G_FRINT %1 @@ -85,9 +85,9 @@ ; GCN-LABEL: name: frint_fneg_s16_vv ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: %3:vgpr_32 = nofpexcept V_RNDNE_F16_e64 1, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: $vgpr0 = COPY %3 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[V_RNDNE_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_RNDNE_F16_e64 1, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr0 = PRED_COPY [[V_RNDNE_F16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s16) = G_TRUNC %0 %2:vgpr(s16) = G_FNEG %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fshr.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fshr.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fshr.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fshr.mir @@ -18,10 +18,10 @@ ; GCN-LABEL: name: fshr_s32 ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GCN-NEXT: [[V_ALIGNBIT_B32_e64_:%[0-9]+]]:vgpr_32 = V_ALIGNBIT_B32_e64 [[COPY]], [[COPY1]], [[COPY2]], implicit $exec + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GCN-NEXT: [[V_ALIGNBIT_B32_e64_:%[0-9]+]]:vgpr_32 = V_ALIGNBIT_B32_e64 [[PRED_COPY]], [[PRED_COPY1]], [[PRED_COPY2]], implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_ALIGNBIT_B32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-i1-copy.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-i1-copy.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-i1-copy.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-i1-copy.mir @@ -15,38 +15,38 @@ ; WAVE64-LABEL: name: i1_vcc_to_vcc_copy ; WAVE64: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE64-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; WAVE64-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; WAVE64-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE64-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; WAVE64-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; WAVE64-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 ; WAVE64-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF ; WAVE64-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2 - ; WAVE64-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; WAVE64-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[COPY]], [[COPY5]], implicit $exec - ; WAVE64-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY2]], 0, [[COPY1]], [[V_CMP_EQ_U32_e64_]], implicit $exec - ; WAVE64-NEXT: [[V_CNDMASK_B32_e64_1:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY4]], 0, [[COPY3]], [[V_CMP_EQ_U32_e64_]], implicit $exec - ; WAVE64-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[DEF]] - ; WAVE64-NEXT: 
[[COPY7:%[0-9]+]]:vgpr_32 = COPY [[DEF]] - ; WAVE64-NEXT: EXP_DONE 0, [[V_CNDMASK_B32_e64_]], [[V_CNDMASK_B32_e64_1]], [[COPY6]], [[COPY7]], -1, 0, 15, implicit $exec + ; WAVE64-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; WAVE64-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[PRED_COPY]], [[PRED_COPY5]], implicit $exec + ; WAVE64-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[PRED_COPY2]], 0, [[PRED_COPY1]], [[V_CMP_EQ_U32_e64_]], implicit $exec + ; WAVE64-NEXT: [[V_CNDMASK_B32_e64_1:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[PRED_COPY4]], 0, [[PRED_COPY3]], [[V_CMP_EQ_U32_e64_]], implicit $exec + ; WAVE64-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[DEF]] + ; WAVE64-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[DEF]] + ; WAVE64-NEXT: EXP_DONE 0, [[V_CNDMASK_B32_e64_]], [[V_CNDMASK_B32_e64_1]], [[PRED_COPY6]], [[PRED_COPY7]], -1, 0, 15, implicit $exec ; WAVE64-NEXT: S_ENDPGM 0 ; WAVE32-LABEL: name: i1_vcc_to_vcc_copy ; WAVE32: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE32-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; WAVE32-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; WAVE32-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE32-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; WAVE32-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; WAVE32-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 ; WAVE32-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF ; WAVE32-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2 - ; WAVE32-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; WAVE32-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 [[COPY]], [[COPY5]], implicit $exec - ; WAVE32-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY2]], 0, [[COPY1]], [[V_CMP_EQ_U32_e64_]], implicit $exec - ; WAVE32-NEXT: [[V_CNDMASK_B32_e64_1:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY4]], 0, [[COPY3]], [[V_CMP_EQ_U32_e64_]], implicit $exec - ; WAVE32-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[DEF]] - ; WAVE32-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[DEF]] - ; WAVE32-NEXT: EXP_DONE 0, [[V_CNDMASK_B32_e64_]], [[V_CNDMASK_B32_e64_1]], [[COPY6]], [[COPY7]], -1, 0, 15, implicit $exec + ; WAVE32-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; WAVE32-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 [[PRED_COPY]], [[PRED_COPY5]], implicit $exec + ; WAVE32-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[PRED_COPY2]], 0, [[PRED_COPY1]], [[V_CMP_EQ_U32_e64_]], implicit $exec + ; WAVE32-NEXT: [[V_CNDMASK_B32_e64_1:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[PRED_COPY4]], 0, [[PRED_COPY3]], [[V_CMP_EQ_U32_e64_]], implicit $exec + ; WAVE32-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[DEF]] + ; WAVE32-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[DEF]] + ; WAVE32-NEXT: EXP_DONE 0, [[V_CNDMASK_B32_e64_]], [[V_CNDMASK_B32_e64_1]], [[PRED_COPY6]], [[PRED_COPY7]], -1, 0, 15, implicit $exec ; WAVE32-NEXT: S_ENDPGM 0 %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -78,46 +78,46 @@ ; WAVE64-LABEL: name: i1_sgpr_to_vcc_copy ; WAVE64: liveins: $sgpr0, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; WAVE64-NEXT: {{ $}} - ; 
WAVE64-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE64-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE64-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; WAVE64-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE64-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE64-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; WAVE64-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 ; WAVE64-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF ; WAVE64-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2 - ; WAVE64-NEXT: S_CMP_EQ_U32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc - ; WAVE64-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $scc - ; WAVE64-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, [[COPY5]], implicit-def $scc + ; WAVE64-NEXT: S_CMP_EQ_U32 [[PRED_COPY]], [[S_MOV_B32_]], implicit-def $scc + ; WAVE64-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $scc + ; WAVE64-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, [[PRED_COPY5]], implicit-def $scc ; WAVE64-NEXT: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 0, [[S_AND_B32_]], implicit $exec - ; WAVE64-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY2]], 0, [[COPY1]], [[V_CMP_NE_U32_e64_]], implicit $exec - ; WAVE64-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32 = S_AND_B32 1, [[COPY5]], implicit-def $scc + ; WAVE64-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[PRED_COPY2]], 0, [[PRED_COPY1]], [[V_CMP_NE_U32_e64_]], implicit $exec + ; WAVE64-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32 = S_AND_B32 1, [[PRED_COPY5]], implicit-def $scc ; WAVE64-NEXT: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 0, [[S_AND_B32_1]], implicit $exec - ; WAVE64-NEXT: [[V_CNDMASK_B32_e64_1:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY4]], 0, [[COPY3]], [[V_CMP_NE_U32_e64_1]], implicit $exec - ; WAVE64-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[DEF]] - ; WAVE64-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[DEF]] - ; WAVE64-NEXT: EXP_DONE 0, [[V_CNDMASK_B32_e64_]], [[V_CNDMASK_B32_e64_1]], [[COPY6]], [[COPY7]], -1, 0, 15, implicit $exec + ; WAVE64-NEXT: [[V_CNDMASK_B32_e64_1:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[PRED_COPY4]], 0, [[PRED_COPY3]], [[V_CMP_NE_U32_e64_1]], implicit $exec + ; WAVE64-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[DEF]] + ; WAVE64-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[DEF]] + ; WAVE64-NEXT: EXP_DONE 0, [[V_CNDMASK_B32_e64_]], [[V_CNDMASK_B32_e64_1]], [[PRED_COPY6]], [[PRED_COPY7]], -1, 0, 15, implicit $exec ; WAVE64-NEXT: S_ENDPGM 0 ; WAVE32-LABEL: name: i1_sgpr_to_vcc_copy ; WAVE32: liveins: $sgpr0, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE32-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE32-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; WAVE32-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE32-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE32-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; WAVE32-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 ; WAVE32-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF ; WAVE32-NEXT: 
[[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2 - ; WAVE32-NEXT: S_CMP_EQ_U32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc - ; WAVE32-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $scc - ; WAVE32-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, [[COPY5]], implicit-def $scc + ; WAVE32-NEXT: S_CMP_EQ_U32 [[PRED_COPY]], [[S_MOV_B32_]], implicit-def $scc + ; WAVE32-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $scc + ; WAVE32-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, [[PRED_COPY5]], implicit-def $scc ; WAVE32-NEXT: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_NE_U32_e64 0, [[S_AND_B32_]], implicit $exec - ; WAVE32-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY2]], 0, [[COPY1]], [[V_CMP_NE_U32_e64_]], implicit $exec - ; WAVE32-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32 = S_AND_B32 1, [[COPY5]], implicit-def $scc + ; WAVE32-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[PRED_COPY2]], 0, [[PRED_COPY1]], [[V_CMP_NE_U32_e64_]], implicit $exec + ; WAVE32-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32 = S_AND_B32 1, [[PRED_COPY5]], implicit-def $scc ; WAVE32-NEXT: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_NE_U32_e64 0, [[S_AND_B32_1]], implicit $exec - ; WAVE32-NEXT: [[V_CNDMASK_B32_e64_1:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY4]], 0, [[COPY3]], [[V_CMP_NE_U32_e64_1]], implicit $exec - ; WAVE32-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[DEF]] - ; WAVE32-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[DEF]] - ; WAVE32-NEXT: EXP_DONE 0, [[V_CNDMASK_B32_e64_]], [[V_CNDMASK_B32_e64_1]], [[COPY6]], [[COPY7]], -1, 0, 15, implicit $exec + ; WAVE32-NEXT: [[V_CNDMASK_B32_e64_1:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[PRED_COPY4]], 0, [[PRED_COPY3]], [[V_CMP_NE_U32_e64_1]], implicit $exec + ; WAVE32-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[DEF]] + ; WAVE32-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[DEF]] + ; WAVE32-NEXT: EXP_DONE 0, [[V_CNDMASK_B32_e64_]], [[V_CNDMASK_B32_e64_1]], [[PRED_COPY6]], [[PRED_COPY7]], -1, 0, 15, implicit $exec ; WAVE32-NEXT: S_ENDPGM 0 %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = COPY $vgpr0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-icmp.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-icmp.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-icmp.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-icmp.mir @@ -9,21 +9,21 @@ regBankSelected: true # GCN: name: icmp_s32_s_mix -# GCN: [[SGPR0:%[0-9]+]]:sreg_32 = COPY $sgpr0 -# GCN: [[SGPR1:%[0-9]+]]:sreg_32 = COPY $sgpr1 -# GCN: [[SGPR2:%[0-9]+]]:sreg_32 = COPY $sgpr2 -# GCN: [[SGPR3:%[0-9]+]]:sreg_32 = COPY $sgpr3 -# GCN: [[SGPR4:%[0-9]+]]:sreg_32 = COPY $sgpr4 -# GCN: [[SGPR5:%[0-9]+]]:sreg_32 = COPY $sgpr5 -# GCN: [[SGPR6:%[0-9]+]]:sreg_32 = COPY $sgpr6 -# GCN: [[SGPR7:%[0-9]+]]:sreg_32 = COPY $sgpr7 +# GCN: [[SGPR0:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 +# GCN: [[SGPR1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 +# GCN: [[SGPR2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 +# GCN: [[SGPR3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 +# GCN: [[SGPR4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 +# GCN: [[SGPR5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 +# GCN: [[SGPR6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 +# GCN: [[SGPR7:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr7 # GCN: S_CMP_LG_U32 [[SGPR0]], [[SGPR1]], implicit-def $scc -# GCN-NEXT: [[COND0:%[0-9]+]]:sreg_32 = COPY $scc +# GCN-NEXT: [[COND0:%[0-9]+]]:sreg_32 = PRED_COPY $scc # GCN: S_CMP_LG_U32 [[SGPR4]], [[SGPR5]], implicit-def $scc -# GCN-NEXT: [[COND1:%[0-9]+]]:sreg_32 = COPY $scc -# GCN: $scc = COPY 
[[COND0]] +# GCN-NEXT: [[COND1:%[0-9]+]]:sreg_32 = PRED_COPY $scc +# GCN: $scc = PRED_COPY [[COND0]] # GCN-NEXT: S_CSELECT_B32 [[SGPR6]], [[SGPR7]], implicit $scc -# GCN: $scc = COPY [[COND1]] +# GCN: $scc = PRED_COPY [[COND1]] # GCN-NEXT: S_CSELECT_B32 [[SGPR2]], [[SGPR3]], implicit $scc body: | @@ -123,14 +123,14 @@ regBankSelected: true # GCN-LABEL: name: icmp_s32_v_mix -# GCN: [[VGPR2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 -# GCN: [[VGPR3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 -# GCN: [[VGPR4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 -# GCN: [[VGPR5:%[0-9]+]]:vgpr_32 = COPY $vgpr5 -# GCN: [[VGPR6:%[0-9]+]]:vgpr_32 = COPY $vgpr6 -# GCN: [[VGPR7:%[0-9]+]]:vgpr_32 = COPY $vgpr7 -# GCN: [[VGPR8:%[0-9]+]]:vgpr_32 = COPY $vgpr8 -# GCN: [[VGPR9:%[0-9]+]]:vgpr_32 = COPY $vgpr9 +# GCN: [[VGPR2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 +# GCN: [[VGPR3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 +# GCN: [[VGPR4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 +# GCN: [[VGPR5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr5 +# GCN: [[VGPR6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr6 +# GCN: [[VGPR7:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr7 +# GCN: [[VGPR8:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr8 +# GCN: [[VGPR9:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr9 # GCN: [[COND0:%[0-9]+]]:[[VCCRC]] = V_CMP_NE_U32_e64 [[VGPR2]], [[VGPR3]] # GCN: [[COND1:%[0-9]+]]:[[VCCRC]] = V_CMP_NE_U32_e64 [[VGPR6]], [[VGPR7]] # GCN: V_CNDMASK_B32_e64 0, [[VGPR9]], 0, [[VGPR8]], [[COND0]] @@ -220,8 +220,8 @@ regBankSelected: true # GCN-LABEL: name: icmp_s32_vv -# GCN: [[VGPR2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 -# GCN: [[VGPR3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 +# GCN: [[VGPR2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 +# GCN: [[VGPR3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 # GCN: V_CMP_NE_U32_e64 [[VGPR2]], [[VGPR3]] body: | @@ -244,8 +244,8 @@ regBankSelected: true # GCN-LABEL: name: icmp_s32_vs -# GCN: [[VGPR2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 -# GCN: [[SGPR0:%[0-9]+]]:sreg_32 = COPY $sgpr0 +# GCN: [[VGPR2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 +# GCN: [[SGPR0:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 # GCN: V_CMP_NE_U32_e64 [[VGPR2]], [[SGPR0]] body: | @@ -268,8 +268,8 @@ regBankSelected: true # GCN-LABEL: name: icmp_s32_sv -# GCN: [[VGPR2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 -# GCN: [[SGPR0:%[0-9]+]]:sreg_32 = COPY $sgpr0 +# GCN: [[VGPR2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 +# GCN: [[SGPR0:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 # GCN: V_CMP_NE_U32_e64 [[SGPR0]], [[VGPR2]] body: | @@ -292,8 +292,8 @@ regBankSelected: true # GCN-LABEL: name: icmp_s32_or_vcc -# GCN: [[VGPR2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 -# GCN: [[SGPR0:%[0-9]+]]:sreg_32 = COPY $sgpr0 +# GCN: [[VGPR2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 +# GCN: [[SGPR0:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 # GCN: V_CMP_NE_U32_e64 [[SGPR0]], [[VGPR2]] body: | diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-icmp.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-icmp.s16.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-icmp.s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-icmp.s16.mir @@ -16,23 +16,23 @@ ; WAVE64-LABEL: name: icmp_eq_s16_sv ; WAVE64: liveins: $sgpr0, $vgpr0 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE64-NEXT: [[V_CMP_EQ_U16_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U16_e64 [[COPY]], [[COPY1]], implicit $exec + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE64-NEXT: [[V_CMP_EQ_U16_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U16_e64 [[PRED_COPY]], 
[[PRED_COPY1]], implicit $exec ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_e64_]] ; WAVE32-LABEL: name: icmp_eq_s16_sv ; WAVE32: liveins: $sgpr0, $vgpr0 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE32-NEXT: [[V_CMP_EQ_U16_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U16_e64 [[COPY]], [[COPY1]], implicit $exec + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE32-NEXT: [[V_CMP_EQ_U16_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U16_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_e64_]] ; GFX11-LABEL: name: icmp_eq_s16_sv ; GFX11: liveins: $sgpr0, $vgpr0 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[V_CMP_EQ_U16_t16_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U16_t16_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[V_CMP_EQ_U16_t16_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U16_t16_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_t16_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = COPY $vgpr0 @@ -55,23 +55,23 @@ ; WAVE64-LABEL: name: icmp_eq_s16_vs ; WAVE64: liveins: $sgpr0, $vgpr0 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; WAVE64-NEXT: [[V_CMP_EQ_U16_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U16_e64 [[COPY]], [[COPY1]], implicit $exec + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; WAVE64-NEXT: [[V_CMP_EQ_U16_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U16_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_e64_]] ; WAVE32-LABEL: name: icmp_eq_s16_vs ; WAVE32: liveins: $sgpr0, $vgpr0 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; WAVE32-NEXT: [[V_CMP_EQ_U16_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U16_e64 [[COPY]], [[COPY1]], implicit $exec + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; WAVE32-NEXT: [[V_CMP_EQ_U16_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U16_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_e64_]] ; GFX11-LABEL: name: icmp_eq_s16_vs ; GFX11: liveins: $sgpr0, $vgpr0 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX11-NEXT: [[V_CMP_EQ_U16_t16_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U16_t16_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX11-NEXT: [[V_CMP_EQ_U16_t16_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U16_t16_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_t16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:sgpr(s32) = COPY $sgpr0 @@ -94,23 +94,23 @@ ; WAVE64-LABEL: name: icmp_eq_s16_vv ; WAVE64: liveins: $vgpr0, $vgpr1 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; 
WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE64-NEXT: [[V_CMP_EQ_U16_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U16_e64 [[COPY]], [[COPY1]], implicit $exec + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE64-NEXT: [[V_CMP_EQ_U16_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U16_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_e64_]] ; WAVE32-LABEL: name: icmp_eq_s16_vv ; WAVE32: liveins: $vgpr0, $vgpr1 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE32-NEXT: [[V_CMP_EQ_U16_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U16_e64 [[COPY]], [[COPY1]], implicit $exec + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE32-NEXT: [[V_CMP_EQ_U16_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U16_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_e64_]] ; GFX11-LABEL: name: icmp_eq_s16_vv ; GFX11: liveins: $vgpr0, $vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: [[V_CMP_EQ_U16_t16_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U16_t16_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[V_CMP_EQ_U16_t16_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U16_t16_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_t16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -133,23 +133,23 @@ ; WAVE64-LABEL: name: icmp_ne_s16_vv ; WAVE64: liveins: $vgpr0, $vgpr1 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE64-NEXT: [[V_CMP_NE_U16_e64_:%[0-9]+]]:sreg_64 = V_CMP_NE_U16_e64 [[COPY]], [[COPY1]], implicit $exec + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE64-NEXT: [[V_CMP_NE_U16_e64_:%[0-9]+]]:sreg_64 = V_CMP_NE_U16_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_NE_U16_e64_]] ; WAVE32-LABEL: name: icmp_ne_s16_vv ; WAVE32: liveins: $vgpr0, $vgpr1 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE32-NEXT: [[V_CMP_NE_U16_e64_:%[0-9]+]]:sreg_32 = V_CMP_NE_U16_e64 [[COPY]], [[COPY1]], implicit $exec + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE32-NEXT: [[V_CMP_NE_U16_e64_:%[0-9]+]]:sreg_32 = V_CMP_NE_U16_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_NE_U16_e64_]] ; GFX11-LABEL: name: icmp_ne_s16_vv ; GFX11: liveins: $vgpr0, $vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: [[V_CMP_NE_U16_t16_e64_:%[0-9]+]]:sreg_32 = V_CMP_NE_U16_t16_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX11-NEXT: 
[[V_CMP_NE_U16_t16_e64_:%[0-9]+]]:sreg_32 = V_CMP_NE_U16_t16_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_NE_U16_t16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -172,23 +172,23 @@ ; WAVE64-LABEL: name: icmp_slt_s16_vv ; WAVE64: liveins: $vgpr0, $vgpr1 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE64-NEXT: [[V_CMP_LT_I16_e64_:%[0-9]+]]:sreg_64 = V_CMP_LT_I16_e64 [[COPY]], [[COPY1]], implicit $exec + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE64-NEXT: [[V_CMP_LT_I16_e64_:%[0-9]+]]:sreg_64 = V_CMP_LT_I16_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_LT_I16_e64_]] ; WAVE32-LABEL: name: icmp_slt_s16_vv ; WAVE32: liveins: $vgpr0, $vgpr1 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE32-NEXT: [[V_CMP_LT_I16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LT_I16_e64 [[COPY]], [[COPY1]], implicit $exec + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE32-NEXT: [[V_CMP_LT_I16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LT_I16_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_LT_I16_e64_]] ; GFX11-LABEL: name: icmp_slt_s16_vv ; GFX11: liveins: $vgpr0, $vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: [[V_CMP_LT_I16_t16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LT_I16_t16_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[V_CMP_LT_I16_t16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LT_I16_t16_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_LT_I16_t16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -211,23 +211,23 @@ ; WAVE64-LABEL: name: icmp_sle_s16_vv ; WAVE64: liveins: $vgpr0, $vgpr1 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE64-NEXT: [[V_CMP_LE_I16_e64_:%[0-9]+]]:sreg_64 = V_CMP_LE_I16_e64 [[COPY]], [[COPY1]], implicit $exec + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE64-NEXT: [[V_CMP_LE_I16_e64_:%[0-9]+]]:sreg_64 = V_CMP_LE_I16_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_LE_I16_e64_]] ; WAVE32-LABEL: name: icmp_sle_s16_vv ; WAVE32: liveins: $vgpr0, $vgpr1 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE32-NEXT: [[V_CMP_LE_I16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LE_I16_e64 [[COPY]], [[COPY1]], implicit $exec + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE32-NEXT: [[V_CMP_LE_I16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LE_I16_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_LE_I16_e64_]] ; GFX11-LABEL: name: icmp_sle_s16_vv ; GFX11: liveins: $vgpr0, $vgpr1 ; 
GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: [[V_CMP_LE_I16_t16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LE_I16_t16_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[V_CMP_LE_I16_t16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LE_I16_t16_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_LE_I16_t16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -250,23 +250,23 @@ ; WAVE64-LABEL: name: icmp_ult_s16_vv ; WAVE64: liveins: $vgpr0, $vgpr1 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE64-NEXT: [[V_CMP_LT_U16_e64_:%[0-9]+]]:sreg_64 = V_CMP_LT_U16_e64 [[COPY]], [[COPY1]], implicit $exec + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE64-NEXT: [[V_CMP_LT_U16_e64_:%[0-9]+]]:sreg_64 = V_CMP_LT_U16_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_LT_U16_e64_]] ; WAVE32-LABEL: name: icmp_ult_s16_vv ; WAVE32: liveins: $vgpr0, $vgpr1 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE32-NEXT: [[V_CMP_LT_U16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LT_U16_e64 [[COPY]], [[COPY1]], implicit $exec + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE32-NEXT: [[V_CMP_LT_U16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LT_U16_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_LT_U16_e64_]] ; GFX11-LABEL: name: icmp_ult_s16_vv ; GFX11: liveins: $vgpr0, $vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: [[V_CMP_LT_U16_t16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LT_U16_t16_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[V_CMP_LT_U16_t16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LT_U16_t16_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_LT_U16_t16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -289,23 +289,23 @@ ; WAVE64-LABEL: name: icmp_ule_s16_vv ; WAVE64: liveins: $vgpr0, $vgpr1 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE64-NEXT: [[V_CMP_LE_U16_e64_:%[0-9]+]]:sreg_64 = V_CMP_LE_U16_e64 [[COPY]], [[COPY1]], implicit $exec + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE64-NEXT: [[V_CMP_LE_U16_e64_:%[0-9]+]]:sreg_64 = V_CMP_LE_U16_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_LE_U16_e64_]] ; WAVE32-LABEL: name: icmp_ule_s16_vv ; WAVE32: liveins: $vgpr0, $vgpr1 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE32-NEXT: [[V_CMP_LE_U16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LE_U16_e64 [[COPY]], [[COPY1]], implicit 
$exec + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE32-NEXT: [[V_CMP_LE_U16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LE_U16_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_LE_U16_e64_]] ; GFX11-LABEL: name: icmp_ule_s16_vv ; GFX11: liveins: $vgpr0, $vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: [[V_CMP_LE_U16_t16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LE_U16_t16_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[V_CMP_LE_U16_t16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LE_U16_t16_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_LE_U16_t16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-icmp.s64.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-icmp.s64.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-icmp.s64.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-icmp.s64.mir @@ -31,11 +31,11 @@ ; GFX8-LABEL: name: icmp_eq_s64_ss ; GFX8: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3 - ; GFX8-NEXT: S_CMP_EQ_U64 [[COPY]], [[COPY1]], implicit-def $scc - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $scc - ; GFX8-NEXT: S_ENDPGM 0, implicit [[COPY2]] + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr2_sgpr3 + ; GFX8-NEXT: S_CMP_EQ_U64 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $scc + ; GFX8-NEXT: S_ENDPGM 0, implicit [[PRED_COPY2]] ; GFX6-LABEL: name: icmp_eq_s64_ss ; GFX6: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 ; GFX6-NEXT: {{ $}} @@ -62,11 +62,11 @@ ; GFX8-LABEL: name: icmp_ne_s64_ss ; GFX8: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3 - ; GFX8-NEXT: S_CMP_LG_U64 [[COPY]], [[COPY1]], implicit-def $scc - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $scc - ; GFX8-NEXT: S_ENDPGM 0, implicit [[COPY2]] + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr2_sgpr3 + ; GFX8-NEXT: S_CMP_LG_U64 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $scc + ; GFX8-NEXT: S_ENDPGM 0, implicit [[PRED_COPY2]] ; GFX6-LABEL: name: icmp_ne_s64_ss ; GFX6: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 ; GFX6-NEXT: {{ $}} @@ -123,20 +123,20 @@ ; GFX8-LABEL: name: icmp_eq_s64_vv ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY]], [[COPY1]], implicit $exec - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_CMP_EQ_U64_e64_]] - ; GFX8-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY2]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; 
GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[V_CMP_EQ_U64_e64_]] + ; GFX8-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[PRED_COPY2]], implicit $exec ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_AND_B32_e32_]] ; GFX6-LABEL: name: icmp_eq_s64_vv ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY]], [[COPY1]], implicit $exec - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_CMP_EQ_U64_e64_]] - ; GFX6-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY2]], implicit $exec + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[V_CMP_EQ_U64_e64_]] + ; GFX6-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[PRED_COPY2]], implicit $exec ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_AND_B32_e32_]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = COPY $vgpr2_vgpr3 @@ -159,20 +159,20 @@ ; GFX8-LABEL: name: icmp_ne_s64_vv ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX8-NEXT: [[V_CMP_NE_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U64_e64 [[COPY]], [[COPY1]], implicit $exec - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_CMP_NE_U64_e64_]] - ; GFX8-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY2]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX8-NEXT: [[V_CMP_NE_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U64_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[V_CMP_NE_U64_e64_]] + ; GFX8-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[PRED_COPY2]], implicit $exec ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_AND_B32_e32_]] ; GFX6-LABEL: name: icmp_ne_s64_vv ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX6-NEXT: [[V_CMP_NE_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U64_e64 [[COPY]], [[COPY1]], implicit $exec - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_CMP_NE_U64_e64_]] - ; GFX6-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY2]], implicit $exec + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX6-NEXT: [[V_CMP_NE_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U64_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[V_CMP_NE_U64_e64_]] + ; GFX6-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[PRED_COPY2]], implicit $exec ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_AND_B32_e32_]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = 
COPY $vgpr2_vgpr3 @@ -195,20 +195,20 @@ ; GFX8-LABEL: name: icmp_sgt_s64_vv ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX8-NEXT: [[V_CMP_GT_I64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_GT_I64_e64 [[COPY]], [[COPY1]], implicit $exec - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_CMP_GT_I64_e64_]] - ; GFX8-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY2]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX8-NEXT: [[V_CMP_GT_I64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_GT_I64_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[V_CMP_GT_I64_e64_]] + ; GFX8-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[PRED_COPY2]], implicit $exec ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_AND_B32_e32_]] ; GFX6-LABEL: name: icmp_sgt_s64_vv ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX6-NEXT: [[V_CMP_GT_I64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_GT_I64_e64 [[COPY]], [[COPY1]], implicit $exec - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_CMP_GT_I64_e64_]] - ; GFX6-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY2]], implicit $exec + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX6-NEXT: [[V_CMP_GT_I64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_GT_I64_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[V_CMP_GT_I64_e64_]] + ; GFX6-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[PRED_COPY2]], implicit $exec ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_AND_B32_e32_]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = COPY $vgpr2_vgpr3 @@ -231,20 +231,20 @@ ; GFX8-LABEL: name: icmp_sge_s64_vv ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX8-NEXT: [[V_CMP_GE_I64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_GE_I64_e64 [[COPY]], [[COPY1]], implicit $exec - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_CMP_GE_I64_e64_]] - ; GFX8-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY2]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX8-NEXT: [[V_CMP_GE_I64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_GE_I64_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[V_CMP_GE_I64_e64_]] + ; GFX8-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[PRED_COPY2]], implicit $exec ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_AND_B32_e32_]] ; GFX6-LABEL: name: icmp_sge_s64_vv ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX6-NEXT: [[V_CMP_GE_I64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_GE_I64_e64 [[COPY]], [[COPY1]], implicit $exec - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_CMP_GE_I64_e64_]] - ; GFX6-NEXT: 
[[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY2]], implicit $exec + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX6-NEXT: [[V_CMP_GE_I64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_GE_I64_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[V_CMP_GE_I64_e64_]] + ; GFX6-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[PRED_COPY2]], implicit $exec ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_AND_B32_e32_]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = COPY $vgpr2_vgpr3 @@ -267,20 +267,20 @@ ; GFX8-LABEL: name: icmp_slt_s64_vv ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX8-NEXT: [[V_CMP_LT_I64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_LT_I64_e64 [[COPY]], [[COPY1]], implicit $exec - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_CMP_LT_I64_e64_]] - ; GFX8-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY2]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX8-NEXT: [[V_CMP_LT_I64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_LT_I64_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[V_CMP_LT_I64_e64_]] + ; GFX8-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[PRED_COPY2]], implicit $exec ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_AND_B32_e32_]] ; GFX6-LABEL: name: icmp_slt_s64_vv ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX6-NEXT: [[V_CMP_LT_I64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_LT_I64_e64 [[COPY]], [[COPY1]], implicit $exec - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_CMP_LT_I64_e64_]] - ; GFX6-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY2]], implicit $exec + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX6-NEXT: [[V_CMP_LT_I64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_LT_I64_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[V_CMP_LT_I64_e64_]] + ; GFX6-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[PRED_COPY2]], implicit $exec ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_AND_B32_e32_]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = COPY $vgpr2_vgpr3 @@ -303,20 +303,20 @@ ; GFX8-LABEL: name: icmp_sle_s64_vv ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX8-NEXT: [[V_CMP_LE_I64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_LE_I64_e64 [[COPY]], [[COPY1]], implicit $exec - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_CMP_LE_I64_e64_]] - ; GFX8-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY2]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX8-NEXT: [[V_CMP_LE_I64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_LE_I64_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec + ; GFX8-NEXT: 
[[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[V_CMP_LE_I64_e64_]] + ; GFX8-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[PRED_COPY2]], implicit $exec ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_AND_B32_e32_]] ; GFX6-LABEL: name: icmp_sle_s64_vv ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX6-NEXT: [[V_CMP_LE_I64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_LE_I64_e64 [[COPY]], [[COPY1]], implicit $exec - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_CMP_LE_I64_e64_]] - ; GFX6-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY2]], implicit $exec + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX6-NEXT: [[V_CMP_LE_I64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_LE_I64_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[V_CMP_LE_I64_e64_]] + ; GFX6-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[PRED_COPY2]], implicit $exec ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_AND_B32_e32_]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = COPY $vgpr2_vgpr3 @@ -339,20 +339,20 @@ ; GFX8-LABEL: name: icmp_ugt_s64_vv ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX8-NEXT: [[V_CMP_GT_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_GT_U64_e64 [[COPY]], [[COPY1]], implicit $exec - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_CMP_GT_U64_e64_]] - ; GFX8-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY2]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX8-NEXT: [[V_CMP_GT_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_GT_U64_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[V_CMP_GT_U64_e64_]] + ; GFX8-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[PRED_COPY2]], implicit $exec ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_AND_B32_e32_]] ; GFX6-LABEL: name: icmp_ugt_s64_vv ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX6-NEXT: [[V_CMP_GT_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_GT_U64_e64 [[COPY]], [[COPY1]], implicit $exec - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_CMP_GT_U64_e64_]] - ; GFX6-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY2]], implicit $exec + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX6-NEXT: [[V_CMP_GT_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_GT_U64_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[V_CMP_GT_U64_e64_]] + ; GFX6-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[PRED_COPY2]], implicit $exec ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_AND_B32_e32_]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = COPY $vgpr2_vgpr3 @@ -375,20 +375,20 @@ ; GFX8-LABEL: name: icmp_uge_s64_vv ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; 
GFX8-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX8-NEXT: [[V_CMP_GE_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_GE_U64_e64 [[COPY]], [[COPY1]], implicit $exec - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_CMP_GE_U64_e64_]] - ; GFX8-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY2]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX8-NEXT: [[V_CMP_GE_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_GE_U64_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[V_CMP_GE_U64_e64_]] + ; GFX8-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[PRED_COPY2]], implicit $exec ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_AND_B32_e32_]] ; GFX6-LABEL: name: icmp_uge_s64_vv ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX6-NEXT: [[V_CMP_GE_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_GE_U64_e64 [[COPY]], [[COPY1]], implicit $exec - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_CMP_GE_U64_e64_]] - ; GFX6-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY2]], implicit $exec + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX6-NEXT: [[V_CMP_GE_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_GE_U64_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[V_CMP_GE_U64_e64_]] + ; GFX6-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[PRED_COPY2]], implicit $exec ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_AND_B32_e32_]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = COPY $vgpr2_vgpr3 @@ -411,20 +411,20 @@ ; GFX8-LABEL: name: icmp_ult_s64_vv ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX8-NEXT: [[V_CMP_LT_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_LT_U64_e64 [[COPY]], [[COPY1]], implicit $exec - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_CMP_LT_U64_e64_]] - ; GFX8-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY2]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX8-NEXT: [[V_CMP_LT_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_LT_U64_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[V_CMP_LT_U64_e64_]] + ; GFX8-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[PRED_COPY2]], implicit $exec ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_AND_B32_e32_]] ; GFX6-LABEL: name: icmp_ult_s64_vv ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX6-NEXT: [[V_CMP_LT_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_LT_U64_e64 [[COPY]], [[COPY1]], implicit $exec - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_CMP_LT_U64_e64_]] - ; GFX6-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY2]], implicit $exec + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY 
$vgpr2_vgpr3 + ; GFX6-NEXT: [[V_CMP_LT_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_LT_U64_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[V_CMP_LT_U64_e64_]] + ; GFX6-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[PRED_COPY2]], implicit $exec ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_AND_B32_e32_]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = COPY $vgpr2_vgpr3 @@ -447,20 +447,20 @@ ; GFX8-LABEL: name: icmp_ule_s64_vv ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX8-NEXT: [[V_CMP_LE_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_LE_U64_e64 [[COPY]], [[COPY1]], implicit $exec - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_CMP_LE_U64_e64_]] - ; GFX8-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY2]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX8-NEXT: [[V_CMP_LE_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_LE_U64_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[V_CMP_LE_U64_e64_]] + ; GFX8-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[PRED_COPY2]], implicit $exec ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_AND_B32_e32_]] ; GFX6-LABEL: name: icmp_ule_s64_vv ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX6-NEXT: [[V_CMP_LE_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_LE_U64_e64 [[COPY]], [[COPY1]], implicit $exec - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_CMP_LE_U64_e64_]] - ; GFX6-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY2]], implicit $exec + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX6-NEXT: [[V_CMP_LE_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_LE_U64_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[V_CMP_LE_U64_e64_]] + ; GFX6-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[PRED_COPY2]], implicit $exec ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_AND_B32_e32_]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = COPY $vgpr2_vgpr3 @@ -483,11 +483,11 @@ ; GFX8-LABEL: name: icmp_eq_p0_ss ; GFX8: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3 - ; GFX8-NEXT: S_CMP_EQ_U64 [[COPY]], [[COPY1]], implicit-def $scc - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $scc - ; GFX8-NEXT: S_ENDPGM 0, implicit [[COPY2]] + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr2_sgpr3 + ; GFX8-NEXT: S_CMP_EQ_U64 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $scc + ; GFX8-NEXT: S_ENDPGM 0, implicit [[PRED_COPY2]] ; GFX6-LABEL: name: icmp_eq_p0_ss ; GFX6: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 ; GFX6-NEXT: {{ $}} @@ -513,11 +513,11 @@ ; GFX8-LABEL: name: icmp_eq_p1_ss ; GFX8: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX8-NEXT: 
[[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3 - ; GFX8-NEXT: S_CMP_EQ_U64 [[COPY]], [[COPY1]], implicit-def $scc - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $scc - ; GFX8-NEXT: S_ENDPGM 0, implicit [[COPY2]] + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr2_sgpr3 + ; GFX8-NEXT: S_CMP_EQ_U64 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $scc + ; GFX8-NEXT: S_ENDPGM 0, implicit [[PRED_COPY2]] ; GFX6-LABEL: name: icmp_eq_p1_ss ; GFX6: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 ; GFX6-NEXT: {{ $}} @@ -544,11 +544,11 @@ ; GFX8-LABEL: name: icmp_eq_p999_ss ; GFX8: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3 - ; GFX8-NEXT: S_CMP_EQ_U64 [[COPY]], [[COPY1]], implicit-def $scc - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $scc - ; GFX8-NEXT: S_ENDPGM 0, implicit [[COPY2]] + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr2_sgpr3 + ; GFX8-NEXT: S_CMP_EQ_U64 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $scc + ; GFX8-NEXT: S_ENDPGM 0, implicit [[PRED_COPY2]] ; GFX6-LABEL: name: icmp_eq_p999_ss ; GFX6: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 ; GFX6-NEXT: {{ $}} @@ -575,20 +575,20 @@ ; GFX8-LABEL: name: icmp_eq_p0_vv ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY]], [[COPY1]], implicit $exec - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_CMP_EQ_U64_e64_]] - ; GFX8-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY2]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[V_CMP_EQ_U64_e64_]] + ; GFX8-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[PRED_COPY2]], implicit $exec ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_AND_B32_e32_]] ; GFX6-LABEL: name: icmp_eq_p0_vv ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY]], [[COPY1]], implicit $exec - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_CMP_EQ_U64_e64_]] - ; GFX6-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY2]], implicit $exec + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[V_CMP_EQ_U64_e64_]] + ; GFX6-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[PRED_COPY2]], implicit $exec ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_AND_B32_e32_]] %0:vgpr(p0) = COPY $vgpr0_vgpr1 %1:vgpr(p0) = COPY 
$vgpr2_vgpr3 @@ -611,20 +611,20 @@ ; GFX8-LABEL: name: icmp_eq_p1_vv ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY]], [[COPY1]], implicit $exec - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_CMP_EQ_U64_e64_]] - ; GFX8-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY2]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[V_CMP_EQ_U64_e64_]] + ; GFX8-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[PRED_COPY2]], implicit $exec ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_AND_B32_e32_]] ; GFX6-LABEL: name: icmp_eq_p1_vv ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY]], [[COPY1]], implicit $exec - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_CMP_EQ_U64_e64_]] - ; GFX6-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY2]], implicit $exec + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[V_CMP_EQ_U64_e64_]] + ; GFX6-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[PRED_COPY2]], implicit $exec ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_AND_B32_e32_]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(p1) = COPY $vgpr2_vgpr3 @@ -647,20 +647,20 @@ ; GFX8-LABEL: name: icmp_eq_p999_vv ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY]], [[COPY1]], implicit $exec - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_CMP_EQ_U64_e64_]] - ; GFX8-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY2]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[V_CMP_EQ_U64_e64_]] + ; GFX8-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[PRED_COPY2]], implicit $exec ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_AND_B32_e32_]] ; GFX6-LABEL: name: icmp_eq_p999_vv ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY]], [[COPY1]], implicit $exec - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_CMP_EQ_U64_e64_]] - ; GFX6-NEXT: 
[[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY2]], implicit $exec + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[V_CMP_EQ_U64_e64_]] + ; GFX6-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[PRED_COPY2]], implicit $exec ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_AND_B32_e32_]] %0:vgpr(p999) = COPY $vgpr0_vgpr1 %1:vgpr(p999) = COPY $vgpr2_vgpr3 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-insert-vector-elt.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-insert-vector-elt.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-insert-vector-elt.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-insert-vector-elt.mir @@ -16,20 +16,20 @@ ; MOVREL-LABEL: name: insert_vector_elt_s_s32_v2s32 ; MOVREL: liveins: $sgpr0_sgpr1, $sgpr2, $sgpr3 ; MOVREL-NEXT: {{ $}} - ; MOVREL-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; MOVREL-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; MOVREL-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; MOVREL-NEXT: $m0 = COPY [[COPY2]] - ; MOVREL-NEXT: [[S_INDIRECT_REG_WRITE_MOVREL_B32_V2_:%[0-9]+]]:sreg_64 = S_INDIRECT_REG_WRITE_MOVREL_B32_V2 [[COPY]], [[COPY1]], 3, implicit $m0 + ; MOVREL-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; MOVREL-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; MOVREL-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; MOVREL-NEXT: $m0 = PRED_COPY [[PRED_COPY2]] + ; MOVREL-NEXT: [[S_INDIRECT_REG_WRITE_MOVREL_B32_V2_:%[0-9]+]]:sreg_64 = S_INDIRECT_REG_WRITE_MOVREL_B32_V2 [[PRED_COPY]], [[PRED_COPY1]], 3, implicit $m0 ; MOVREL-NEXT: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_MOVREL_B32_V2_]] ; GPRIDX-LABEL: name: insert_vector_elt_s_s32_v2s32 ; GPRIDX: liveins: $sgpr0_sgpr1, $sgpr2, $sgpr3 ; GPRIDX-NEXT: {{ $}} - ; GPRIDX-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GPRIDX-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GPRIDX-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GPRIDX-NEXT: $m0 = COPY [[COPY2]] - ; GPRIDX-NEXT: [[S_INDIRECT_REG_WRITE_MOVREL_B32_V2_:%[0-9]+]]:sreg_64 = S_INDIRECT_REG_WRITE_MOVREL_B32_V2 [[COPY]], [[COPY1]], 3, implicit $m0 + ; GPRIDX-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GPRIDX-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GPRIDX-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GPRIDX-NEXT: $m0 = PRED_COPY [[PRED_COPY2]] + ; GPRIDX-NEXT: [[S_INDIRECT_REG_WRITE_MOVREL_B32_V2_:%[0-9]+]]:sreg_64 = S_INDIRECT_REG_WRITE_MOVREL_B32_V2 [[PRED_COPY]], [[PRED_COPY1]], 3, implicit $m0 ; GPRIDX-NEXT: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_MOVREL_B32_V2_]] %0:sgpr(<2 x s32>) = COPY $sgpr0_sgpr1 %1:sgpr(s32) = COPY $sgpr2 @@ -50,20 +50,20 @@ ; MOVREL-LABEL: name: insert_vector_elt_s_s32_v3s32 ; MOVREL: liveins: $sgpr0_sgpr1_sgpr2, $sgpr3, $sgpr4 ; MOVREL-NEXT: {{ $}} - ; MOVREL-NEXT: [[COPY:%[0-9]+]]:sgpr_96 = COPY $sgpr0_sgpr1_sgpr2 - ; MOVREL-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; MOVREL-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; MOVREL-NEXT: $m0 = COPY [[COPY2]] - ; MOVREL-NEXT: [[S_INDIRECT_REG_WRITE_MOVREL_B32_V3_:%[0-9]+]]:sgpr_96 = S_INDIRECT_REG_WRITE_MOVREL_B32_V3 [[COPY]], [[COPY1]], 3, implicit $m0 + ; MOVREL-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_96 = PRED_COPY 
$sgpr0_sgpr1_sgpr2 + ; MOVREL-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; MOVREL-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; MOVREL-NEXT: $m0 = PRED_COPY [[PRED_COPY2]] + ; MOVREL-NEXT: [[S_INDIRECT_REG_WRITE_MOVREL_B32_V3_:%[0-9]+]]:sgpr_96 = S_INDIRECT_REG_WRITE_MOVREL_B32_V3 [[PRED_COPY]], [[PRED_COPY1]], 3, implicit $m0 ; MOVREL-NEXT: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_MOVREL_B32_V3_]] ; GPRIDX-LABEL: name: insert_vector_elt_s_s32_v3s32 ; GPRIDX: liveins: $sgpr0_sgpr1_sgpr2, $sgpr3, $sgpr4 ; GPRIDX-NEXT: {{ $}} - ; GPRIDX-NEXT: [[COPY:%[0-9]+]]:sgpr_96 = COPY $sgpr0_sgpr1_sgpr2 - ; GPRIDX-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GPRIDX-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GPRIDX-NEXT: $m0 = COPY [[COPY2]] - ; GPRIDX-NEXT: [[S_INDIRECT_REG_WRITE_MOVREL_B32_V3_:%[0-9]+]]:sgpr_96 = S_INDIRECT_REG_WRITE_MOVREL_B32_V3 [[COPY]], [[COPY1]], 3, implicit $m0 + ; GPRIDX-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_96 = PRED_COPY $sgpr0_sgpr1_sgpr2 + ; GPRIDX-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GPRIDX-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GPRIDX-NEXT: $m0 = PRED_COPY [[PRED_COPY2]] + ; GPRIDX-NEXT: [[S_INDIRECT_REG_WRITE_MOVREL_B32_V3_:%[0-9]+]]:sgpr_96 = S_INDIRECT_REG_WRITE_MOVREL_B32_V3 [[PRED_COPY]], [[PRED_COPY1]], 3, implicit $m0 ; GPRIDX-NEXT: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_MOVREL_B32_V3_]] %0:sgpr(<3 x s32>) = COPY $sgpr0_sgpr1_sgpr2 %1:sgpr(s32) = COPY $sgpr3 @@ -84,20 +84,20 @@ ; MOVREL-LABEL: name: insert_vector_elt_s_s32_v4s32 ; MOVREL: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, $sgpr5 ; MOVREL-NEXT: {{ $}} - ; MOVREL-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; MOVREL-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; MOVREL-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; MOVREL-NEXT: $m0 = COPY [[COPY2]] - ; MOVREL-NEXT: [[S_INDIRECT_REG_WRITE_MOVREL_B32_V4_:%[0-9]+]]:sgpr_128 = S_INDIRECT_REG_WRITE_MOVREL_B32_V4 [[COPY]], [[COPY1]], 3, implicit $m0 + ; MOVREL-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_128 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; MOVREL-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; MOVREL-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; MOVREL-NEXT: $m0 = PRED_COPY [[PRED_COPY2]] + ; MOVREL-NEXT: [[S_INDIRECT_REG_WRITE_MOVREL_B32_V4_:%[0-9]+]]:sgpr_128 = S_INDIRECT_REG_WRITE_MOVREL_B32_V4 [[PRED_COPY]], [[PRED_COPY1]], 3, implicit $m0 ; MOVREL-NEXT: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_MOVREL_B32_V4_]] ; GPRIDX-LABEL: name: insert_vector_elt_s_s32_v4s32 ; GPRIDX: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, $sgpr5 ; GPRIDX-NEXT: {{ $}} - ; GPRIDX-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GPRIDX-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GPRIDX-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GPRIDX-NEXT: $m0 = COPY [[COPY2]] - ; GPRIDX-NEXT: [[S_INDIRECT_REG_WRITE_MOVREL_B32_V4_:%[0-9]+]]:sgpr_128 = S_INDIRECT_REG_WRITE_MOVREL_B32_V4 [[COPY]], [[COPY1]], 3, implicit $m0 + ; GPRIDX-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_128 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GPRIDX-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GPRIDX-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GPRIDX-NEXT: $m0 = PRED_COPY [[PRED_COPY2]] + ; GPRIDX-NEXT: [[S_INDIRECT_REG_WRITE_MOVREL_B32_V4_:%[0-9]+]]:sgpr_128 = S_INDIRECT_REG_WRITE_MOVREL_B32_V4 [[PRED_COPY]], [[PRED_COPY1]], 3, implicit $m0 ; GPRIDX-NEXT: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_MOVREL_B32_V4_]] %0:sgpr(<4 x s32>) = 
COPY $sgpr0_sgpr1_sgpr2_sgpr3 %1:sgpr(s32) = COPY $sgpr3 @@ -118,20 +118,20 @@ ; MOVREL-LABEL: name: insert_vector_elt_s_s32_v5s32 ; MOVREL: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4, $sgpr5, $sgpr6 ; MOVREL-NEXT: {{ $}} - ; MOVREL-NEXT: [[COPY:%[0-9]+]]:sgpr_160 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4 - ; MOVREL-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; MOVREL-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; MOVREL-NEXT: $m0 = COPY [[COPY2]] - ; MOVREL-NEXT: [[S_INDIRECT_REG_WRITE_MOVREL_B32_V5_:%[0-9]+]]:sgpr_160 = S_INDIRECT_REG_WRITE_MOVREL_B32_V5 [[COPY]], [[COPY1]], 3, implicit $m0 + ; MOVREL-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_160 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4 + ; MOVREL-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; MOVREL-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; MOVREL-NEXT: $m0 = PRED_COPY [[PRED_COPY2]] + ; MOVREL-NEXT: [[S_INDIRECT_REG_WRITE_MOVREL_B32_V5_:%[0-9]+]]:sgpr_160 = S_INDIRECT_REG_WRITE_MOVREL_B32_V5 [[PRED_COPY]], [[PRED_COPY1]], 3, implicit $m0 ; MOVREL-NEXT: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_MOVREL_B32_V5_]] ; GPRIDX-LABEL: name: insert_vector_elt_s_s32_v5s32 ; GPRIDX: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4, $sgpr5, $sgpr6 ; GPRIDX-NEXT: {{ $}} - ; GPRIDX-NEXT: [[COPY:%[0-9]+]]:sgpr_160 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4 - ; GPRIDX-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GPRIDX-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GPRIDX-NEXT: $m0 = COPY [[COPY2]] - ; GPRIDX-NEXT: [[S_INDIRECT_REG_WRITE_MOVREL_B32_V5_:%[0-9]+]]:sgpr_160 = S_INDIRECT_REG_WRITE_MOVREL_B32_V5 [[COPY]], [[COPY1]], 3, implicit $m0 + ; GPRIDX-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_160 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4 + ; GPRIDX-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GPRIDX-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; GPRIDX-NEXT: $m0 = PRED_COPY [[PRED_COPY2]] + ; GPRIDX-NEXT: [[S_INDIRECT_REG_WRITE_MOVREL_B32_V5_:%[0-9]+]]:sgpr_160 = S_INDIRECT_REG_WRITE_MOVREL_B32_V5 [[PRED_COPY]], [[PRED_COPY1]], 3, implicit $m0 ; GPRIDX-NEXT: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_MOVREL_B32_V5_]] %0:sgpr(<5 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4 %1:sgpr(s32) = COPY $sgpr5 @@ -152,20 +152,20 @@ ; MOVREL-LABEL: name: insert_vector_elt_s_s32_v8s32 ; MOVREL: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8, $sgpr9 ; MOVREL-NEXT: {{ $}} - ; MOVREL-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 - ; MOVREL-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 - ; MOVREL-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr9 - ; MOVREL-NEXT: $m0 = COPY [[COPY2]] - ; MOVREL-NEXT: [[S_INDIRECT_REG_WRITE_MOVREL_B32_V8_:%[0-9]+]]:sgpr_256 = S_INDIRECT_REG_WRITE_MOVREL_B32_V8 [[COPY]], [[COPY1]], 3, implicit $m0 + ; MOVREL-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_256 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + ; MOVREL-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr8 + ; MOVREL-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr9 + ; MOVREL-NEXT: $m0 = PRED_COPY [[PRED_COPY2]] + ; MOVREL-NEXT: [[S_INDIRECT_REG_WRITE_MOVREL_B32_V8_:%[0-9]+]]:sgpr_256 = S_INDIRECT_REG_WRITE_MOVREL_B32_V8 [[PRED_COPY]], [[PRED_COPY1]], 3, implicit $m0 ; MOVREL-NEXT: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_MOVREL_B32_V8_]] ; GPRIDX-LABEL: name: insert_vector_elt_s_s32_v8s32 ; GPRIDX: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8, $sgpr9 ; GPRIDX-NEXT: {{ $}} - ; GPRIDX-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY 
$sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 - ; GPRIDX-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 - ; GPRIDX-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr9 - ; GPRIDX-NEXT: $m0 = COPY [[COPY2]] - ; GPRIDX-NEXT: [[S_INDIRECT_REG_WRITE_MOVREL_B32_V8_:%[0-9]+]]:sgpr_256 = S_INDIRECT_REG_WRITE_MOVREL_B32_V8 [[COPY]], [[COPY1]], 3, implicit $m0 + ; GPRIDX-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_256 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + ; GPRIDX-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr8 + ; GPRIDX-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr9 + ; GPRIDX-NEXT: $m0 = PRED_COPY [[PRED_COPY2]] + ; GPRIDX-NEXT: [[S_INDIRECT_REG_WRITE_MOVREL_B32_V8_:%[0-9]+]]:sgpr_256 = S_INDIRECT_REG_WRITE_MOVREL_B32_V8 [[PRED_COPY]], [[PRED_COPY1]], 3, implicit $m0 ; GPRIDX-NEXT: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_MOVREL_B32_V8_]] %0:sgpr(<8 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 %1:sgpr(s32) = COPY $sgpr8 @@ -186,20 +186,20 @@ ; MOVREL-LABEL: name: insert_vector_elt_s_s32_v16s32 ; MOVREL: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15, $sgpr16, $sgpr17 ; MOVREL-NEXT: {{ $}} - ; MOVREL-NEXT: [[COPY:%[0-9]+]]:sgpr_512 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - ; MOVREL-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr16 - ; MOVREL-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr17 - ; MOVREL-NEXT: $m0 = COPY [[COPY2]] - ; MOVREL-NEXT: [[S_INDIRECT_REG_WRITE_MOVREL_B32_V16_:%[0-9]+]]:sgpr_512 = S_INDIRECT_REG_WRITE_MOVREL_B32_V16 [[COPY]], [[COPY1]], 3, implicit $m0 + ; MOVREL-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_512 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; MOVREL-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr16 + ; MOVREL-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr17 + ; MOVREL-NEXT: $m0 = PRED_COPY [[PRED_COPY2]] + ; MOVREL-NEXT: [[S_INDIRECT_REG_WRITE_MOVREL_B32_V16_:%[0-9]+]]:sgpr_512 = S_INDIRECT_REG_WRITE_MOVREL_B32_V16 [[PRED_COPY]], [[PRED_COPY1]], 3, implicit $m0 ; MOVREL-NEXT: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_MOVREL_B32_V16_]] ; GPRIDX-LABEL: name: insert_vector_elt_s_s32_v16s32 ; GPRIDX: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15, $sgpr16, $sgpr17 ; GPRIDX-NEXT: {{ $}} - ; GPRIDX-NEXT: [[COPY:%[0-9]+]]:sgpr_512 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - ; GPRIDX-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr16 - ; GPRIDX-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr17 - ; GPRIDX-NEXT: $m0 = COPY [[COPY2]] - ; GPRIDX-NEXT: [[S_INDIRECT_REG_WRITE_MOVREL_B32_V16_:%[0-9]+]]:sgpr_512 = S_INDIRECT_REG_WRITE_MOVREL_B32_V16 [[COPY]], [[COPY1]], 3, implicit $m0 + ; GPRIDX-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_512 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; GPRIDX-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr16 + ; GPRIDX-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr17 + ; GPRIDX-NEXT: $m0 = PRED_COPY [[PRED_COPY2]] + ; GPRIDX-NEXT: [[S_INDIRECT_REG_WRITE_MOVREL_B32_V16_:%[0-9]+]]:sgpr_512 = S_INDIRECT_REG_WRITE_MOVREL_B32_V16 [[PRED_COPY]], [[PRED_COPY1]], 3, implicit $m0 ; GPRIDX-NEXT: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_MOVREL_B32_V16_]] %0:sgpr(<16 x s32>) = COPY 
$sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 %1:sgpr(s32) = COPY $sgpr16 @@ -220,20 +220,20 @@ ; MOVREL-LABEL: name: extract_vector_elt_s_s32_v32s32 ; MOVREL: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31, $sgpr40, $sgpr41 ; MOVREL-NEXT: {{ $}} - ; MOVREL-NEXT: [[COPY:%[0-9]+]]:sgpr_1024 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 - ; MOVREL-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr40 - ; MOVREL-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr41 - ; MOVREL-NEXT: $m0 = COPY [[COPY2]] - ; MOVREL-NEXT: [[S_INDIRECT_REG_WRITE_MOVREL_B32_V32_:%[0-9]+]]:sgpr_1024 = S_INDIRECT_REG_WRITE_MOVREL_B32_V32 [[COPY]], [[COPY1]], 3, implicit $m0 + ; MOVREL-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_1024 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 + ; MOVREL-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr40 + ; MOVREL-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr41 + ; MOVREL-NEXT: $m0 = PRED_COPY [[PRED_COPY2]] + ; MOVREL-NEXT: [[S_INDIRECT_REG_WRITE_MOVREL_B32_V32_:%[0-9]+]]:sgpr_1024 = S_INDIRECT_REG_WRITE_MOVREL_B32_V32 [[PRED_COPY]], [[PRED_COPY1]], 3, implicit $m0 ; MOVREL-NEXT: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_MOVREL_B32_V32_]] ; GPRIDX-LABEL: name: extract_vector_elt_s_s32_v32s32 ; GPRIDX: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31, $sgpr40, $sgpr41 ; GPRIDX-NEXT: {{ $}} - ; GPRIDX-NEXT: [[COPY:%[0-9]+]]:sgpr_1024 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 - ; GPRIDX-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr40 - ; GPRIDX-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr41 - ; GPRIDX-NEXT: $m0 = COPY [[COPY2]] - ; GPRIDX-NEXT: [[S_INDIRECT_REG_WRITE_MOVREL_B32_V32_:%[0-9]+]]:sgpr_1024 = S_INDIRECT_REG_WRITE_MOVREL_B32_V32 [[COPY]], [[COPY1]], 3, implicit $m0 + ; GPRIDX-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_1024 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 + ; GPRIDX-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr40 + ; GPRIDX-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr41 + ; GPRIDX-NEXT: $m0 = PRED_COPY [[PRED_COPY2]] + ; GPRIDX-NEXT: [[S_INDIRECT_REG_WRITE_MOVREL_B32_V32_:%[0-9]+]]:sgpr_1024 = S_INDIRECT_REG_WRITE_MOVREL_B32_V32 [[PRED_COPY]], [[PRED_COPY1]], 3, implicit $m0 ; GPRIDX-NEXT: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_MOVREL_B32_V32_]] %0:sgpr(<32 x s32>) = COPY 
$sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 %1:sgpr(s32) = COPY $sgpr40 @@ -254,20 +254,20 @@ ; MOVREL-LABEL: name: insert_vector_elt_s_s64_v2s64 ; MOVREL: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4_sgpr5, $sgpr6 ; MOVREL-NEXT: {{ $}} - ; MOVREL-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; MOVREL-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr4_sgpr5 - ; MOVREL-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; MOVREL-NEXT: $m0 = COPY [[COPY2]] - ; MOVREL-NEXT: [[S_INDIRECT_REG_WRITE_MOVREL_B64_V2_:%[0-9]+]]:sgpr_128 = S_INDIRECT_REG_WRITE_MOVREL_B64_V2 [[COPY]], [[COPY1]], 4, implicit $m0 + ; MOVREL-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_128 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; MOVREL-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr4_sgpr5 + ; MOVREL-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; MOVREL-NEXT: $m0 = PRED_COPY [[PRED_COPY2]] + ; MOVREL-NEXT: [[S_INDIRECT_REG_WRITE_MOVREL_B64_V2_:%[0-9]+]]:sgpr_128 = S_INDIRECT_REG_WRITE_MOVREL_B64_V2 [[PRED_COPY]], [[PRED_COPY1]], 4, implicit $m0 ; MOVREL-NEXT: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_MOVREL_B64_V2_]] ; GPRIDX-LABEL: name: insert_vector_elt_s_s64_v2s64 ; GPRIDX: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4_sgpr5, $sgpr6 ; GPRIDX-NEXT: {{ $}} - ; GPRIDX-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GPRIDX-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr4_sgpr5 - ; GPRIDX-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GPRIDX-NEXT: $m0 = COPY [[COPY2]] - ; GPRIDX-NEXT: [[S_INDIRECT_REG_WRITE_MOVREL_B64_V2_:%[0-9]+]]:sgpr_128 = S_INDIRECT_REG_WRITE_MOVREL_B64_V2 [[COPY]], [[COPY1]], 4, implicit $m0 + ; GPRIDX-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_128 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GPRIDX-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr4_sgpr5 + ; GPRIDX-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; GPRIDX-NEXT: $m0 = PRED_COPY [[PRED_COPY2]] + ; GPRIDX-NEXT: [[S_INDIRECT_REG_WRITE_MOVREL_B64_V2_:%[0-9]+]]:sgpr_128 = S_INDIRECT_REG_WRITE_MOVREL_B64_V2 [[PRED_COPY]], [[PRED_COPY1]], 4, implicit $m0 ; GPRIDX-NEXT: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_MOVREL_B64_V2_]] %0:sgpr(<2 x s64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 %1:sgpr(s64) = COPY $sgpr4_sgpr5 @@ -288,20 +288,20 @@ ; MOVREL-LABEL: name: insert_vector_elt_s_s64_v4s64 ; MOVREL: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10 ; MOVREL-NEXT: {{ $}} - ; MOVREL-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 - ; MOVREL-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr8_sgpr9 - ; MOVREL-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr10 - ; MOVREL-NEXT: $m0 = COPY [[COPY2]] - ; MOVREL-NEXT: [[S_INDIRECT_REG_WRITE_MOVREL_B64_V4_:%[0-9]+]]:sgpr_256 = S_INDIRECT_REG_WRITE_MOVREL_B64_V4 [[COPY]], [[COPY1]], 4, implicit $m0 + ; MOVREL-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_256 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + ; MOVREL-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr8_sgpr9 + ; MOVREL-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr10 + ; MOVREL-NEXT: $m0 = PRED_COPY [[PRED_COPY2]] + ; MOVREL-NEXT: [[S_INDIRECT_REG_WRITE_MOVREL_B64_V4_:%[0-9]+]]:sgpr_256 = S_INDIRECT_REG_WRITE_MOVREL_B64_V4 [[PRED_COPY]], [[PRED_COPY1]], 4, implicit $m0 ; MOVREL-NEXT: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_MOVREL_B64_V4_]] ; GPRIDX-LABEL: name: 
insert_vector_elt_s_s64_v4s64 ; GPRIDX: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10 ; GPRIDX-NEXT: {{ $}} - ; GPRIDX-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 - ; GPRIDX-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr8_sgpr9 - ; GPRIDX-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr10 - ; GPRIDX-NEXT: $m0 = COPY [[COPY2]] - ; GPRIDX-NEXT: [[S_INDIRECT_REG_WRITE_MOVREL_B64_V4_:%[0-9]+]]:sgpr_256 = S_INDIRECT_REG_WRITE_MOVREL_B64_V4 [[COPY]], [[COPY1]], 4, implicit $m0 + ; GPRIDX-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_256 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + ; GPRIDX-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr8_sgpr9 + ; GPRIDX-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr10 + ; GPRIDX-NEXT: $m0 = PRED_COPY [[PRED_COPY2]] + ; GPRIDX-NEXT: [[S_INDIRECT_REG_WRITE_MOVREL_B64_V4_:%[0-9]+]]:sgpr_256 = S_INDIRECT_REG_WRITE_MOVREL_B64_V4 [[PRED_COPY]], [[PRED_COPY1]], 4, implicit $m0 ; GPRIDX-NEXT: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_MOVREL_B64_V4_]] %0:sgpr(<4 x s64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 %1:sgpr(s64) = COPY $sgpr8_sgpr9 @@ -322,20 +322,20 @@ ; MOVREL-LABEL: name: insert_vector_elt_s_s64_v8s64 ; MOVREL: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15, $sgpr16_sgpr17, $sgpr18 ; MOVREL-NEXT: {{ $}} - ; MOVREL-NEXT: [[COPY:%[0-9]+]]:sgpr_512 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - ; MOVREL-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr16_sgpr17 - ; MOVREL-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr18 - ; MOVREL-NEXT: $m0 = COPY [[COPY2]] - ; MOVREL-NEXT: [[S_INDIRECT_REG_WRITE_MOVREL_B64_V8_:%[0-9]+]]:sgpr_512 = S_INDIRECT_REG_WRITE_MOVREL_B64_V8 [[COPY]], [[COPY1]], 4, implicit $m0 + ; MOVREL-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_512 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; MOVREL-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr16_sgpr17 + ; MOVREL-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr18 + ; MOVREL-NEXT: $m0 = PRED_COPY [[PRED_COPY2]] + ; MOVREL-NEXT: [[S_INDIRECT_REG_WRITE_MOVREL_B64_V8_:%[0-9]+]]:sgpr_512 = S_INDIRECT_REG_WRITE_MOVREL_B64_V8 [[PRED_COPY]], [[PRED_COPY1]], 4, implicit $m0 ; MOVREL-NEXT: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_MOVREL_B64_V8_]] ; GPRIDX-LABEL: name: insert_vector_elt_s_s64_v8s64 ; GPRIDX: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15, $sgpr16_sgpr17, $sgpr18 ; GPRIDX-NEXT: {{ $}} - ; GPRIDX-NEXT: [[COPY:%[0-9]+]]:sgpr_512 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - ; GPRIDX-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr16_sgpr17 - ; GPRIDX-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr18 - ; GPRIDX-NEXT: $m0 = COPY [[COPY2]] - ; GPRIDX-NEXT: [[S_INDIRECT_REG_WRITE_MOVREL_B64_V8_:%[0-9]+]]:sgpr_512 = S_INDIRECT_REG_WRITE_MOVREL_B64_V8 [[COPY]], [[COPY1]], 4, implicit $m0 + ; GPRIDX-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_512 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; GPRIDX-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr16_sgpr17 + ; GPRIDX-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr18 + ; GPRIDX-NEXT: $m0 = PRED_COPY [[PRED_COPY2]] + ; GPRIDX-NEXT: 
[[S_INDIRECT_REG_WRITE_MOVREL_B64_V8_:%[0-9]+]]:sgpr_512 = S_INDIRECT_REG_WRITE_MOVREL_B64_V8 [[PRED_COPY]], [[PRED_COPY1]], 4, implicit $m0 ; GPRIDX-NEXT: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_MOVREL_B64_V8_]] %0:sgpr(<8 x s64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 %1:sgpr(s64) = COPY $sgpr16_sgpr17 @@ -356,20 +356,20 @@ ; MOVREL-LABEL: name: extract_vector_elt_s_s64_v16s64 ; MOVREL: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31, $sgpr40_sgpr41, $sgpr42 ; MOVREL-NEXT: {{ $}} - ; MOVREL-NEXT: [[COPY:%[0-9]+]]:sgpr_1024 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 - ; MOVREL-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr40_sgpr41 - ; MOVREL-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr42 - ; MOVREL-NEXT: $m0 = COPY [[COPY2]] - ; MOVREL-NEXT: [[S_INDIRECT_REG_WRITE_MOVREL_B64_V16_:%[0-9]+]]:sgpr_1024 = S_INDIRECT_REG_WRITE_MOVREL_B64_V16 [[COPY]], [[COPY1]], 4, implicit $m0 + ; MOVREL-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_1024 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 + ; MOVREL-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr40_sgpr41 + ; MOVREL-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr42 + ; MOVREL-NEXT: $m0 = PRED_COPY [[PRED_COPY2]] + ; MOVREL-NEXT: [[S_INDIRECT_REG_WRITE_MOVREL_B64_V16_:%[0-9]+]]:sgpr_1024 = S_INDIRECT_REG_WRITE_MOVREL_B64_V16 [[PRED_COPY]], [[PRED_COPY1]], 4, implicit $m0 ; MOVREL-NEXT: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_MOVREL_B64_V16_]] ; GPRIDX-LABEL: name: extract_vector_elt_s_s64_v16s64 ; GPRIDX: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31, $sgpr40_sgpr41, $sgpr42 ; GPRIDX-NEXT: {{ $}} - ; GPRIDX-NEXT: [[COPY:%[0-9]+]]:sgpr_1024 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 - ; GPRIDX-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr40_sgpr41 - ; GPRIDX-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr42 - ; GPRIDX-NEXT: $m0 = COPY [[COPY2]] - ; GPRIDX-NEXT: [[S_INDIRECT_REG_WRITE_MOVREL_B64_V16_:%[0-9]+]]:sgpr_1024 = S_INDIRECT_REG_WRITE_MOVREL_B64_V16 [[COPY]], [[COPY1]], 4, implicit $m0 + ; GPRIDX-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_1024 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 + ; GPRIDX-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr40_sgpr41 + ; GPRIDX-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr42 + ; GPRIDX-NEXT: $m0 = PRED_COPY [[PRED_COPY2]] + ; GPRIDX-NEXT: [[S_INDIRECT_REG_WRITE_MOVREL_B64_V16_:%[0-9]+]]:sgpr_1024 = S_INDIRECT_REG_WRITE_MOVREL_B64_V16 [[PRED_COPY]], [[PRED_COPY1]], 4, 
implicit $m0 ; GPRIDX-NEXT: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_MOVREL_B64_V16_]] %0:sgpr(<16 x s64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 %1:sgpr(s64) = COPY $sgpr40_sgpr41 @@ -390,19 +390,19 @@ ; MOVREL-LABEL: name: insert_vector_elt_vvs_s32_v2s32 ; MOVREL: liveins: $vgpr0_vgpr1, $vgpr2, $sgpr3 ; MOVREL-NEXT: {{ $}} - ; MOVREL-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; MOVREL-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; MOVREL-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; MOVREL-NEXT: $m0 = COPY [[COPY2]] - ; MOVREL-NEXT: [[V_INDIRECT_REG_WRITE_MOVREL_B32_V2_:%[0-9]+]]:vreg_64 = V_INDIRECT_REG_WRITE_MOVREL_B32_V2 [[COPY]], [[COPY1]], 3, implicit $m0, implicit $exec + ; MOVREL-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; MOVREL-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; MOVREL-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; MOVREL-NEXT: $m0 = PRED_COPY [[PRED_COPY2]] + ; MOVREL-NEXT: [[V_INDIRECT_REG_WRITE_MOVREL_B32_V2_:%[0-9]+]]:vreg_64 = V_INDIRECT_REG_WRITE_MOVREL_B32_V2 [[PRED_COPY]], [[PRED_COPY1]], 3, implicit $m0, implicit $exec ; MOVREL-NEXT: S_ENDPGM 0, implicit [[V_INDIRECT_REG_WRITE_MOVREL_B32_V2_]] ; GPRIDX-LABEL: name: insert_vector_elt_vvs_s32_v2s32 ; GPRIDX: liveins: $vgpr0_vgpr1, $vgpr2, $sgpr3 ; GPRIDX-NEXT: {{ $}} - ; GPRIDX-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GPRIDX-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GPRIDX-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GPRIDX-NEXT: [[V_INDIRECT_REG_WRITE_GPR_IDX_B32_V2_:%[0-9]+]]:vreg_64 = V_INDIRECT_REG_WRITE_GPR_IDX_B32_V2 [[COPY]], [[COPY1]], [[COPY2]], 3, implicit-def $m0, implicit $m0, implicit $exec + ; GPRIDX-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GPRIDX-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GPRIDX-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GPRIDX-NEXT: [[V_INDIRECT_REG_WRITE_GPR_IDX_B32_V2_:%[0-9]+]]:vreg_64 = V_INDIRECT_REG_WRITE_GPR_IDX_B32_V2 [[PRED_COPY]], [[PRED_COPY1]], [[PRED_COPY2]], 3, implicit-def $m0, implicit $m0, implicit $exec ; GPRIDX-NEXT: S_ENDPGM 0, implicit [[V_INDIRECT_REG_WRITE_GPR_IDX_B32_V2_]] %0:vgpr(<2 x s32>) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 @@ -423,19 +423,19 @@ ; MOVREL-LABEL: name: insert_vector_elt_vvs_s32_v3s32 ; MOVREL: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3, $sgpr4 ; MOVREL-NEXT: {{ $}} - ; MOVREL-NEXT: [[COPY:%[0-9]+]]:vreg_96 = COPY $vgpr0_vgpr1_vgpr2 - ; MOVREL-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; MOVREL-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; MOVREL-NEXT: $m0 = COPY [[COPY2]] - ; MOVREL-NEXT: [[V_INDIRECT_REG_WRITE_MOVREL_B32_V3_:%[0-9]+]]:vreg_96 = V_INDIRECT_REG_WRITE_MOVREL_B32_V3 [[COPY]], [[COPY1]], 3, implicit $m0, implicit $exec + ; MOVREL-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_96 = PRED_COPY $vgpr0_vgpr1_vgpr2 + ; MOVREL-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; MOVREL-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; MOVREL-NEXT: $m0 = PRED_COPY [[PRED_COPY2]] + ; MOVREL-NEXT: [[V_INDIRECT_REG_WRITE_MOVREL_B32_V3_:%[0-9]+]]:vreg_96 = V_INDIRECT_REG_WRITE_MOVREL_B32_V3 [[PRED_COPY]], [[PRED_COPY1]], 3, implicit $m0, implicit $exec ; MOVREL-NEXT: S_ENDPGM 0, implicit [[V_INDIRECT_REG_WRITE_MOVREL_B32_V3_]] ; GPRIDX-LABEL: name: insert_vector_elt_vvs_s32_v3s32 ; GPRIDX: 
liveins: $vgpr0_vgpr1_vgpr2, $vgpr3, $sgpr4 ; GPRIDX-NEXT: {{ $}} - ; GPRIDX-NEXT: [[COPY:%[0-9]+]]:vreg_96 = COPY $vgpr0_vgpr1_vgpr2 - ; GPRIDX-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GPRIDX-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GPRIDX-NEXT: [[V_INDIRECT_REG_WRITE_GPR_IDX_B32_V3_:%[0-9]+]]:vreg_96 = V_INDIRECT_REG_WRITE_GPR_IDX_B32_V3 [[COPY]], [[COPY1]], [[COPY2]], 3, implicit-def $m0, implicit $m0, implicit $exec + ; GPRIDX-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_96 = PRED_COPY $vgpr0_vgpr1_vgpr2 + ; GPRIDX-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GPRIDX-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GPRIDX-NEXT: [[V_INDIRECT_REG_WRITE_GPR_IDX_B32_V3_:%[0-9]+]]:vreg_96 = V_INDIRECT_REG_WRITE_GPR_IDX_B32_V3 [[PRED_COPY]], [[PRED_COPY1]], [[PRED_COPY2]], 3, implicit-def $m0, implicit $m0, implicit $exec ; GPRIDX-NEXT: S_ENDPGM 0, implicit [[V_INDIRECT_REG_WRITE_GPR_IDX_B32_V3_]] %0:vgpr(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 %1:vgpr(s32) = COPY $vgpr3 @@ -456,19 +456,19 @@ ; MOVREL-LABEL: name: insert_vector_elt_vvs_s32_v4s32 ; MOVREL: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4, $vgpr5 ; MOVREL-NEXT: {{ $}} - ; MOVREL-NEXT: [[COPY:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; MOVREL-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; MOVREL-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; MOVREL-NEXT: $m0 = COPY [[COPY2]] - ; MOVREL-NEXT: [[V_INDIRECT_REG_WRITE_MOVREL_B32_V4_:%[0-9]+]]:vreg_128 = V_INDIRECT_REG_WRITE_MOVREL_B32_V4 [[COPY]], [[COPY1]], 3, implicit $m0, implicit $exec + ; MOVREL-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_128 = PRED_COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; MOVREL-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; MOVREL-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; MOVREL-NEXT: $m0 = PRED_COPY [[PRED_COPY2]] + ; MOVREL-NEXT: [[V_INDIRECT_REG_WRITE_MOVREL_B32_V4_:%[0-9]+]]:vreg_128 = V_INDIRECT_REG_WRITE_MOVREL_B32_V4 [[PRED_COPY]], [[PRED_COPY1]], 3, implicit $m0, implicit $exec ; MOVREL-NEXT: S_ENDPGM 0, implicit [[V_INDIRECT_REG_WRITE_MOVREL_B32_V4_]] ; GPRIDX-LABEL: name: insert_vector_elt_vvs_s32_v4s32 ; GPRIDX: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4, $vgpr5 ; GPRIDX-NEXT: {{ $}} - ; GPRIDX-NEXT: [[COPY:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GPRIDX-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GPRIDX-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GPRIDX-NEXT: [[V_INDIRECT_REG_WRITE_GPR_IDX_B32_V4_:%[0-9]+]]:vreg_128 = V_INDIRECT_REG_WRITE_GPR_IDX_B32_V4 [[COPY]], [[COPY1]], [[COPY2]], 3, implicit-def $m0, implicit $m0, implicit $exec + ; GPRIDX-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_128 = PRED_COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GPRIDX-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GPRIDX-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GPRIDX-NEXT: [[V_INDIRECT_REG_WRITE_GPR_IDX_B32_V4_:%[0-9]+]]:vreg_128 = V_INDIRECT_REG_WRITE_GPR_IDX_B32_V4 [[PRED_COPY]], [[PRED_COPY1]], [[PRED_COPY2]], 3, implicit-def $m0, implicit $m0, implicit $exec ; GPRIDX-NEXT: S_ENDPGM 0, implicit [[V_INDIRECT_REG_WRITE_GPR_IDX_B32_V4_]] %0:vgpr(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:vgpr(s32) = COPY $vgpr3 @@ -489,19 +489,19 @@ ; MOVREL-LABEL: name: insert_vector_elt_vvs_s32_v5s32 ; MOVREL: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4, $vgpr5, $sgpr6 ; MOVREL-NEXT: {{ $}} - ; MOVREL-NEXT: [[COPY:%[0-9]+]]:vreg_160 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 - ; MOVREL-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr5 - ; MOVREL-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; 
MOVREL-NEXT: $m0 = COPY [[COPY2]] - ; MOVREL-NEXT: [[V_INDIRECT_REG_WRITE_MOVREL_B32_V5_:%[0-9]+]]:vreg_160 = V_INDIRECT_REG_WRITE_MOVREL_B32_V5 [[COPY]], [[COPY1]], 3, implicit $m0, implicit $exec + ; MOVREL-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_160 = PRED_COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 + ; MOVREL-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr5 + ; MOVREL-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; MOVREL-NEXT: $m0 = PRED_COPY [[PRED_COPY2]] + ; MOVREL-NEXT: [[V_INDIRECT_REG_WRITE_MOVREL_B32_V5_:%[0-9]+]]:vreg_160 = V_INDIRECT_REG_WRITE_MOVREL_B32_V5 [[PRED_COPY]], [[PRED_COPY1]], 3, implicit $m0, implicit $exec ; MOVREL-NEXT: S_ENDPGM 0, implicit [[V_INDIRECT_REG_WRITE_MOVREL_B32_V5_]] ; GPRIDX-LABEL: name: insert_vector_elt_vvs_s32_v5s32 ; GPRIDX: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4, $vgpr5, $sgpr6 ; GPRIDX-NEXT: {{ $}} - ; GPRIDX-NEXT: [[COPY:%[0-9]+]]:vreg_160 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 - ; GPRIDX-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr5 - ; GPRIDX-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GPRIDX-NEXT: [[V_INDIRECT_REG_WRITE_GPR_IDX_B32_V5_:%[0-9]+]]:vreg_160 = V_INDIRECT_REG_WRITE_GPR_IDX_B32_V5 [[COPY]], [[COPY1]], [[COPY2]], 3, implicit-def $m0, implicit $m0, implicit $exec + ; GPRIDX-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_160 = PRED_COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 + ; GPRIDX-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr5 + ; GPRIDX-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; GPRIDX-NEXT: [[V_INDIRECT_REG_WRITE_GPR_IDX_B32_V5_:%[0-9]+]]:vreg_160 = V_INDIRECT_REG_WRITE_GPR_IDX_B32_V5 [[PRED_COPY]], [[PRED_COPY1]], [[PRED_COPY2]], 3, implicit-def $m0, implicit $m0, implicit $exec ; GPRIDX-NEXT: S_ENDPGM 0, implicit [[V_INDIRECT_REG_WRITE_GPR_IDX_B32_V5_]] %0:vgpr(<5 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 %1:vgpr(s32) = COPY $vgpr5 @@ -522,19 +522,19 @@ ; MOVREL-LABEL: name: insert_vector_elt_vvs_s32_v8s32 ; MOVREL: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8, $sgpr9 ; MOVREL-NEXT: {{ $}} - ; MOVREL-NEXT: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; MOVREL-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr8 - ; MOVREL-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr9 - ; MOVREL-NEXT: $m0 = COPY [[COPY2]] - ; MOVREL-NEXT: [[V_INDIRECT_REG_WRITE_MOVREL_B32_V8_:%[0-9]+]]:vreg_256 = V_INDIRECT_REG_WRITE_MOVREL_B32_V8 [[COPY]], [[COPY1]], 3, implicit $m0, implicit $exec + ; MOVREL-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_256 = PRED_COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; MOVREL-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr8 + ; MOVREL-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr9 + ; MOVREL-NEXT: $m0 = PRED_COPY [[PRED_COPY2]] + ; MOVREL-NEXT: [[V_INDIRECT_REG_WRITE_MOVREL_B32_V8_:%[0-9]+]]:vreg_256 = V_INDIRECT_REG_WRITE_MOVREL_B32_V8 [[PRED_COPY]], [[PRED_COPY1]], 3, implicit $m0, implicit $exec ; MOVREL-NEXT: S_ENDPGM 0, implicit [[V_INDIRECT_REG_WRITE_MOVREL_B32_V8_]] ; GPRIDX-LABEL: name: insert_vector_elt_vvs_s32_v8s32 ; GPRIDX: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8, $sgpr9 ; GPRIDX-NEXT: {{ $}} - ; GPRIDX-NEXT: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; GPRIDX-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr8 - ; GPRIDX-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr9 - ; GPRIDX-NEXT: [[V_INDIRECT_REG_WRITE_GPR_IDX_B32_V8_:%[0-9]+]]:vreg_256 = V_INDIRECT_REG_WRITE_GPR_IDX_B32_V8 [[COPY]], [[COPY1]], [[COPY2]], 3, implicit-def $m0, implicit $m0, implicit $exec 
+ ; GPRIDX-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_256 = PRED_COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; GPRIDX-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr8 + ; GPRIDX-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr9 + ; GPRIDX-NEXT: [[V_INDIRECT_REG_WRITE_GPR_IDX_B32_V8_:%[0-9]+]]:vreg_256 = V_INDIRECT_REG_WRITE_GPR_IDX_B32_V8 [[PRED_COPY]], [[PRED_COPY1]], [[PRED_COPY2]], 3, implicit-def $m0, implicit $m0, implicit $exec ; GPRIDX-NEXT: S_ENDPGM 0, implicit [[V_INDIRECT_REG_WRITE_GPR_IDX_B32_V8_]] %0:vgpr(<8 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 %1:vgpr(s32) = COPY $vgpr8 @@ -555,19 +555,19 @@ ; MOVREL-LABEL: name: insert_vector_elt_vvs_s32_v8s32_add_1 ; MOVREL: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8, $sgpr9 ; MOVREL-NEXT: {{ $}} - ; MOVREL-NEXT: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; MOVREL-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr8 - ; MOVREL-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr9 - ; MOVREL-NEXT: $m0 = COPY [[COPY2]] - ; MOVREL-NEXT: [[V_INDIRECT_REG_WRITE_MOVREL_B32_V8_:%[0-9]+]]:vreg_256 = V_INDIRECT_REG_WRITE_MOVREL_B32_V8 [[COPY]], [[COPY1]], 11, implicit $m0, implicit $exec + ; MOVREL-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_256 = PRED_COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; MOVREL-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr8 + ; MOVREL-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr9 + ; MOVREL-NEXT: $m0 = PRED_COPY [[PRED_COPY2]] + ; MOVREL-NEXT: [[V_INDIRECT_REG_WRITE_MOVREL_B32_V8_:%[0-9]+]]:vreg_256 = V_INDIRECT_REG_WRITE_MOVREL_B32_V8 [[PRED_COPY]], [[PRED_COPY1]], 11, implicit $m0, implicit $exec ; MOVREL-NEXT: S_ENDPGM 0, implicit [[V_INDIRECT_REG_WRITE_MOVREL_B32_V8_]] ; GPRIDX-LABEL: name: insert_vector_elt_vvs_s32_v8s32_add_1 ; GPRIDX: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8, $sgpr9 ; GPRIDX-NEXT: {{ $}} - ; GPRIDX-NEXT: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; GPRIDX-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr8 - ; GPRIDX-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr9 - ; GPRIDX-NEXT: [[V_INDIRECT_REG_WRITE_GPR_IDX_B32_V8_:%[0-9]+]]:vreg_256 = V_INDIRECT_REG_WRITE_GPR_IDX_B32_V8 [[COPY]], [[COPY1]], [[COPY2]], 11, implicit-def $m0, implicit $m0, implicit $exec + ; GPRIDX-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_256 = PRED_COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; GPRIDX-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr8 + ; GPRIDX-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr9 + ; GPRIDX-NEXT: [[V_INDIRECT_REG_WRITE_GPR_IDX_B32_V8_:%[0-9]+]]:vreg_256 = V_INDIRECT_REG_WRITE_GPR_IDX_B32_V8 [[PRED_COPY]], [[PRED_COPY1]], [[PRED_COPY2]], 11, implicit-def $m0, implicit $m0, implicit $exec ; GPRIDX-NEXT: S_ENDPGM 0, implicit [[V_INDIRECT_REG_WRITE_GPR_IDX_B32_V8_]] %0:vgpr(<8 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 %1:vgpr(s32) = COPY $vgpr8 @@ -590,23 +590,23 @@ ; MOVREL-LABEL: name: insert_vector_elt_vvs_s32_v8s32_add_8 ; MOVREL: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8, $sgpr9 ; MOVREL-NEXT: {{ $}} - ; MOVREL-NEXT: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; MOVREL-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr8 - ; MOVREL-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr9 + ; MOVREL-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_256 = PRED_COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; MOVREL-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = 
PRED_COPY $vgpr8 + ; MOVREL-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr9 ; MOVREL-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 8 - ; MOVREL-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc - ; MOVREL-NEXT: $m0 = COPY [[S_ADD_I32_]] - ; MOVREL-NEXT: [[V_INDIRECT_REG_WRITE_MOVREL_B32_V8_:%[0-9]+]]:vreg_256 = V_INDIRECT_REG_WRITE_MOVREL_B32_V8 [[COPY]], [[COPY1]], 3, implicit $m0, implicit $exec + ; MOVREL-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[PRED_COPY2]], [[S_MOV_B32_]], implicit-def $scc + ; MOVREL-NEXT: $m0 = PRED_COPY [[S_ADD_I32_]] + ; MOVREL-NEXT: [[V_INDIRECT_REG_WRITE_MOVREL_B32_V8_:%[0-9]+]]:vreg_256 = V_INDIRECT_REG_WRITE_MOVREL_B32_V8 [[PRED_COPY]], [[PRED_COPY1]], 3, implicit $m0, implicit $exec ; MOVREL-NEXT: S_ENDPGM 0, implicit [[V_INDIRECT_REG_WRITE_MOVREL_B32_V8_]] ; GPRIDX-LABEL: name: insert_vector_elt_vvs_s32_v8s32_add_8 ; GPRIDX: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8, $sgpr9 ; GPRIDX-NEXT: {{ $}} - ; GPRIDX-NEXT: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; GPRIDX-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr8 - ; GPRIDX-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr9 + ; GPRIDX-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_256 = PRED_COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; GPRIDX-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr8 + ; GPRIDX-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr9 ; GPRIDX-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 8 - ; GPRIDX-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc - ; GPRIDX-NEXT: [[V_INDIRECT_REG_WRITE_GPR_IDX_B32_V8_:%[0-9]+]]:vreg_256 = V_INDIRECT_REG_WRITE_GPR_IDX_B32_V8 [[COPY]], [[COPY1]], [[S_ADD_I32_]], 3, implicit-def $m0, implicit $m0, implicit $exec + ; GPRIDX-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[PRED_COPY2]], [[S_MOV_B32_]], implicit-def $scc + ; GPRIDX-NEXT: [[V_INDIRECT_REG_WRITE_GPR_IDX_B32_V8_:%[0-9]+]]:vreg_256 = V_INDIRECT_REG_WRITE_GPR_IDX_B32_V8 [[PRED_COPY]], [[PRED_COPY1]], [[S_ADD_I32_]], 3, implicit-def $m0, implicit $m0, implicit $exec ; GPRIDX-NEXT: S_ENDPGM 0, implicit [[V_INDIRECT_REG_WRITE_GPR_IDX_B32_V8_]] %0:vgpr(<8 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 %1:vgpr(s32) = COPY $vgpr8 @@ -629,20 +629,20 @@ ; MOVREL-LABEL: name: insert_vector_elt_s_s32_v8s32_add_1 ; MOVREL: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8, $sgpr9 ; MOVREL-NEXT: {{ $}} - ; MOVREL-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 - ; MOVREL-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 - ; MOVREL-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr9 - ; MOVREL-NEXT: $m0 = COPY [[COPY2]] - ; MOVREL-NEXT: [[S_INDIRECT_REG_WRITE_MOVREL_B32_V8_:%[0-9]+]]:sgpr_256 = S_INDIRECT_REG_WRITE_MOVREL_B32_V8 [[COPY]], [[COPY1]], 11, implicit $m0 + ; MOVREL-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_256 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + ; MOVREL-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr8 + ; MOVREL-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr9 + ; MOVREL-NEXT: $m0 = PRED_COPY [[PRED_COPY2]] + ; MOVREL-NEXT: [[S_INDIRECT_REG_WRITE_MOVREL_B32_V8_:%[0-9]+]]:sgpr_256 = S_INDIRECT_REG_WRITE_MOVREL_B32_V8 [[PRED_COPY]], [[PRED_COPY1]], 11, implicit $m0 ; MOVREL-NEXT: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_MOVREL_B32_V8_]] ; GPRIDX-LABEL: name: insert_vector_elt_s_s32_v8s32_add_1 ; GPRIDX: liveins: 
$sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8, $sgpr9 ; GPRIDX-NEXT: {{ $}} - ; GPRIDX-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 - ; GPRIDX-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 - ; GPRIDX-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr9 - ; GPRIDX-NEXT: $m0 = COPY [[COPY2]] - ; GPRIDX-NEXT: [[S_INDIRECT_REG_WRITE_MOVREL_B32_V8_:%[0-9]+]]:sgpr_256 = S_INDIRECT_REG_WRITE_MOVREL_B32_V8 [[COPY]], [[COPY1]], 11, implicit $m0 + ; GPRIDX-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_256 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + ; GPRIDX-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr8 + ; GPRIDX-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr9 + ; GPRIDX-NEXT: $m0 = PRED_COPY [[PRED_COPY2]] + ; GPRIDX-NEXT: [[S_INDIRECT_REG_WRITE_MOVREL_B32_V8_:%[0-9]+]]:sgpr_256 = S_INDIRECT_REG_WRITE_MOVREL_B32_V8 [[PRED_COPY]], [[PRED_COPY1]], 11, implicit $m0 ; GPRIDX-NEXT: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_MOVREL_B32_V8_]] %0:sgpr(<8 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 %1:sgpr(s32) = COPY $sgpr8 @@ -665,24 +665,24 @@ ; MOVREL-LABEL: name: insert_vector_elt_s_s32_v8s32_add_8 ; MOVREL: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8, $sgpr9 ; MOVREL-NEXT: {{ $}} - ; MOVREL-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 - ; MOVREL-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 - ; MOVREL-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr9 + ; MOVREL-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_256 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + ; MOVREL-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr8 + ; MOVREL-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr9 ; MOVREL-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 8 - ; MOVREL-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc - ; MOVREL-NEXT: $m0 = COPY [[S_ADD_I32_]] - ; MOVREL-NEXT: [[S_INDIRECT_REG_WRITE_MOVREL_B32_V8_:%[0-9]+]]:sgpr_256 = S_INDIRECT_REG_WRITE_MOVREL_B32_V8 [[COPY]], [[COPY1]], 3, implicit $m0 + ; MOVREL-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[PRED_COPY2]], [[S_MOV_B32_]], implicit-def $scc + ; MOVREL-NEXT: $m0 = PRED_COPY [[S_ADD_I32_]] + ; MOVREL-NEXT: [[S_INDIRECT_REG_WRITE_MOVREL_B32_V8_:%[0-9]+]]:sgpr_256 = S_INDIRECT_REG_WRITE_MOVREL_B32_V8 [[PRED_COPY]], [[PRED_COPY1]], 3, implicit $m0 ; MOVREL-NEXT: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_MOVREL_B32_V8_]] ; GPRIDX-LABEL: name: insert_vector_elt_s_s32_v8s32_add_8 ; GPRIDX: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8, $sgpr9 ; GPRIDX-NEXT: {{ $}} - ; GPRIDX-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 - ; GPRIDX-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 - ; GPRIDX-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr9 + ; GPRIDX-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_256 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + ; GPRIDX-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr8 + ; GPRIDX-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr9 ; GPRIDX-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 8 - ; GPRIDX-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc - ; GPRIDX-NEXT: $m0 = COPY [[S_ADD_I32_]] - ; GPRIDX-NEXT: [[S_INDIRECT_REG_WRITE_MOVREL_B32_V8_:%[0-9]+]]:sgpr_256 = S_INDIRECT_REG_WRITE_MOVREL_B32_V8 [[COPY]], [[COPY1]], 3, implicit $m0 + ; GPRIDX-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 
[[PRED_COPY2]], [[S_MOV_B32_]], implicit-def $scc + ; GPRIDX-NEXT: $m0 = PRED_COPY [[S_ADD_I32_]] + ; GPRIDX-NEXT: [[S_INDIRECT_REG_WRITE_MOVREL_B32_V8_:%[0-9]+]]:sgpr_256 = S_INDIRECT_REG_WRITE_MOVREL_B32_V8 [[PRED_COPY]], [[PRED_COPY1]], 3, implicit $m0 ; GPRIDX-NEXT: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_MOVREL_B32_V8_]] %0:sgpr(<8 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 %1:sgpr(s32) = COPY $sgpr8 @@ -707,20 +707,20 @@ ; MOVREL-LABEL: name: insert_vector_elt_s_s32_v4s32_const_idx ; MOVREL: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4 ; MOVREL-NEXT: {{ $}} - ; MOVREL-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; MOVREL-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; MOVREL-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_128 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; MOVREL-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 ; MOVREL-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; MOVREL-NEXT: $m0 = COPY [[S_MOV_B32_]] - ; MOVREL-NEXT: [[S_INDIRECT_REG_WRITE_MOVREL_B32_V4_:%[0-9]+]]:sgpr_128 = S_INDIRECT_REG_WRITE_MOVREL_B32_V4 [[COPY]], [[COPY1]], 3, implicit $m0 + ; MOVREL-NEXT: $m0 = PRED_COPY [[S_MOV_B32_]] + ; MOVREL-NEXT: [[S_INDIRECT_REG_WRITE_MOVREL_B32_V4_:%[0-9]+]]:sgpr_128 = S_INDIRECT_REG_WRITE_MOVREL_B32_V4 [[PRED_COPY]], [[PRED_COPY1]], 3, implicit $m0 ; MOVREL-NEXT: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_MOVREL_B32_V4_]] ; GPRIDX-LABEL: name: insert_vector_elt_s_s32_v4s32_const_idx ; GPRIDX: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4 ; GPRIDX-NEXT: {{ $}} - ; GPRIDX-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GPRIDX-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GPRIDX-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_128 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GPRIDX-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 ; GPRIDX-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GPRIDX-NEXT: $m0 = COPY [[S_MOV_B32_]] - ; GPRIDX-NEXT: [[S_INDIRECT_REG_WRITE_MOVREL_B32_V4_:%[0-9]+]]:sgpr_128 = S_INDIRECT_REG_WRITE_MOVREL_B32_V4 [[COPY]], [[COPY1]], 3, implicit $m0 + ; GPRIDX-NEXT: $m0 = PRED_COPY [[S_MOV_B32_]] + ; GPRIDX-NEXT: [[S_INDIRECT_REG_WRITE_MOVREL_B32_V4_:%[0-9]+]]:sgpr_128 = S_INDIRECT_REG_WRITE_MOVREL_B32_V4 [[PRED_COPY]], [[PRED_COPY1]], 3, implicit $m0 ; GPRIDX-NEXT: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_MOVREL_B32_V4_]] %0:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 %1:sgpr(s32) = COPY $sgpr4 @@ -741,19 +741,19 @@ ; MOVREL-LABEL: name: insert_vector_elt_v_s32_v4s32_const_idx ; MOVREL: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4 ; MOVREL-NEXT: {{ $}} - ; MOVREL-NEXT: [[COPY:%[0-9]+]]:vreg_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; MOVREL-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; MOVREL-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_128 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; MOVREL-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 ; MOVREL-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; MOVREL-NEXT: $m0 = COPY [[S_MOV_B32_]] - ; MOVREL-NEXT: [[V_INDIRECT_REG_WRITE_MOVREL_B32_V4_:%[0-9]+]]:vreg_128 = V_INDIRECT_REG_WRITE_MOVREL_B32_V4 [[COPY]], [[COPY1]], 3, implicit $m0, implicit $exec + ; MOVREL-NEXT: $m0 = PRED_COPY [[S_MOV_B32_]] + ; MOVREL-NEXT: [[V_INDIRECT_REG_WRITE_MOVREL_B32_V4_:%[0-9]+]]:vreg_128 = V_INDIRECT_REG_WRITE_MOVREL_B32_V4 [[PRED_COPY]], [[PRED_COPY1]], 3, implicit $m0, implicit $exec ; MOVREL-NEXT: S_ENDPGM 0, implicit [[V_INDIRECT_REG_WRITE_MOVREL_B32_V4_]] ; GPRIDX-LABEL: name: insert_vector_elt_v_s32_v4s32_const_idx ; GPRIDX: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, 
$sgpr4 ; GPRIDX-NEXT: {{ $}} - ; GPRIDX-NEXT: [[COPY:%[0-9]+]]:vreg_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GPRIDX-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GPRIDX-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_128 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GPRIDX-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 ; GPRIDX-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GPRIDX-NEXT: [[V_INDIRECT_REG_WRITE_GPR_IDX_B32_V4_:%[0-9]+]]:vreg_128 = V_INDIRECT_REG_WRITE_GPR_IDX_B32_V4 [[COPY]], [[COPY1]], [[S_MOV_B32_]], 3, implicit-def $m0, implicit $m0, implicit $exec + ; GPRIDX-NEXT: [[V_INDIRECT_REG_WRITE_GPR_IDX_B32_V4_:%[0-9]+]]:vreg_128 = V_INDIRECT_REG_WRITE_GPR_IDX_B32_V4 [[PRED_COPY]], [[PRED_COPY1]], [[S_MOV_B32_]], 3, implicit-def $m0, implicit $m0, implicit $exec ; GPRIDX-NEXT: S_ENDPGM 0, implicit [[V_INDIRECT_REG_WRITE_GPR_IDX_B32_V4_]] %0:vgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 %1:sgpr(s32) = COPY $sgpr4 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-insert.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-insert.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-insert.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-insert.mir @@ -28,7 +28,7 @@ ; CHECK-NEXT: [[INSERT_SUBREG13:%[0-9]+]]:sgpr_512 = INSERT_SUBREG [[INSERT_SUBREG12]], [[DEF1]], %subreg.sub13 ; CHECK-NEXT: [[INSERT_SUBREG14:%[0-9]+]]:sgpr_512 = INSERT_SUBREG [[INSERT_SUBREG13]], [[DEF1]], %subreg.sub14 ; CHECK-NEXT: [[INSERT_SUBREG15:%[0-9]+]]:sgpr_512 = INSERT_SUBREG [[INSERT_SUBREG14]], [[DEF1]], %subreg.sub15 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY [[INSERT_SUBREG15]] + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = PRED_COPY [[INSERT_SUBREG15]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 %0:sgpr(s512) = G_IMPLICIT_DEF %1:sgpr(s32) = G_IMPLICIT_DEF @@ -64,9 +64,9 @@ ; CHECK-LABEL: name: insert_v_s64_v_s32_0 ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:vreg_64 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:vreg_64 = INSERT_SUBREG [[PRED_COPY]], [[PRED_COPY1]], %subreg.sub0 ; CHECK-NEXT: S_ENDPGM 0, implicit [[INSERT_SUBREG]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 @@ -86,9 +86,9 @@ ; CHECK-LABEL: name: insert_v_s64_v_s32_32 ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:vreg_64 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub1 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:vreg_64 = INSERT_SUBREG [[PRED_COPY]], [[PRED_COPY1]], %subreg.sub1 ; CHECK-NEXT: S_ENDPGM 0, implicit [[INSERT_SUBREG]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 @@ -108,9 +108,9 @@ ; CHECK-LABEL: name: insert_s_s64_s_s32_0 ; CHECK: liveins: $sgpr0_sgpr1, $sgpr2 ; 
CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:sreg_64 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:sreg_64 = INSERT_SUBREG [[PRED_COPY]], [[PRED_COPY1]], %subreg.sub0 ; CHECK-NEXT: S_ENDPGM 0, implicit [[INSERT_SUBREG]] %0:sgpr(s64) = COPY $sgpr0_sgpr1 %1:sgpr(s32) = COPY $sgpr2 @@ -130,9 +130,9 @@ ; CHECK-LABEL: name: insert_s_s64_s_s32_32 ; CHECK: liveins: $sgpr0_sgpr1, $sgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:sreg_64 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub1 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:sreg_64 = INSERT_SUBREG [[PRED_COPY]], [[PRED_COPY1]], %subreg.sub1 ; CHECK-NEXT: S_ENDPGM 0, implicit [[INSERT_SUBREG]] %0:sgpr(s64) = COPY $sgpr0_sgpr1 %1:sgpr(s32) = COPY $sgpr2 @@ -152,9 +152,9 @@ ; CHECK-LABEL: name: insert_s_s64_v_s32_32 ; CHECK: liveins: $sgpr0_sgpr1, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:vreg_64 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub1 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:vreg_64 = INSERT_SUBREG [[PRED_COPY]], [[PRED_COPY1]], %subreg.sub1 ; CHECK-NEXT: S_ENDPGM 0, implicit [[INSERT_SUBREG]] %0:sgpr(s64) = COPY $sgpr0_sgpr1 %1:vgpr(s32) = COPY $vgpr2 @@ -174,9 +174,9 @@ ; CHECK-LABEL: name: insert_v_s64_s_s32_32 ; CHECK: liveins: $vgpr0_vgpr1, $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:vreg_64 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub1 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:vreg_64 = INSERT_SUBREG [[PRED_COPY]], [[PRED_COPY1]], %subreg.sub1 ; CHECK-NEXT: S_ENDPGM 0, implicit [[INSERT_SUBREG]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:sgpr(s32) = COPY $sgpr0 @@ -196,9 +196,9 @@ ; CHECK-LABEL: name: insert_v_s96_v_s64_0 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_96 = COPY $vgpr0_vgpr1_vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4 - ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:vreg_96 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub0_sub1 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_96 = PRED_COPY $vgpr0_vgpr1_vgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr3_vgpr4 + ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:vreg_96 = INSERT_SUBREG [[PRED_COPY]], [[PRED_COPY1]], %subreg.sub0_sub1 ; CHECK-NEXT: S_ENDPGM 0, implicit [[INSERT_SUBREG]] %0:vgpr(s96) = COPY $vgpr0_vgpr1_vgpr2 %1:vgpr(s64) = COPY $vgpr3_vgpr4 @@ -218,9 +218,9 @@ ; CHECK-LABEL: name: insert_v_s96_v_s64_32 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2, 
$vgpr3_vgpr4 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_96 = COPY $vgpr0_vgpr1_vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4 - ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:vreg_96 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub1_sub2 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_96 = PRED_COPY $vgpr0_vgpr1_vgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr3_vgpr4 + ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:vreg_96 = INSERT_SUBREG [[PRED_COPY]], [[PRED_COPY1]], %subreg.sub1_sub2 ; CHECK-NEXT: S_ENDPGM 0, implicit [[INSERT_SUBREG]] %0:vgpr(s96) = COPY $vgpr0_vgpr1_vgpr2 %1:vgpr(s64) = COPY $vgpr3_vgpr4 @@ -240,9 +240,9 @@ ; CHECK-LABEL: name: insert_s_s96_s_s64_0 ; CHECK: liveins: $sgpr0_sgpr1_sgpr2, $sgpr4_sgpr5 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_96_with_sub0_sub1 = COPY $sgpr0_sgpr1_sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:sgpr_96 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub0_sub1 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_96_with_sub0_sub1 = PRED_COPY $sgpr0_sgpr1_sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:sgpr_96 = INSERT_SUBREG [[PRED_COPY]], [[PRED_COPY1]], %subreg.sub0_sub1 ; CHECK-NEXT: S_ENDPGM 0, implicit [[INSERT_SUBREG]] %0:sgpr(s96) = COPY $sgpr0_sgpr1_sgpr2 %1:sgpr(s64) = COPY $sgpr4_sgpr5 @@ -262,9 +262,9 @@ ; CHECK-LABEL: name: insert_s_s96_s_s64_32 ; CHECK: liveins: $sgpr0_sgpr1_sgpr2, $sgpr4_sgpr5 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_96_with_sub1_sub2 = COPY $sgpr0_sgpr1_sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:sgpr_96 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub1_sub2 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_96_with_sub1_sub2 = PRED_COPY $sgpr0_sgpr1_sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:sgpr_96 = INSERT_SUBREG [[PRED_COPY]], [[PRED_COPY1]], %subreg.sub1_sub2 ; CHECK-NEXT: S_ENDPGM 0, implicit [[INSERT_SUBREG]] %0:sgpr(s96) = COPY $sgpr0_sgpr1_sgpr2 %1:sgpr(s64) = COPY $sgpr4_sgpr5 @@ -284,9 +284,9 @@ ; CHECK-LABEL: name: insert_s_s128_s_s64_0 ; CHECK: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4_sgpr5 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:sgpr_128 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub0_sub1 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_128 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:sgpr_128 = INSERT_SUBREG [[PRED_COPY]], [[PRED_COPY1]], %subreg.sub0_sub1 ; CHECK-NEXT: S_ENDPGM 0, implicit [[INSERT_SUBREG]] %0:sgpr(s128) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 %1:sgpr(s64) = COPY $sgpr4_sgpr5 @@ -321,9 +321,9 @@ ; CHECK-LABEL: name: insert_s_s128_s_s64_64 ; CHECK: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4_sgpr5 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:sgpr_128 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub2_sub3 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_128 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64 = PRED_COPY 
$sgpr4_sgpr5 + ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:sgpr_128 = INSERT_SUBREG [[PRED_COPY]], [[PRED_COPY1]], %subreg.sub2_sub3 ; CHECK-NEXT: S_ENDPGM 0, implicit [[INSERT_SUBREG]] %0:sgpr(s128) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 %1:sgpr(s64) = COPY $sgpr4_sgpr5 @@ -343,9 +343,9 @@ ; CHECK-LABEL: name: insert_s_v256_v_s64_96 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr8_vgpr9 - ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:vreg_256 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub3_sub4 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_256 = PRED_COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr8_vgpr9 + ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:vreg_256 = INSERT_SUBREG [[PRED_COPY]], [[PRED_COPY1]], %subreg.sub3_sub4 ; CHECK-NEXT: S_ENDPGM 0, implicit [[INSERT_SUBREG]] %0:vgpr(s256) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 %1:vgpr(s64) = COPY $vgpr8_vgpr9 @@ -365,9 +365,9 @@ ; CHECK-LABEL: name: insert_s_s256_s_s64_128 ; CHECK: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:sgpr_256 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub4_sub5 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_256 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:sgpr_256 = INSERT_SUBREG [[PRED_COPY]], [[PRED_COPY1]], %subreg.sub4_sub5 ; CHECK-NEXT: S_ENDPGM 0, implicit [[INSERT_SUBREG]] %0:sgpr(s256) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 %1:sgpr(s64) = COPY $sgpr4_sgpr5 @@ -402,9 +402,9 @@ ; CHECK-LABEL: name: insert_s_s128_s_s96_0 ; CHECK: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr6_sgpr7_sgpr8 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_128_with_sub0_sub1_sub2 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_96 = COPY $sgpr6_sgpr7_sgpr8 - ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:sgpr_128 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub0_sub1_sub2 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_128_with_sub0_sub1_sub2 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_96 = PRED_COPY $sgpr6_sgpr7_sgpr8 + ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:sgpr_128 = INSERT_SUBREG [[PRED_COPY]], [[PRED_COPY1]], %subreg.sub0_sub1_sub2 ; CHECK-NEXT: S_ENDPGM 0, implicit [[INSERT_SUBREG]] %0:sgpr(s128) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 %1:sgpr(s96) = COPY $sgpr6_sgpr7_sgpr8 @@ -424,9 +424,9 @@ ; CHECK-LABEL: name: insert_s_s128_s_s96_32 ; CHECK: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr6_sgpr7_sgpr8 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_128_with_sub1_sub2_sub3 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_96 = COPY $sgpr6_sgpr7_sgpr8 - ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:sgpr_128 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub1_sub2_sub3 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_128_with_sub1_sub2_sub3 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_96 = PRED_COPY $sgpr6_sgpr7_sgpr8 + ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:sgpr_128 = 
INSERT_SUBREG [[PRED_COPY]], [[PRED_COPY1]], %subreg.sub1_sub2_sub3 ; CHECK-NEXT: S_ENDPGM 0, implicit [[INSERT_SUBREG]] %0:sgpr(s128) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 %1:sgpr(s96) = COPY $sgpr6_sgpr7_sgpr8 @@ -446,9 +446,9 @@ ; CHECK-LABEL: name: insert_s_s160_s_s96_0 ; CHECK: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4, $sgpr6_sgpr7_sgpr8 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_160_with_sub0_sub1_sub2 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_96 = COPY $sgpr6_sgpr7_sgpr8 - ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:sgpr_160 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub0_sub1_sub2 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_160_with_sub0_sub1_sub2 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_96 = PRED_COPY $sgpr6_sgpr7_sgpr8 + ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:sgpr_160 = INSERT_SUBREG [[PRED_COPY]], [[PRED_COPY1]], %subreg.sub0_sub1_sub2 ; CHECK-NEXT: S_ENDPGM 0, implicit [[INSERT_SUBREG]] %0:sgpr(s160) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4 %1:sgpr(s96) = COPY $sgpr6_sgpr7_sgpr8 @@ -468,9 +468,9 @@ ; CHECK-LABEL: name: insert_s_s160_s_s96_32 ; CHECK: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4, $sgpr6_sgpr7_sgpr8 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_160_with_sub1_sub2_sub3 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_96 = COPY $sgpr6_sgpr7_sgpr8 - ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:sgpr_160 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub1_sub2_sub3 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_160_with_sub1_sub2_sub3 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_96 = PRED_COPY $sgpr6_sgpr7_sgpr8 + ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:sgpr_160 = INSERT_SUBREG [[PRED_COPY]], [[PRED_COPY1]], %subreg.sub1_sub2_sub3 ; CHECK-NEXT: S_ENDPGM 0, implicit [[INSERT_SUBREG]] %0:sgpr(s160) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4 %1:sgpr(s96) = COPY $sgpr6_sgpr7_sgpr8 @@ -490,9 +490,9 @@ ; CHECK-LABEL: name: insert_s_s160_s_s96_64 ; CHECK: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4, $sgpr6_sgpr7_sgpr8 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_160_with_sub2_sub3_sub4 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_96 = COPY $sgpr6_sgpr7_sgpr8 - ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:sgpr_160 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub2_sub3_sub4 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_160_with_sub2_sub3_sub4 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_96 = PRED_COPY $sgpr6_sgpr7_sgpr8 + ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:sgpr_160 = INSERT_SUBREG [[PRED_COPY]], [[PRED_COPY1]], %subreg.sub2_sub3_sub4 ; CHECK-NEXT: S_ENDPGM 0, implicit [[INSERT_SUBREG]] %0:sgpr(s160) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4 %1:sgpr(s96) = COPY $sgpr6_sgpr7_sgpr8 @@ -513,9 +513,9 @@ ; CHECK-LABEL: name: insert_s_s256_s_s128_0 ; CHECK: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_128 = COPY $sgpr8_sgpr9_sgpr10_sgpr11 - ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:sgpr_256 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub0_sub1_sub2_sub3 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_256 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_128 = PRED_COPY $sgpr8_sgpr9_sgpr10_sgpr11 + ; CHECK-NEXT: 
[[INSERT_SUBREG:%[0-9]+]]:sgpr_256 = INSERT_SUBREG [[PRED_COPY]], [[PRED_COPY1]], %subreg.sub0_sub1_sub2_sub3 ; CHECK-NEXT: S_ENDPGM 0, implicit [[INSERT_SUBREG]] %0:sgpr(s256) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 %1:sgpr(s128) = COPY $sgpr8_sgpr9_sgpr10_sgpr11 @@ -536,9 +536,9 @@ ; CHECK-LABEL: name: insert_v_s256_v_s128_32 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10_vgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr8_vgpr9_vgpr10_vgpr11 - ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:vreg_256 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub1_sub2_sub3_sub4 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_256 = PRED_COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_128 = PRED_COPY $vgpr8_vgpr9_vgpr10_vgpr11 + ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:vreg_256 = INSERT_SUBREG [[PRED_COPY]], [[PRED_COPY1]], %subreg.sub1_sub2_sub3_sub4 ; CHECK-NEXT: S_ENDPGM 0, implicit [[INSERT_SUBREG]] %0:vgpr(s256) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 %1:vgpr(s128) = COPY $vgpr8_vgpr9_vgpr10_vgpr11 @@ -559,9 +559,9 @@ ; CHECK-LABEL: name: insert_v_s256_v_s128_64 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10_vgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr8_vgpr9_vgpr10_vgpr11 - ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:vreg_256 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub2_sub3_sub4_sub5 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_256 = PRED_COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_128 = PRED_COPY $vgpr8_vgpr9_vgpr10_vgpr11 + ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:vreg_256 = INSERT_SUBREG [[PRED_COPY]], [[PRED_COPY1]], %subreg.sub2_sub3_sub4_sub5 ; CHECK-NEXT: S_ENDPGM 0, implicit [[INSERT_SUBREG]] %0:vgpr(s256) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 %1:vgpr(s128) = COPY $vgpr8_vgpr9_vgpr10_vgpr11 @@ -582,9 +582,9 @@ ; CHECK-LABEL: name: insert_v_s256_v_s128_96 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10_vgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr8_vgpr9_vgpr10_vgpr11 - ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:vreg_256 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub3_sub4_sub5_sub6 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_256 = PRED_COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_128 = PRED_COPY $vgpr8_vgpr9_vgpr10_vgpr11 + ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:vreg_256 = INSERT_SUBREG [[PRED_COPY]], [[PRED_COPY1]], %subreg.sub3_sub4_sub5_sub6 ; CHECK-NEXT: S_ENDPGM 0, implicit [[INSERT_SUBREG]] %0:vgpr(s256) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 %1:vgpr(s128) = COPY $vgpr8_vgpr9_vgpr10_vgpr11 @@ -605,9 +605,9 @@ ; CHECK-LABEL: name: insert_v_s256_v_s128_128 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10_vgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr8_vgpr9_vgpr10_vgpr11 - ; CHECK-NEXT: 
[[INSERT_SUBREG:%[0-9]+]]:vreg_256 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub4_sub5_sub6_sub7 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_256 = PRED_COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_128 = PRED_COPY $vgpr8_vgpr9_vgpr10_vgpr11 + ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:vreg_256 = INSERT_SUBREG [[PRED_COPY]], [[PRED_COPY1]], %subreg.sub4_sub5_sub6_sub7 ; CHECK-NEXT: S_ENDPGM 0, implicit [[INSERT_SUBREG]] %0:vgpr(s256) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 %1:vgpr(s128) = COPY $vgpr8_vgpr9_vgpr10_vgpr11 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-intrinsic-trunc.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-intrinsic-trunc.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-intrinsic-trunc.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-intrinsic-trunc.mir @@ -14,9 +14,9 @@ ; CHECK-LABEL: name: intrinsic_trunc_s32_vv ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: %1:vgpr_32 = nofpexcept V_TRUNC_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: $vgpr0 = COPY %1 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[V_TRUNC_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_TRUNC_F32_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[V_TRUNC_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = G_INTRINSIC_TRUNC %0 $vgpr0 = COPY %1 @@ -35,9 +35,9 @@ ; CHECK-LABEL: name: intrinsic_trunc_s32_vs ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; CHECK-NEXT: %1:vgpr_32 = nofpexcept V_TRUNC_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: $vgpr0 = COPY %1 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; CHECK-NEXT: [[V_TRUNC_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_TRUNC_F32_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[V_TRUNC_F32_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = G_INTRINSIC_TRUNC %0 $vgpr0 = COPY %1 @@ -56,9 +56,9 @@ ; CHECK-LABEL: name: intrinsic_trunc_s64_sv ; CHECK: liveins: $sgpr0_sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: %1:vreg_64 = nofpexcept V_TRUNC_F64_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY %1 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[V_TRUNC_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_TRUNC_F64_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: $vgpr0_vgpr1 = PRED_COPY [[V_TRUNC_F64_e64_]] %0:sgpr(s64) = COPY $sgpr0_sgpr1 %1:vgpr(s64) = G_INTRINSIC_TRUNC %0 $vgpr0_vgpr1 = COPY %1 @@ -77,9 +77,9 @@ ; CHECK-LABEL: name: intrinsic_trunc_s64_vv ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: %1:vreg_64 = nofpexcept V_TRUNC_F64_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY %1 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[V_TRUNC_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_TRUNC_F64_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: $vgpr0_vgpr1 = PRED_COPY [[V_TRUNC_F64_e64_]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_INTRINSIC_TRUNC %0 $vgpr0_vgpr1 = COPY %1 diff --git 
a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-intrinsic-trunc.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-intrinsic-trunc.s16.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-intrinsic-trunc.s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-intrinsic-trunc.s16.mir @@ -14,9 +14,9 @@ ; GCN-LABEL: name: intrinsic_trunc_s16_vv ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: %2:vgpr_32 = nofpexcept V_TRUNC_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: $vgpr0 = COPY %2 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[V_TRUNC_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_TRUNC_F16_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr0 = PRED_COPY [[V_TRUNC_F16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s16) = G_TRUNC %0 %2:vgpr(s16) = G_INTRINSIC_TRUNC %1 @@ -37,9 +37,9 @@ ; GCN-LABEL: name: intrinsic_trunc_s16_vs ; GCN: liveins: $sgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: %2:vgpr_32 = nofpexcept V_TRUNC_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: $vgpr0 = COPY %2 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[V_TRUNC_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_TRUNC_F16_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr0 = PRED_COPY [[V_TRUNC_F16_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s16) = G_TRUNC %0 %2:vgpr(s16) = G_INTRINSIC_TRUNC %1 @@ -60,9 +60,9 @@ ; GCN-LABEL: name: intrinsic_trunc_fneg_s16_vv ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: %3:vgpr_32 = nofpexcept V_TRUNC_F16_e64 1, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: $vgpr0 = COPY %3 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[V_TRUNC_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_TRUNC_F16_e64 1, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr0 = PRED_COPY [[V_TRUNC_F16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s16) = G_TRUNC %0 %2:vgpr(s16) = G_FNEG %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-inttoptr.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-inttoptr.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-inttoptr.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-inttoptr.mir @@ -7,10 +7,10 @@ regBankSelected: true # GCN-LABEL: name: inttoptr -# GCN: [[S64:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 -# GCN: [[V64:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 +# GCN: [[S64:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 +# GCN: [[V64:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 # GCN: [[VAL:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[S64]], 0, 0 -# GCN: [[V_VAL:%[0-9]+]]:vgpr_32 = COPY [[VAL]] +# GCN: [[V_VAL:%[0-9]+]]:vgpr_32 = PRED_COPY [[VAL]] # GCN: FLAT_STORE_DWORD [[V64]], [[V_VAL]] # diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-atomic-flat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-atomic-flat.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-atomic-flat.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-atomic-flat.mir @@ -18,27 +18,27 @@ ; GFX7-LABEL: name: load_atomic_flat_s32_seq_cst ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, 
implicit $flat_scr :: (load seq_cst (s32)) - ; GFX7-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s32)) + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_DWORD]] ; GFX9-LABEL: name: load_atomic_flat_s32_seq_cst ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s32)) - ; GFX9-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s32)) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_DWORD]] ; GFX10-LABEL: name: load_atomic_flat_s32_seq_cst ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s32)) - ; GFX10-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s32)) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_DWORD]] ; GFX11-LABEL: name: load_atomic_flat_s32_seq_cst ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s32)) - ; GFX11-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s32)) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_DWORD]] %0:vgpr(p0) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = G_LOAD %0 :: (load seq_cst (s32), align 4, addrspace 0) $vgpr0 = COPY %1 @@ -61,25 +61,25 @@ ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vgpr_32(<2 x s16>) = G_LOAD [[COPY]](p0) :: (load seq_cst (<2 x s16>)) - ; GFX7-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[LOAD]](<2 x s16>) ; GFX9-LABEL: name: load_atomic_flat_v2s16_seq_cst ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vgpr_32(<2 x s16>) = G_LOAD [[COPY]](p0) :: (load seq_cst (<2 x s16>)) - ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[LOAD]](<2 x s16>) ; GFX10-LABEL: name: load_atomic_flat_v2s16_seq_cst ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vgpr_32(<2 x s16>) = G_LOAD [[COPY]](p0) :: (load seq_cst (<2 x s16>)) - ; GFX10-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[LOAD]](<2 x s16>) ; GFX11-LABEL: name: load_atomic_flat_v2s16_seq_cst ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: 
[[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1 ; GFX11-NEXT: [[LOAD:%[0-9]+]]:vgpr_32(<2 x s16>) = G_LOAD [[COPY]](p0) :: (load seq_cst (<2 x s16>)) - ; GFX11-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[LOAD]](<2 x s16>) %0:vgpr(p0) = COPY $vgpr0_vgpr1 %1:vgpr(<2 x s16>) = G_LOAD %0 :: (load seq_cst (<2 x s16>), align 4, addrspace 0) $vgpr0 = COPY %1 @@ -102,25 +102,25 @@ ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vgpr_32(p3) = G_LOAD [[COPY]](p0) :: (load seq_cst (p3)) - ; GFX7-NEXT: $vgpr0 = COPY [[LOAD]](p3) + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[LOAD]](p3) ; GFX9-LABEL: name: load_atomic_flat_p3_seq_cst ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vgpr_32(p3) = G_LOAD [[COPY]](p0) :: (load seq_cst (p3)) - ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](p3) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[LOAD]](p3) ; GFX10-LABEL: name: load_atomic_flat_p3_seq_cst ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vgpr_32(p3) = G_LOAD [[COPY]](p0) :: (load seq_cst (p3)) - ; GFX10-NEXT: $vgpr0 = COPY [[LOAD]](p3) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[LOAD]](p3) ; GFX11-LABEL: name: load_atomic_flat_p3_seq_cst ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1 ; GFX11-NEXT: [[LOAD:%[0-9]+]]:vgpr_32(p3) = G_LOAD [[COPY]](p0) :: (load seq_cst (p3)) - ; GFX11-NEXT: $vgpr0 = COPY [[LOAD]](p3) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[LOAD]](p3) %0:vgpr(p0) = COPY $vgpr0_vgpr1 %1:vgpr(p3) = G_LOAD %0 :: (load seq_cst (p3), align 4, addrspace 0) $vgpr0 = COPY %1 @@ -141,27 +141,27 @@ ; GFX7-LABEL: name: load_atomic_flat_s64_seq_cst ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s64)) - ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s64)) + ; GFX7-NEXT: $vgpr0_vgpr1 = PRED_COPY [[FLAT_LOAD_DWORDX2_]] ; GFX9-LABEL: name: load_atomic_flat_s64_seq_cst ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s64)) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s64)) + ; GFX9-NEXT: $vgpr0_vgpr1 = PRED_COPY [[FLAT_LOAD_DWORDX2_]] ; GFX10-LABEL: name: load_atomic_flat_s64_seq_cst ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s64)) - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY 
$vgpr0_vgpr1 + ; GFX10-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s64)) + ; GFX10-NEXT: $vgpr0_vgpr1 = PRED_COPY [[FLAT_LOAD_DWORDX2_]] ; GFX11-LABEL: name: load_atomic_flat_s64_seq_cst ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s64)) - ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s64)) + ; GFX11-NEXT: $vgpr0_vgpr1 = PRED_COPY [[FLAT_LOAD_DWORDX2_]] %0:vgpr(p0) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_LOAD %0 :: (load seq_cst (s64), align 8, addrspace 0) $vgpr0_vgpr1 = COPY %1 @@ -184,25 +184,25 @@ ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load seq_cst (<2 x s32>)) - ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX7-NEXT: $vgpr0_vgpr1 = PRED_COPY [[LOAD]](<2 x s32>) ; GFX9-LABEL: name: load_atomic_flat_v2s32_seq_cst ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load seq_cst (<2 x s32>)) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX9-NEXT: $vgpr0_vgpr1 = PRED_COPY [[LOAD]](<2 x s32>) ; GFX10-LABEL: name: load_atomic_flat_v2s32_seq_cst ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load seq_cst (<2 x s32>)) - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX10-NEXT: $vgpr0_vgpr1 = PRED_COPY [[LOAD]](<2 x s32>) ; GFX11-LABEL: name: load_atomic_flat_v2s32_seq_cst ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1 ; GFX11-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load seq_cst (<2 x s32>)) - ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX11-NEXT: $vgpr0_vgpr1 = PRED_COPY [[LOAD]](<2 x s32>) %0:vgpr(p0) = COPY $vgpr0_vgpr1 %1:vgpr(<2 x s32>) = G_LOAD %0 :: (load seq_cst (<2 x s32>), align 8, addrspace 0) $vgpr0_vgpr1 = COPY %1 @@ -225,25 +225,25 @@ ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<4 x s16>) = G_LOAD [[COPY]](p0) :: (load seq_cst (<4 x s16>)) - ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; GFX7-NEXT: $vgpr0_vgpr1 = PRED_COPY [[LOAD]](<4 x s16>) ; GFX9-LABEL: name: load_atomic_flat_v4s16_seq_cst ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<4 x s16>) = G_LOAD [[COPY]](p0) :: (load seq_cst (<4 x s16>)) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; GFX9-NEXT: $vgpr0_vgpr1 = PRED_COPY [[LOAD]](<4 x s16>) ; GFX10-LABEL: name: load_atomic_flat_v4s16_seq_cst ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<4 x s16>) = G_LOAD [[COPY]](p0) :: (load 
seq_cst (<4 x s16>)) - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; GFX10-NEXT: $vgpr0_vgpr1 = PRED_COPY [[LOAD]](<4 x s16>) ; GFX11-LABEL: name: load_atomic_flat_v4s16_seq_cst ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1 ; GFX11-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<4 x s16>) = G_LOAD [[COPY]](p0) :: (load seq_cst (<4 x s16>)) - ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; GFX11-NEXT: $vgpr0_vgpr1 = PRED_COPY [[LOAD]](<4 x s16>) %0:vgpr(p0) = COPY $vgpr0_vgpr1 %1:vgpr(<4 x s16>) = G_LOAD %0 :: (load seq_cst (<4 x s16>), align 8, addrspace 0) $vgpr0_vgpr1 = COPY %1 @@ -266,25 +266,25 @@ ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p1) = G_LOAD [[COPY]](p0) :: (load seq_cst (p1)) - ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1) + ; GFX7-NEXT: $vgpr0_vgpr1 = PRED_COPY [[LOAD]](p1) ; GFX9-LABEL: name: load_atomic_flat_p1_seq_cst ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p1) = G_LOAD [[COPY]](p0) :: (load seq_cst (p1)) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1) + ; GFX9-NEXT: $vgpr0_vgpr1 = PRED_COPY [[LOAD]](p1) ; GFX10-LABEL: name: load_atomic_flat_p1_seq_cst ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p1) = G_LOAD [[COPY]](p0) :: (load seq_cst (p1)) - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1) + ; GFX10-NEXT: $vgpr0_vgpr1 = PRED_COPY [[LOAD]](p1) ; GFX11-LABEL: name: load_atomic_flat_p1_seq_cst ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1 ; GFX11-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p1) = G_LOAD [[COPY]](p0) :: (load seq_cst (p1)) - ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1) + ; GFX11-NEXT: $vgpr0_vgpr1 = PRED_COPY [[LOAD]](p1) %0:vgpr(p0) = COPY $vgpr0_vgpr1 %1:vgpr(p1) = G_LOAD %0 :: (load seq_cst (p1), align 8, addrspace 0) $vgpr0_vgpr1 = COPY %1 @@ -307,25 +307,25 @@ ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p0) = G_LOAD [[COPY]](p0) :: (load seq_cst (p0)) - ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p0) + ; GFX7-NEXT: $vgpr0_vgpr1 = PRED_COPY [[LOAD]](p0) ; GFX9-LABEL: name: load_atomic_flat_p0_seq_cst ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p0) = G_LOAD [[COPY]](p0) :: (load seq_cst (p0)) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p0) + ; GFX9-NEXT: $vgpr0_vgpr1 = PRED_COPY [[LOAD]](p0) ; GFX10-LABEL: name: load_atomic_flat_p0_seq_cst ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p0) = G_LOAD [[COPY]](p0) :: (load seq_cst (p0)) - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p0) + ; GFX10-NEXT: $vgpr0_vgpr1 = PRED_COPY [[LOAD]](p0) ; GFX11-LABEL: name: load_atomic_flat_p0_seq_cst ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1 ; GFX11-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p0) = G_LOAD [[COPY]](p0) :: (load seq_cst (p0)) - ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p0) + ; GFX11-NEXT: $vgpr0_vgpr1 = PRED_COPY [[LOAD]](p0) %0:vgpr(p0) = COPY $vgpr0_vgpr1 %1:vgpr(p0) = G_LOAD %0 :: (load seq_cst (p0), align 8, 
addrspace 0) $vgpr0_vgpr1 = COPY %1 @@ -346,67 +346,67 @@ ; GFX7-LABEL: name: load_atomic_flat_s32_seq_cst_gep_m2048 ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294965248, implicit $exec ; GFX7-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX7-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX7-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s32)) - ; GFX7-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_DWORD]] ; GFX9-LABEL: name: load_atomic_flat_s32_seq_cst_gep_m2048 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294965248, implicit $exec ; GFX9-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX9-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX9-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, 
%9, %subreg.sub1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX9-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX9-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX9-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s32)) - ; GFX9-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_DWORD]] ; GFX10-LABEL: name: load_atomic_flat_s32_seq_cst_gep_m2048 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294965248, implicit $exec ; GFX10-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX10-NEXT: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX10-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s32)) - ; GFX10-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_DWORD]] ; GFX11-LABEL: name: load_atomic_flat_s32_seq_cst_gep_m2048 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: 
[[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294965248, implicit $exec ; GFX11-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX11-NEXT: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX11-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX11-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX11-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX11-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s32)) - ; GFX11-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_DWORD]] %0:vgpr(p0) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_CONSTANT i64 -2048 %2:vgpr(p0) = G_PTR_ADD %0, %1 @@ -429,47 +429,47 @@ ; GFX7-LABEL: name: load_atomic_flat_s32_seq_cst_gep_4095 ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec ; GFX7-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX7-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX7-NEXT: 
[[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX7-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s32)) - ; GFX7-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_DWORD]] ; GFX9-LABEL: name: load_atomic_flat_s32_seq_cst_gep_4095 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 4095, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s32)) - ; GFX9-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[PRED_COPY]], 4095, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s32)) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_DWORD]] ; GFX10-LABEL: name: load_atomic_flat_s32_seq_cst_gep_4095 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec ; GFX10-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX10-NEXT: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 
[[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX10-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s32)) - ; GFX10-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_DWORD]] ; GFX11-LABEL: name: load_atomic_flat_s32_seq_cst_gep_4095 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 4095, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s32)) - ; GFX11-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[PRED_COPY]], 4095, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s32)) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_DWORD]] %0:vgpr(p0) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_CONSTANT i64 4095 %2:vgpr(p0) = G_PTR_ADD %0, %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-atomic-global.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-atomic-global.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-atomic-global.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-atomic-global.mir @@ -20,43 +20,43 @@ ; GFX6-LABEL: name: load_atomic_global_s32_seq_cst ; GFX6: liveins: $vgpr0_vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load seq_cst (s32), addrspace 1) - ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_ADDR64_]] + ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 [[PRED_COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load seq_cst (s32), addrspace 1) + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_ADDR64_]] ; GFX7-LABEL: name: load_atomic_global_s32_seq_cst ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load seq_cst (s32), addrspace 1) - 
; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_ADDR64_]] + ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 [[PRED_COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load seq_cst (s32), addrspace 1) + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_ADDR64_]] ; GFX7-FLAT-LABEL: name: load_atomic_global_s32_seq_cst ; GFX7-FLAT: liveins: $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: {{ $}} - ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-FLAT-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s32), addrspace 1) - ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] + ; GFX7-FLAT-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-FLAT-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s32), addrspace 1) + ; GFX7-FLAT-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_DWORD]] ; GFX9-LABEL: name: load_atomic_global_s32_seq_cst ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load seq_cst (s32), addrspace 1) - ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[PRED_COPY]], 0, 0, implicit $exec :: (load seq_cst (s32), addrspace 1) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_DWORD]] ; GFX10-LABEL: name: load_atomic_global_s32_seq_cst ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load seq_cst (s32), addrspace 1) - ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[PRED_COPY]], 0, 0, implicit $exec :: (load seq_cst (s32), addrspace 1) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_DWORD]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = G_LOAD %0 :: (load seq_cst (s32), align 4, addrspace 1) $vgpr0 = COPY %1 @@ -79,31 +79,31 @@ ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX6-NEXT: [[LOAD:%[0-9]+]]:vgpr_32(<2 x s16>) = G_LOAD [[COPY]](p1) :: (load seq_cst (<2 x s16>), addrspace 1) - ; GFX6-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[LOAD]](<2 x s16>) ; GFX7-LABEL: name: load_atomic_global_v2s16_seq_cst ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vgpr_32(<2 x s16>) = G_LOAD [[COPY]](p1) :: (load seq_cst (<2 x s16>), addrspace 1) - ; GFX7-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[LOAD]](<2 x s16>) ; GFX7-FLAT-LABEL: name: load_atomic_global_v2s16_seq_cst ; GFX7-FLAT: liveins: $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: {{ $}} ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: [[LOAD:%[0-9]+]]:vgpr_32(<2 x s16>) = G_LOAD [[COPY]](p1) :: (load seq_cst (<2 x s16>), addrspace 1) - ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; GFX7-FLAT-NEXT: $vgpr0 = PRED_COPY [[LOAD]](<2 x s16>) ; GFX9-LABEL: name: load_atomic_global_v2s16_seq_cst ; GFX9: 
liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vgpr_32(<2 x s16>) = G_LOAD [[COPY]](p1) :: (load seq_cst (<2 x s16>), addrspace 1) - ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[LOAD]](<2 x s16>) ; GFX10-LABEL: name: load_atomic_global_v2s16_seq_cst ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vgpr_32(<2 x s16>) = G_LOAD [[COPY]](p1) :: (load seq_cst (<2 x s16>), addrspace 1) - ; GFX10-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[LOAD]](<2 x s16>) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(<2 x s16>) = G_LOAD %0 :: (load seq_cst (<2 x s16>), align 4, addrspace 1) $vgpr0 = COPY %1 @@ -126,31 +126,31 @@ ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX6-NEXT: [[LOAD:%[0-9]+]]:vgpr_32(p3) = G_LOAD [[COPY]](p1) :: (load seq_cst (p3), addrspace 1) - ; GFX6-NEXT: $vgpr0 = COPY [[LOAD]](p3) + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[LOAD]](p3) ; GFX7-LABEL: name: load_atomic_global_p3_seq_cst ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vgpr_32(p3) = G_LOAD [[COPY]](p1) :: (load seq_cst (p3), addrspace 1) - ; GFX7-NEXT: $vgpr0 = COPY [[LOAD]](p3) + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[LOAD]](p3) ; GFX7-FLAT-LABEL: name: load_atomic_global_p3_seq_cst ; GFX7-FLAT: liveins: $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: {{ $}} ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: [[LOAD:%[0-9]+]]:vgpr_32(p3) = G_LOAD [[COPY]](p1) :: (load seq_cst (p3), addrspace 1) - ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[LOAD]](p3) + ; GFX7-FLAT-NEXT: $vgpr0 = PRED_COPY [[LOAD]](p3) ; GFX9-LABEL: name: load_atomic_global_p3_seq_cst ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vgpr_32(p3) = G_LOAD [[COPY]](p1) :: (load seq_cst (p3), addrspace 1) - ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](p3) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[LOAD]](p3) ; GFX10-LABEL: name: load_atomic_global_p3_seq_cst ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vgpr_32(p3) = G_LOAD [[COPY]](p1) :: (load seq_cst (p3), addrspace 1) - ; GFX10-NEXT: $vgpr0 = COPY [[LOAD]](p3) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[LOAD]](p3) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(p3) = G_LOAD %0 :: (load seq_cst (p3), align 4, addrspace 1) $vgpr0 = COPY %1 @@ -171,43 +171,43 @@ ; GFX6-LABEL: name: load_atomic_global_s64_seq_cst ; GFX6: liveins: $vgpr0_vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX2_ADDR64_:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load seq_cst (s64), addrspace 1) - ; 
GFX6-NEXT: $vgpr0_vgpr1 = COPY [[BUFFER_LOAD_DWORDX2_ADDR64_]] + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX2_ADDR64_:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64 [[PRED_COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load seq_cst (s64), addrspace 1) + ; GFX6-NEXT: $vgpr0_vgpr1 = PRED_COPY [[BUFFER_LOAD_DWORDX2_ADDR64_]] ; GFX7-LABEL: name: load_atomic_global_s64_seq_cst ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX2_ADDR64_:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load seq_cst (s64), addrspace 1) - ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[BUFFER_LOAD_DWORDX2_ADDR64_]] + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX2_ADDR64_:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64 [[PRED_COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load seq_cst (s64), addrspace 1) + ; GFX7-NEXT: $vgpr0_vgpr1 = PRED_COPY [[BUFFER_LOAD_DWORDX2_ADDR64_]] ; GFX7-FLAT-LABEL: name: load_atomic_global_s64_seq_cst ; GFX7-FLAT: liveins: $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: {{ $}} - ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-FLAT-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s64), addrspace 1) - ; GFX7-FLAT-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] + ; GFX7-FLAT-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-FLAT-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s64), addrspace 1) + ; GFX7-FLAT-NEXT: $vgpr0_vgpr1 = PRED_COPY [[FLAT_LOAD_DWORDX2_]] ; GFX9-LABEL: name: load_atomic_global_s64_seq_cst ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load seq_cst (s64), addrspace 1) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[PRED_COPY]], 0, 0, implicit $exec :: (load seq_cst (s64), addrspace 1) + ; GFX9-NEXT: $vgpr0_vgpr1 = PRED_COPY [[GLOBAL_LOAD_DWORDX2_]] ; GFX10-LABEL: name: load_atomic_global_s64_seq_cst ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load seq_cst (s64), addrspace 1) - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[PRED_COPY]], 0, 0, implicit $exec :: (load seq_cst (s64), addrspace 1) + ; GFX10-NEXT: $vgpr0_vgpr1 = PRED_COPY [[GLOBAL_LOAD_DWORDX2_]] 
%0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_LOAD %0 :: (load seq_cst (s64), align 8, addrspace 1) $vgpr0_vgpr1 = COPY %1 @@ -230,31 +230,31 @@ ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX6-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load seq_cst (<2 x s32>), addrspace 1) - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX6-NEXT: $vgpr0_vgpr1 = PRED_COPY [[LOAD]](<2 x s32>) ; GFX7-LABEL: name: load_atomic_global_v2s32_seq_cst ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load seq_cst (<2 x s32>), addrspace 1) - ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX7-NEXT: $vgpr0_vgpr1 = PRED_COPY [[LOAD]](<2 x s32>) ; GFX7-FLAT-LABEL: name: load_atomic_global_v2s32_seq_cst ; GFX7-FLAT: liveins: $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: {{ $}} ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load seq_cst (<2 x s32>), addrspace 1) - ; GFX7-FLAT-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX7-FLAT-NEXT: $vgpr0_vgpr1 = PRED_COPY [[LOAD]](<2 x s32>) ; GFX9-LABEL: name: load_atomic_global_v2s32_seq_cst ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load seq_cst (<2 x s32>), addrspace 1) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX9-NEXT: $vgpr0_vgpr1 = PRED_COPY [[LOAD]](<2 x s32>) ; GFX10-LABEL: name: load_atomic_global_v2s32_seq_cst ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load seq_cst (<2 x s32>), addrspace 1) - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX10-NEXT: $vgpr0_vgpr1 = PRED_COPY [[LOAD]](<2 x s32>) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(<2 x s32>) = G_LOAD %0 :: (load seq_cst (<2 x s32>), align 8, addrspace 1) $vgpr0_vgpr1 = COPY %1 @@ -277,31 +277,31 @@ ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX6-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load seq_cst (<4 x s16>), addrspace 1) - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; GFX6-NEXT: $vgpr0_vgpr1 = PRED_COPY [[LOAD]](<4 x s16>) ; GFX7-LABEL: name: load_atomic_global_v4s16_seq_cst ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load seq_cst (<4 x s16>), addrspace 1) - ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; GFX7-NEXT: $vgpr0_vgpr1 = PRED_COPY [[LOAD]](<4 x s16>) ; GFX7-FLAT-LABEL: name: load_atomic_global_v4s16_seq_cst ; GFX7-FLAT: liveins: $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: {{ $}} ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load seq_cst (<4 x s16>), addrspace 1) - ; GFX7-FLAT-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; GFX7-FLAT-NEXT: $vgpr0_vgpr1 = PRED_COPY [[LOAD]](<4 x s16>) ; GFX9-LABEL: name: load_atomic_global_v4s16_seq_cst ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<4 x 
s16>) = G_LOAD [[COPY]](p1) :: (load seq_cst (<4 x s16>), addrspace 1) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; GFX9-NEXT: $vgpr0_vgpr1 = PRED_COPY [[LOAD]](<4 x s16>) ; GFX10-LABEL: name: load_atomic_global_v4s16_seq_cst ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load seq_cst (<4 x s16>), addrspace 1) - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; GFX10-NEXT: $vgpr0_vgpr1 = PRED_COPY [[LOAD]](<4 x s16>) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(<4 x s16>) = G_LOAD %0 :: (load seq_cst (<4 x s16>), align 8, addrspace 1) $vgpr0_vgpr1 = COPY %1 @@ -324,31 +324,31 @@ ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX6-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p1) = G_LOAD [[COPY]](p1) :: (load seq_cst (p1), addrspace 1) - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1) + ; GFX6-NEXT: $vgpr0_vgpr1 = PRED_COPY [[LOAD]](p1) ; GFX7-LABEL: name: load_atomic_global_p1_seq_cst ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p1) = G_LOAD [[COPY]](p1) :: (load seq_cst (p1), addrspace 1) - ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1) + ; GFX7-NEXT: $vgpr0_vgpr1 = PRED_COPY [[LOAD]](p1) ; GFX7-FLAT-LABEL: name: load_atomic_global_p1_seq_cst ; GFX7-FLAT: liveins: $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: {{ $}} ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p1) = G_LOAD [[COPY]](p1) :: (load seq_cst (p1), addrspace 1) - ; GFX7-FLAT-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1) + ; GFX7-FLAT-NEXT: $vgpr0_vgpr1 = PRED_COPY [[LOAD]](p1) ; GFX9-LABEL: name: load_atomic_global_p1_seq_cst ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p1) = G_LOAD [[COPY]](p1) :: (load seq_cst (p1), addrspace 1) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1) + ; GFX9-NEXT: $vgpr0_vgpr1 = PRED_COPY [[LOAD]](p1) ; GFX10-LABEL: name: load_atomic_global_p1_seq_cst ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p1) = G_LOAD [[COPY]](p1) :: (load seq_cst (p1), addrspace 1) - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1) + ; GFX10-NEXT: $vgpr0_vgpr1 = PRED_COPY [[LOAD]](p1) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(p1) = G_LOAD %0 :: (load seq_cst (p1), align 8, addrspace 1) $vgpr0_vgpr1 = COPY %1 @@ -371,31 +371,31 @@ ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX6-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p0) = G_LOAD [[COPY]](p1) :: (load seq_cst (p0), addrspace 1) - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p0) + ; GFX6-NEXT: $vgpr0_vgpr1 = PRED_COPY [[LOAD]](p0) ; GFX7-LABEL: name: load_atomic_global_p0_seq_cst ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p0) = G_LOAD [[COPY]](p1) :: (load seq_cst (p0), addrspace 1) - ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p0) + ; GFX7-NEXT: $vgpr0_vgpr1 = PRED_COPY [[LOAD]](p0) ; GFX7-FLAT-LABEL: name: load_atomic_global_p0_seq_cst ; GFX7-FLAT: liveins: $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: {{ $}} ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p0) = G_LOAD [[COPY]](p1) :: (load 
seq_cst (p0), addrspace 1) - ; GFX7-FLAT-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p0) + ; GFX7-FLAT-NEXT: $vgpr0_vgpr1 = PRED_COPY [[LOAD]](p0) ; GFX9-LABEL: name: load_atomic_global_p0_seq_cst ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p0) = G_LOAD [[COPY]](p1) :: (load seq_cst (p0), addrspace 1) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p0) + ; GFX9-NEXT: $vgpr0_vgpr1 = PRED_COPY [[LOAD]](p0) ; GFX10-LABEL: name: load_atomic_global_p0_seq_cst ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p0) = G_LOAD [[COPY]](p1) :: (load seq_cst (p0), addrspace 1) - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p0) + ; GFX10-NEXT: $vgpr0_vgpr1 = PRED_COPY [[LOAD]](p0) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(p0) = G_LOAD %0 :: (load seq_cst (p0), align 8, addrspace 1) $vgpr0_vgpr1 = COPY %1 @@ -416,73 +416,73 @@ ; GFX6-LABEL: name: load_atomic_global_s32_seq_cst_gep_m2048 ; GFX6: liveins: $vgpr0_vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX6-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294965248, implicit $exec ; GFX6-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX6-NEXT: %14:vgpr_32, dead %16:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %14, %subreg.sub1 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX6-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX6-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX6-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE2]], %subreg.sub2_sub3 ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_ADDR64_:%[0-9]+]]:vgpr_32 = 
BUFFER_LOAD_DWORD_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, implicit $exec :: (load seq_cst (s32), addrspace 1) - ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_ADDR64_]] + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_ADDR64_]] ; GFX7-LABEL: name: load_atomic_global_s32_seq_cst_gep_m2048 ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294965248, implicit $exec ; GFX7-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX7-NEXT: %14:vgpr_32, dead %16:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %14, %subreg.sub1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE2]], %subreg.sub2_sub3 ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, implicit $exec :: (load seq_cst (s32), addrspace 1) - ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_ADDR64_]] + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_ADDR64_]] ; GFX7-FLAT-LABEL: name: load_atomic_global_s32_seq_cst_gep_m2048 ; GFX7-FLAT: liveins: $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: {{ $}} - ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7-FLAT-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294965248, implicit $exec ; GFX7-FLAT-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE 
[[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX7-FLAT-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-FLAT-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX7-FLAT-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-FLAT-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7-FLAT-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX7-FLAT-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX7-FLAT-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX7-FLAT-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX7-FLAT-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX7-FLAT-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX7-FLAT-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX7-FLAT-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX7-FLAT-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7-FLAT-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX7-FLAT-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s32), addrspace 1) - ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] + ; GFX7-FLAT-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_DWORD]] ; GFX9-LABEL: name: load_atomic_global_s32_seq_cst_gep_m2048 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], -2048, 0, implicit $exec :: (load seq_cst (s32), addrspace 1) - ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[PRED_COPY]], -2048, 0, implicit $exec :: (load seq_cst (s32), addrspace 1) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_DWORD]] ; GFX10-LABEL: name: load_atomic_global_s32_seq_cst_gep_m2048 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], -2048, 0, implicit $exec :: (load seq_cst (s32), addrspace 1) - ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[PRED_COPY]], -2048, 0, implicit $exec :: (load seq_cst (s32), addrspace 1) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_DWORD]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_CONSTANT i64 -2048 %2:vgpr(p1) = G_PTR_ADD %0, %1 @@ -505,63 +505,63 @@ ; GFX6-LABEL: name: load_atomic_global_s32_seq_cst_gep_4095 ; GFX6: liveins: $vgpr0_vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = 
PRED_COPY $vgpr0_vgpr1 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 4095, 0, 0, implicit $exec :: (load seq_cst (s32), addrspace 1) - ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_ADDR64_]] + ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 [[PRED_COPY]], [[REG_SEQUENCE1]], 0, 4095, 0, 0, implicit $exec :: (load seq_cst (s32), addrspace 1) + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_ADDR64_]] ; GFX7-LABEL: name: load_atomic_global_s32_seq_cst_gep_4095 ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 4095, 0, 0, implicit $exec :: (load seq_cst (s32), addrspace 1) - ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_ADDR64_]] + ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 [[PRED_COPY]], [[REG_SEQUENCE1]], 0, 4095, 0, 0, implicit $exec :: (load seq_cst (s32), addrspace 1) + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_ADDR64_]] ; GFX7-FLAT-LABEL: name: load_atomic_global_s32_seq_cst_gep_4095 ; GFX7-FLAT: liveins: $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: {{ $}} - ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7-FLAT-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec ; GFX7-FLAT-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX7-FLAT-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-FLAT-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX7-FLAT-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-FLAT-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7-FLAT-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX7-FLAT-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX7-FLAT-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX7-FLAT-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX7-FLAT-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = 
PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX7-FLAT-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX7-FLAT-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX7-FLAT-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX7-FLAT-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7-FLAT-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX7-FLAT-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s32), addrspace 1) - ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] + ; GFX7-FLAT-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_DWORD]] ; GFX9-LABEL: name: load_atomic_global_s32_seq_cst_gep_4095 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 4095, 0, implicit $exec :: (load seq_cst (s32), addrspace 1) - ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[PRED_COPY]], 4095, 0, implicit $exec :: (load seq_cst (s32), addrspace 1) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_DWORD]] ; GFX10-LABEL: name: load_atomic_global_s32_seq_cst_gep_4095 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec ; GFX10-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX10-NEXT: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, 
implicit $exec + ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load seq_cst (s32), addrspace 1) - ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_DWORD]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_CONSTANT i64 4095 %2:vgpr(p1) = G_PTR_ADD %0, %1 @@ -584,73 +584,73 @@ ; GFX6-LABEL: name: load_atomic_global_s64_seq_cst_gep_m2048 ; GFX6: liveins: $vgpr0_vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX6-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294965248, implicit $exec ; GFX6-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX6-NEXT: %14:vgpr_32, dead %16:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %14, %subreg.sub1 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX6-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX6-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX6-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE2]], %subreg.sub2_sub3 ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX2_ADDR64_:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, implicit $exec :: (load seq_cst (s64), addrspace 1) - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[BUFFER_LOAD_DWORDX2_ADDR64_]] + ; GFX6-NEXT: $vgpr0_vgpr1 = PRED_COPY [[BUFFER_LOAD_DWORDX2_ADDR64_]] ; GFX7-LABEL: name: load_atomic_global_s64_seq_cst_gep_m2048 ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = 
PRED_COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294965248, implicit $exec ; GFX7-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX7-NEXT: %14:vgpr_32, dead %16:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %14, %subreg.sub1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE2]], %subreg.sub2_sub3 ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX2_ADDR64_:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, implicit $exec :: (load seq_cst (s64), addrspace 1) - ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[BUFFER_LOAD_DWORDX2_ADDR64_]] + ; GFX7-NEXT: $vgpr0_vgpr1 = PRED_COPY [[BUFFER_LOAD_DWORDX2_ADDR64_]] ; GFX7-FLAT-LABEL: name: load_atomic_global_s64_seq_cst_gep_m2048 ; GFX7-FLAT: liveins: $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: {{ $}} - ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7-FLAT-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294965248, implicit $exec ; GFX7-FLAT-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX7-FLAT-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-FLAT-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX7-FLAT-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-FLAT-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7-FLAT-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, 
[[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
- ; GFX7-FLAT-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
- ; GFX7-FLAT-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
+ ; GFX7-FLAT-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0
+ ; GFX7-FLAT-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0
+ ; GFX7-FLAT-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1
+ ; GFX7-FLAT-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1
+ ; GFX7-FLAT-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec
+ ; GFX7-FLAT-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
+ ; GFX7-FLAT-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
 ; GFX7-FLAT-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s64), addrspace 1)
- ; GFX7-FLAT-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
+ ; GFX7-FLAT-NEXT: $vgpr0_vgpr1 = PRED_COPY [[FLAT_LOAD_DWORDX2_]]
 ; GFX9-LABEL: name: load_atomic_global_s64_seq_cst_gep_m2048
 ; GFX9: liveins: $vgpr0_vgpr1
 ; GFX9-NEXT: {{ $}}
- ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX9-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], -2048, 0, implicit $exec :: (load seq_cst (s64), addrspace 1)
- ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]]
+ ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1
+ ; GFX9-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[PRED_COPY]], -2048, 0, implicit $exec :: (load seq_cst (s64), addrspace 1)
+ ; GFX9-NEXT: $vgpr0_vgpr1 = PRED_COPY [[GLOBAL_LOAD_DWORDX2_]]
 ; GFX10-LABEL: name: load_atomic_global_s64_seq_cst_gep_m2048
 ; GFX10: liveins: $vgpr0_vgpr1
 ; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX10-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], -2048, 0, implicit $exec :: (load seq_cst (s64), addrspace 1)
- ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]]
+ ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1
+ ; GFX10-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[PRED_COPY]], -2048, 0, implicit $exec :: (load seq_cst (s64), addrspace 1)
+ ; GFX10-NEXT: $vgpr0_vgpr1 = PRED_COPY [[GLOBAL_LOAD_DWORDX2_]]
 %0:vgpr(p1) = COPY $vgpr0_vgpr1
 %1:vgpr(s64) = G_CONSTANT i64 -2048
 %2:vgpr(p1) = G_PTR_ADD %0, %1
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-atomic-local.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-atomic-local.mir
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-atomic-local.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-atomic-local.mir
@@ -19,23 +19,23 @@
 ; GFX6-LABEL: name: load_atomic_local_s32_seq_cst
 ; GFX6: liveins: $vgpr0
 ; GFX6-NEXT: {{ $}}
- ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0
 ; GFX6-NEXT: $m0 = S_MOV_B32 -1
- ;
GFX6-NEXT: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load seq_cst (s32), addrspace 3) - ; GFX6-NEXT: $vgpr0 = COPY [[DS_READ_B32_]] + ; GFX6-NEXT: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[PRED_COPY]], 0, 0, implicit $m0, implicit $exec :: (load seq_cst (s32), addrspace 3) + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[DS_READ_B32_]] ; GFX7-LABEL: name: load_atomic_local_s32_seq_cst ; GFX7: liveins: $vgpr0 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 - ; GFX7-NEXT: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load seq_cst (s32), addrspace 3) - ; GFX7-NEXT: $vgpr0 = COPY [[DS_READ_B32_]] + ; GFX7-NEXT: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[PRED_COPY]], 0, 0, implicit $m0, implicit $exec :: (load seq_cst (s32), addrspace 3) + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[DS_READ_B32_]] ; GFX9-LABEL: name: load_atomic_local_s32_seq_cst ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[DS_READ_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[COPY]], 0, 0, implicit $exec :: (load seq_cst (s32), addrspace 3) - ; GFX9-NEXT: $vgpr0 = COPY [[DS_READ_B32_gfx9_]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[DS_READ_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[PRED_COPY]], 0, 0, implicit $exec :: (load seq_cst (s32), addrspace 3) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[DS_READ_B32_gfx9_]] %0:vgpr(p3) = COPY $vgpr0 %1:vgpr(s32) = G_LOAD %0 :: (load seq_cst (s32), align 4, addrspace 3) $vgpr0 = COPY %1 @@ -59,20 +59,20 @@ ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 ; GFX6-NEXT: $m0 = S_MOV_B32 -1 ; GFX6-NEXT: [[LOAD:%[0-9]+]]:vgpr_32(<2 x s16>) = G_LOAD [[COPY]](p3) :: (load seq_cst (<2 x s16>), addrspace 3) - ; GFX6-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[LOAD]](<2 x s16>) ; GFX7-LABEL: name: load_atomic_local_v2s16_seq_cst ; GFX7: liveins: $vgpr0 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vgpr_32(<2 x s16>) = G_LOAD [[COPY]](p3) :: (load seq_cst (<2 x s16>), addrspace 3) - ; GFX7-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[LOAD]](<2 x s16>) ; GFX9-LABEL: name: load_atomic_local_v2s16_seq_cst ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vgpr_32(<2 x s16>) = G_LOAD [[COPY]](p3) :: (load seq_cst (<2 x s16>), addrspace 3) - ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[LOAD]](<2 x s16>) %0:vgpr(p3) = COPY $vgpr0 %1:vgpr(<2 x s16>) = G_LOAD %0 :: (load seq_cst (<2 x s16>), align 4, addrspace 3) $vgpr0 = COPY %1 @@ -96,20 +96,20 @@ ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 ; GFX6-NEXT: $m0 = S_MOV_B32 -1 ; GFX6-NEXT: [[LOAD:%[0-9]+]]:vgpr_32(p3) = G_LOAD [[COPY]](p3) :: (load seq_cst (p3), addrspace 3) - ; GFX6-NEXT: $vgpr0 = COPY [[LOAD]](p3) + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[LOAD]](p3) ; GFX7-LABEL: name: load_atomic_local_p3_seq_cst ; GFX7: liveins: $vgpr0 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vgpr_32(p3) = G_LOAD [[COPY]](p3) :: (load seq_cst (p3), addrspace 3) - ; GFX7-NEXT: $vgpr0 = COPY 
[[LOAD]](p3) + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[LOAD]](p3) ; GFX9-LABEL: name: load_atomic_local_p3_seq_cst ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vgpr_32(p3) = G_LOAD [[COPY]](p3) :: (load seq_cst (p3), addrspace 3) - ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](p3) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[LOAD]](p3) %0:vgpr(p3) = COPY $vgpr0 %1:vgpr(p3) = G_LOAD %0 :: (load seq_cst (p3), align 4, addrspace 3) $vgpr0 = COPY %1 @@ -130,23 +130,23 @@ ; GFX6-LABEL: name: load_atomic_local_s64_seq_cst ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX6-NEXT: $m0 = S_MOV_B32 -1 - ; GFX6-NEXT: [[DS_READ_B64_:%[0-9]+]]:vreg_64 = DS_READ_B64 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load seq_cst (s64), addrspace 3) - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ_B64_]] + ; GFX6-NEXT: [[DS_READ_B64_:%[0-9]+]]:vreg_64 = DS_READ_B64 [[PRED_COPY]], 0, 0, implicit $m0, implicit $exec :: (load seq_cst (s64), addrspace 3) + ; GFX6-NEXT: $vgpr0_vgpr1 = PRED_COPY [[DS_READ_B64_]] ; GFX7-LABEL: name: load_atomic_local_s64_seq_cst ; GFX7: liveins: $vgpr0 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 - ; GFX7-NEXT: [[DS_READ_B64_:%[0-9]+]]:vreg_64 = DS_READ_B64 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load seq_cst (s64), addrspace 3) - ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ_B64_]] + ; GFX7-NEXT: [[DS_READ_B64_:%[0-9]+]]:vreg_64 = DS_READ_B64 [[PRED_COPY]], 0, 0, implicit $m0, implicit $exec :: (load seq_cst (s64), addrspace 3) + ; GFX7-NEXT: $vgpr0_vgpr1 = PRED_COPY [[DS_READ_B64_]] ; GFX9-LABEL: name: load_atomic_local_s64_seq_cst ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[DS_READ_B64_gfx9_:%[0-9]+]]:vreg_64 = DS_READ_B64_gfx9 [[COPY]], 0, 0, implicit $exec :: (load seq_cst (s64), addrspace 3) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ_B64_gfx9_]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[DS_READ_B64_gfx9_:%[0-9]+]]:vreg_64 = DS_READ_B64_gfx9 [[PRED_COPY]], 0, 0, implicit $exec :: (load seq_cst (s64), addrspace 3) + ; GFX9-NEXT: $vgpr0_vgpr1 = PRED_COPY [[DS_READ_B64_gfx9_]] %0:vgpr(p3) = COPY $vgpr0 %1:vgpr(s64) = G_LOAD %0 :: (load seq_cst (s64), align 8, addrspace 3) $vgpr0_vgpr1 = COPY %1 @@ -170,20 +170,20 @@ ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 ; GFX6-NEXT: $m0 = S_MOV_B32 -1 ; GFX6-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load seq_cst (<2 x s32>), addrspace 3) - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX6-NEXT: $vgpr0_vgpr1 = PRED_COPY [[LOAD]](<2 x s32>) ; GFX7-LABEL: name: load_atomic_local_v2s32_seq_cst ; GFX7: liveins: $vgpr0 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load seq_cst (<2 x s32>), addrspace 3) - ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX7-NEXT: $vgpr0_vgpr1 = PRED_COPY [[LOAD]](<2 x s32>) ; GFX9-LABEL: name: load_atomic_local_v2s32_seq_cst ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load seq_cst (<2 x s32>), 
addrspace 3) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX9-NEXT: $vgpr0_vgpr1 = PRED_COPY [[LOAD]](<2 x s32>) %0:vgpr(p3) = COPY $vgpr0 %1:vgpr(<2 x s32>) = G_LOAD %0 :: (load seq_cst (<2 x s32>), align 8, addrspace 3) $vgpr0_vgpr1 = COPY %1 @@ -207,20 +207,20 @@ ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 ; GFX6-NEXT: $m0 = S_MOV_B32 -1 ; GFX6-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load seq_cst (<4 x s16>), addrspace 3) - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; GFX6-NEXT: $vgpr0_vgpr1 = PRED_COPY [[LOAD]](<4 x s16>) ; GFX7-LABEL: name: load_atomic_local_v4s16_seq_cst ; GFX7: liveins: $vgpr0 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load seq_cst (<4 x s16>), addrspace 3) - ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; GFX7-NEXT: $vgpr0_vgpr1 = PRED_COPY [[LOAD]](<4 x s16>) ; GFX9-LABEL: name: load_atomic_local_v4s16_seq_cst ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load seq_cst (<4 x s16>), addrspace 3) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; GFX9-NEXT: $vgpr0_vgpr1 = PRED_COPY [[LOAD]](<4 x s16>) %0:vgpr(p3) = COPY $vgpr0 %1:vgpr(<4 x s16>) = G_LOAD %0 :: (load seq_cst (<4 x s16>), align 8, addrspace 3) $vgpr0_vgpr1 = COPY %1 @@ -244,20 +244,20 @@ ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 ; GFX6-NEXT: $m0 = S_MOV_B32 -1 ; GFX6-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p1) = G_LOAD [[COPY]](p3) :: (load seq_cst (p1), addrspace 3) - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1) + ; GFX6-NEXT: $vgpr0_vgpr1 = PRED_COPY [[LOAD]](p1) ; GFX7-LABEL: name: load_atomic_local_p1_seq_cst ; GFX7: liveins: $vgpr0 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p1) = G_LOAD [[COPY]](p3) :: (load seq_cst (p1), addrspace 3) - ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1) + ; GFX7-NEXT: $vgpr0_vgpr1 = PRED_COPY [[LOAD]](p1) ; GFX9-LABEL: name: load_atomic_local_p1_seq_cst ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p1) = G_LOAD [[COPY]](p3) :: (load seq_cst (p1), addrspace 3) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1) + ; GFX9-NEXT: $vgpr0_vgpr1 = PRED_COPY [[LOAD]](p1) %0:vgpr(p3) = COPY $vgpr0 %1:vgpr(p1) = G_LOAD %0 :: (load seq_cst (p1), align 8, addrspace 3) $vgpr0_vgpr1 = COPY %1 @@ -281,20 +281,20 @@ ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 ; GFX6-NEXT: $m0 = S_MOV_B32 -1 ; GFX6-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p0) = G_LOAD [[COPY]](p3) :: (load seq_cst (p0), addrspace 3) - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p0) + ; GFX6-NEXT: $vgpr0_vgpr1 = PRED_COPY [[LOAD]](p0) ; GFX7-LABEL: name: load_atomic_local_p0_seq_cst ; GFX7: liveins: $vgpr0 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p0) = G_LOAD [[COPY]](p3) :: (load seq_cst (p0), addrspace 3) - ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p0) + ; GFX7-NEXT: $vgpr0_vgpr1 = PRED_COPY [[LOAD]](p0) ; GFX9-LABEL: name: load_atomic_local_p0_seq_cst ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p0) = G_LOAD 
[[COPY]](p3) :: (load seq_cst (p0), addrspace 3)
- ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p0)
+ ; GFX9-NEXT: $vgpr0_vgpr1 = PRED_COPY [[LOAD]](p0)
 %0:vgpr(p3) = COPY $vgpr0
 %1:vgpr(p0) = G_LOAD %0 :: (load seq_cst (p0), align 8, addrspace 3)
 $vgpr0_vgpr1 = COPY %1
@@ -315,25 +315,25 @@
 ; GFX6-LABEL: name: load_atomic_local_s32_seq_cst_gep_65535
 ; GFX6: liveins: $vgpr0
 ; GFX6-NEXT: {{ $}}
- ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0
 ; GFX6-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 65535, implicit $exec
- ; GFX6-NEXT: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
+ ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
 ; GFX6-NEXT: $m0 = S_MOV_B32 -1
- ; GFX6-NEXT: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 %2, 0, 0, implicit $m0, implicit $exec :: (load seq_cst (s32), addrspace 3)
- ; GFX6-NEXT: $vgpr0 = COPY [[DS_READ_B32_]]
+ ; GFX6-NEXT: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[V_ADD_CO_U32_e64_]], 0, 0, implicit $m0, implicit $exec :: (load seq_cst (s32), addrspace 3)
+ ; GFX6-NEXT: $vgpr0 = PRED_COPY [[DS_READ_B32_]]
 ; GFX7-LABEL: name: load_atomic_local_s32_seq_cst_gep_65535
 ; GFX7: liveins: $vgpr0
 ; GFX7-NEXT: {{ $}}
- ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0
 ; GFX7-NEXT: $m0 = S_MOV_B32 -1
- ; GFX7-NEXT: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 65535, 0, implicit $m0, implicit $exec :: (load seq_cst (s32), addrspace 3)
- ; GFX7-NEXT: $vgpr0 = COPY [[DS_READ_B32_]]
+ ; GFX7-NEXT: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[PRED_COPY]], 65535, 0, implicit $m0, implicit $exec :: (load seq_cst (s32), addrspace 3)
+ ; GFX7-NEXT: $vgpr0 = PRED_COPY [[DS_READ_B32_]]
 ; GFX9-LABEL: name: load_atomic_local_s32_seq_cst_gep_65535
 ; GFX9: liveins: $vgpr0
 ; GFX9-NEXT: {{ $}}
- ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX9-NEXT: [[DS_READ_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[COPY]], 65535, 0, implicit $exec :: (load seq_cst (s32), addrspace 3)
- ; GFX9-NEXT: $vgpr0 = COPY [[DS_READ_B32_gfx9_]]
+ ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0
+ ; GFX9-NEXT: [[DS_READ_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[PRED_COPY]], 65535, 0, implicit $exec :: (load seq_cst (s32), addrspace 3)
+ ; GFX9-NEXT: $vgpr0 = PRED_COPY [[DS_READ_B32_gfx9_]]
 %0:vgpr(p3) = COPY $vgpr0
 %1:vgpr(s32) = G_CONSTANT i32 65535
 %2:vgpr(p3) = G_PTR_ADD %0, %1
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-constant.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-constant.mir
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-constant.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-constant.mir
@@ -20,27 +20,27 @@
 ; GFX6-LABEL: name: load_constant_s32_from_4
 ; GFX6: liveins: $sgpr0_sgpr1
 ; GFX6-NEXT: {{ $}}
- ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
- ; GFX6-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (load (s32), addrspace 4)
- ; GFX6-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]]
+ ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1
+ ; GFX6-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[PRED_COPY]], 0, 0 :: (load (s32), addrspace 4)
+ ; GFX6-NEXT: $sgpr0 =
PRED_COPY [[S_LOAD_DWORD_IMM]] ; GFX7-LABEL: name: load_constant_s32_from_4 ; GFX7: liveins: $sgpr0_sgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX7-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (load (s32), addrspace 4) - ; GFX7-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]] + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX7-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[PRED_COPY]], 0, 0 :: (load (s32), addrspace 4) + ; GFX7-NEXT: $sgpr0 = PRED_COPY [[S_LOAD_DWORD_IMM]] ; GFX8-LABEL: name: load_constant_s32_from_4 ; GFX8: liveins: $sgpr0_sgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX8-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (load (s32), addrspace 4) - ; GFX8-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]] + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX8-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[PRED_COPY]], 0, 0 :: (load (s32), addrspace 4) + ; GFX8-NEXT: $sgpr0 = PRED_COPY [[S_LOAD_DWORD_IMM]] ; GFX10-LABEL: name: load_constant_s32_from_4 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX10-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (load (s32), addrspace 4) - ; GFX10-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX10-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[PRED_COPY]], 0, 0 :: (load (s32), addrspace 4) + ; GFX10-NEXT: $sgpr0 = PRED_COPY [[S_LOAD_DWORD_IMM]] %0:sgpr(p4) = COPY $sgpr0_sgpr1 %1:sgpr(s32) = G_LOAD %0 :: (load (s32), align 4, addrspace 4) $sgpr0 = COPY %1 @@ -62,27 +62,27 @@ ; GFX6-LABEL: name: load_constant_v2s16_from_4 ; GFX6: liveins: $sgpr0_sgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX6-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (load (<2 x s16>), addrspace 4) - ; GFX6-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]] + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX6-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[PRED_COPY]], 0, 0 :: (load (<2 x s16>), addrspace 4) + ; GFX6-NEXT: $sgpr0 = PRED_COPY [[S_LOAD_DWORD_IMM]] ; GFX7-LABEL: name: load_constant_v2s16_from_4 ; GFX7: liveins: $sgpr0_sgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX7-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (load (<2 x s16>), addrspace 4) - ; GFX7-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]] + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX7-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[PRED_COPY]], 0, 0 :: (load (<2 x s16>), addrspace 4) + ; GFX7-NEXT: $sgpr0 = PRED_COPY [[S_LOAD_DWORD_IMM]] ; GFX8-LABEL: name: load_constant_v2s16_from_4 ; GFX8: liveins: $sgpr0_sgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX8-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (load (<2 x s16>), addrspace 4) - ; GFX8-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]] + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX8-NEXT: 
[[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[PRED_COPY]], 0, 0 :: (load (<2 x s16>), addrspace 4) + ; GFX8-NEXT: $sgpr0 = PRED_COPY [[S_LOAD_DWORD_IMM]] ; GFX10-LABEL: name: load_constant_v2s16_from_4 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX10-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (load (<2 x s16>), addrspace 4) - ; GFX10-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX10-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[PRED_COPY]], 0, 0 :: (load (<2 x s16>), addrspace 4) + ; GFX10-NEXT: $sgpr0 = PRED_COPY [[S_LOAD_DWORD_IMM]] %0:sgpr(p4) = COPY $sgpr0_sgpr1 %1:sgpr(<2 x s16>) = G_LOAD %0 :: (load (<2 x s16>), align 4, addrspace 4) $sgpr0 = COPY %1 @@ -103,27 +103,27 @@ ; GFX6-LABEL: name: load_constant_v2s32 ; GFX6: liveins: $sgpr0_sgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX6-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load (<2 x s32>), addrspace 4) - ; GFX6-NEXT: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]] + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX6-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[PRED_COPY]], 0, 0 :: (load (<2 x s32>), addrspace 4) + ; GFX6-NEXT: $sgpr0_sgpr1 = PRED_COPY [[S_LOAD_DWORDX2_IMM]] ; GFX7-LABEL: name: load_constant_v2s32 ; GFX7: liveins: $sgpr0_sgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX7-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load (<2 x s32>), addrspace 4) - ; GFX7-NEXT: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]] + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX7-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[PRED_COPY]], 0, 0 :: (load (<2 x s32>), addrspace 4) + ; GFX7-NEXT: $sgpr0_sgpr1 = PRED_COPY [[S_LOAD_DWORDX2_IMM]] ; GFX8-LABEL: name: load_constant_v2s32 ; GFX8: liveins: $sgpr0_sgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX8-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load (<2 x s32>), addrspace 4) - ; GFX8-NEXT: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]] + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX8-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[PRED_COPY]], 0, 0 :: (load (<2 x s32>), addrspace 4) + ; GFX8-NEXT: $sgpr0_sgpr1 = PRED_COPY [[S_LOAD_DWORDX2_IMM]] ; GFX10-LABEL: name: load_constant_v2s32 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX10-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load (<2 x s32>), addrspace 4) - ; GFX10-NEXT: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX10-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[PRED_COPY]], 0, 0 :: (load (<2 x s32>), addrspace 4) + ; GFX10-NEXT: $sgpr0_sgpr1 = PRED_COPY [[S_LOAD_DWORDX2_IMM]] %0:sgpr(p4) = COPY $sgpr0_sgpr1 %1:sgpr(<2 x s32>) = G_LOAD %0 :: (load (<2 x s32>), align 8, addrspace 4) $sgpr0_sgpr1 = COPY %1 @@ -143,27 +143,27 @@ ; GFX6-LABEL: name: 
load_constant_v2s32_align4 ; GFX6: liveins: $sgpr0_sgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX6-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load (<2 x s32>), align 4, addrspace 4) - ; GFX6-NEXT: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]] + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX6-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[PRED_COPY]], 0, 0 :: (load (<2 x s32>), align 4, addrspace 4) + ; GFX6-NEXT: $sgpr0_sgpr1 = PRED_COPY [[S_LOAD_DWORDX2_IMM]] ; GFX7-LABEL: name: load_constant_v2s32_align4 ; GFX7: liveins: $sgpr0_sgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX7-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load (<2 x s32>), align 4, addrspace 4) - ; GFX7-NEXT: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]] + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX7-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[PRED_COPY]], 0, 0 :: (load (<2 x s32>), align 4, addrspace 4) + ; GFX7-NEXT: $sgpr0_sgpr1 = PRED_COPY [[S_LOAD_DWORDX2_IMM]] ; GFX8-LABEL: name: load_constant_v2s32_align4 ; GFX8: liveins: $sgpr0_sgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX8-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load (<2 x s32>), align 4, addrspace 4) - ; GFX8-NEXT: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]] + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX8-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[PRED_COPY]], 0, 0 :: (load (<2 x s32>), align 4, addrspace 4) + ; GFX8-NEXT: $sgpr0_sgpr1 = PRED_COPY [[S_LOAD_DWORDX2_IMM]] ; GFX10-LABEL: name: load_constant_v2s32_align4 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX10-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load (<2 x s32>), align 4, addrspace 4) - ; GFX10-NEXT: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX10-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[PRED_COPY]], 0, 0 :: (load (<2 x s32>), align 4, addrspace 4) + ; GFX10-NEXT: $sgpr0_sgpr1 = PRED_COPY [[S_LOAD_DWORDX2_IMM]] %0:sgpr(p4) = COPY $sgpr0_sgpr1 %1:sgpr(<2 x s32>) = G_LOAD %0 :: (load (<2 x s32>), align 4, addrspace 4) $sgpr0_sgpr1 = COPY %1 @@ -183,27 +183,27 @@ ; GFX6-LABEL: name: load_constant_v4s16_align4 ; GFX6: liveins: $sgpr0_sgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX6-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load (<4 x s16>), align 4, addrspace 4) - ; GFX6-NEXT: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]] + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX6-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[PRED_COPY]], 0, 0 :: (load (<4 x s16>), align 4, addrspace 4) + ; GFX6-NEXT: $sgpr0_sgpr1 = PRED_COPY [[S_LOAD_DWORDX2_IMM]] ; GFX7-LABEL: name: load_constant_v4s16_align4 ; GFX7: liveins: $sgpr0_sgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX7-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 
0, 0 :: (load (<4 x s16>), align 4, addrspace 4) - ; GFX7-NEXT: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]] + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX7-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[PRED_COPY]], 0, 0 :: (load (<4 x s16>), align 4, addrspace 4) + ; GFX7-NEXT: $sgpr0_sgpr1 = PRED_COPY [[S_LOAD_DWORDX2_IMM]] ; GFX8-LABEL: name: load_constant_v4s16_align4 ; GFX8: liveins: $sgpr0_sgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX8-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load (<4 x s16>), align 4, addrspace 4) - ; GFX8-NEXT: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]] + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX8-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[PRED_COPY]], 0, 0 :: (load (<4 x s16>), align 4, addrspace 4) + ; GFX8-NEXT: $sgpr0_sgpr1 = PRED_COPY [[S_LOAD_DWORDX2_IMM]] ; GFX10-LABEL: name: load_constant_v4s16_align4 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX10-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load (<4 x s16>), align 4, addrspace 4) - ; GFX10-NEXT: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX10-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[PRED_COPY]], 0, 0 :: (load (<4 x s16>), align 4, addrspace 4) + ; GFX10-NEXT: $sgpr0_sgpr1 = PRED_COPY [[S_LOAD_DWORDX2_IMM]] %0:sgpr(p4) = COPY $sgpr0_sgpr1 %1:sgpr(<4 x s16>) = G_LOAD %0 :: (load (<4 x s16>), align 4, addrspace 4) $sgpr0_sgpr1 = COPY %1 @@ -224,27 +224,27 @@ ; GFX6-LABEL: name: load_constant_v4s32_align4 ; GFX6: liveins: $sgpr0_sgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX6-NEXT: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 0, 0 :: (load (<4 x s32>), align 4, addrspace 4) - ; GFX6-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[S_LOAD_DWORDX4_IMM]] + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX6-NEXT: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[PRED_COPY]], 0, 0 :: (load (<4 x s32>), align 4, addrspace 4) + ; GFX6-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[S_LOAD_DWORDX4_IMM]] ; GFX7-LABEL: name: load_constant_v4s32_align4 ; GFX7: liveins: $sgpr0_sgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX7-NEXT: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 0, 0 :: (load (<4 x s32>), align 4, addrspace 4) - ; GFX7-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[S_LOAD_DWORDX4_IMM]] + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX7-NEXT: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[PRED_COPY]], 0, 0 :: (load (<4 x s32>), align 4, addrspace 4) + ; GFX7-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[S_LOAD_DWORDX4_IMM]] ; GFX8-LABEL: name: load_constant_v4s32_align4 ; GFX8: liveins: $sgpr0_sgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX8-NEXT: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 0, 0 :: (load (<4 x s32>), align 4, addrspace 4) - ; GFX8-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[S_LOAD_DWORDX4_IMM]] + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX8-NEXT: 
[[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[PRED_COPY]], 0, 0 :: (load (<4 x s32>), align 4, addrspace 4) + ; GFX8-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[S_LOAD_DWORDX4_IMM]] ; GFX10-LABEL: name: load_constant_v4s32_align4 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX10-NEXT: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 0, 0 :: (load (<4 x s32>), align 4, addrspace 4) - ; GFX10-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[S_LOAD_DWORDX4_IMM]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX10-NEXT: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[PRED_COPY]], 0, 0 :: (load (<4 x s32>), align 4, addrspace 4) + ; GFX10-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[S_LOAD_DWORDX4_IMM]] %0:sgpr(p4) = COPY $sgpr0_sgpr1 %1:sgpr(<4 x s32>) = G_LOAD %0 :: (load (<4 x s32>), align 4, addrspace 4) $sgpr0_sgpr1_sgpr2_sgpr3 = COPY %1 @@ -265,27 +265,27 @@ ; GFX6-LABEL: name: load_constant_s64 ; GFX6: liveins: $sgpr0_sgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX6-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load (s64), addrspace 4) - ; GFX6-NEXT: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]] + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX6-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[PRED_COPY]], 0, 0 :: (load (s64), addrspace 4) + ; GFX6-NEXT: $sgpr0_sgpr1 = PRED_COPY [[S_LOAD_DWORDX2_IMM]] ; GFX7-LABEL: name: load_constant_s64 ; GFX7: liveins: $sgpr0_sgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX7-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load (s64), addrspace 4) - ; GFX7-NEXT: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]] + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX7-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[PRED_COPY]], 0, 0 :: (load (s64), addrspace 4) + ; GFX7-NEXT: $sgpr0_sgpr1 = PRED_COPY [[S_LOAD_DWORDX2_IMM]] ; GFX8-LABEL: name: load_constant_s64 ; GFX8: liveins: $sgpr0_sgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX8-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load (s64), addrspace 4) - ; GFX8-NEXT: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]] + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX8-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[PRED_COPY]], 0, 0 :: (load (s64), addrspace 4) + ; GFX8-NEXT: $sgpr0_sgpr1 = PRED_COPY [[S_LOAD_DWORDX2_IMM]] ; GFX10-LABEL: name: load_constant_s64 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX10-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load (s64), addrspace 4) - ; GFX10-NEXT: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX10-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[PRED_COPY]], 0, 0 :: (load (s64), addrspace 4) + ; GFX10-NEXT: $sgpr0_sgpr1 = PRED_COPY [[S_LOAD_DWORDX2_IMM]] %0:sgpr(p4) = COPY $sgpr0_sgpr1 %1:sgpr(s64) = G_LOAD %0 :: (load (s64), align 8, addrspace 4) $sgpr0_sgpr1 = COPY %1 @@ -306,27 +306,27 @@ ; 
GFX6-LABEL: name: load_constant_s64_align4 ; GFX6: liveins: $sgpr0_sgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX6-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load (s64), align 4, addrspace 4) - ; GFX6-NEXT: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]] + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX6-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[PRED_COPY]], 0, 0 :: (load (s64), align 4, addrspace 4) + ; GFX6-NEXT: $sgpr0_sgpr1 = PRED_COPY [[S_LOAD_DWORDX2_IMM]] ; GFX7-LABEL: name: load_constant_s64_align4 ; GFX7: liveins: $sgpr0_sgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX7-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load (s64), align 4, addrspace 4) - ; GFX7-NEXT: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]] + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX7-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[PRED_COPY]], 0, 0 :: (load (s64), align 4, addrspace 4) + ; GFX7-NEXT: $sgpr0_sgpr1 = PRED_COPY [[S_LOAD_DWORDX2_IMM]] ; GFX8-LABEL: name: load_constant_s64_align4 ; GFX8: liveins: $sgpr0_sgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX8-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load (s64), align 4, addrspace 4) - ; GFX8-NEXT: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]] + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX8-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[PRED_COPY]], 0, 0 :: (load (s64), align 4, addrspace 4) + ; GFX8-NEXT: $sgpr0_sgpr1 = PRED_COPY [[S_LOAD_DWORDX2_IMM]] ; GFX10-LABEL: name: load_constant_s64_align4 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX10-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load (s64), align 4, addrspace 4) - ; GFX10-NEXT: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX10-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[PRED_COPY]], 0, 0 :: (load (s64), align 4, addrspace 4) + ; GFX10-NEXT: $sgpr0_sgpr1 = PRED_COPY [[S_LOAD_DWORDX2_IMM]] %0:sgpr(p4) = COPY $sgpr0_sgpr1 %1:sgpr(s64) = G_LOAD %0 :: (load (s64), align 4, addrspace 4) $sgpr0_sgpr1 = COPY %1 @@ -347,27 +347,27 @@ ; GFX6-LABEL: name: load_constant_v2s64 ; GFX6: liveins: $sgpr0_sgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX6-NEXT: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 0, 0 :: (load (<2 x s64>), align 4, addrspace 4) - ; GFX6-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[S_LOAD_DWORDX4_IMM]] + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX6-NEXT: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[PRED_COPY]], 0, 0 :: (load (<2 x s64>), align 4, addrspace 4) + ; GFX6-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[S_LOAD_DWORDX4_IMM]] ; GFX7-LABEL: name: load_constant_v2s64 ; GFX7: liveins: $sgpr0_sgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX7-NEXT: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 0, 0 :: (load (<2 x s64>), align 4, addrspace 4) - ; 
GFX7-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[S_LOAD_DWORDX4_IMM]] + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX7-NEXT: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[PRED_COPY]], 0, 0 :: (load (<2 x s64>), align 4, addrspace 4) + ; GFX7-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[S_LOAD_DWORDX4_IMM]] ; GFX8-LABEL: name: load_constant_v2s64 ; GFX8: liveins: $sgpr0_sgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX8-NEXT: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 0, 0 :: (load (<2 x s64>), align 4, addrspace 4) - ; GFX8-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[S_LOAD_DWORDX4_IMM]] + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX8-NEXT: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[PRED_COPY]], 0, 0 :: (load (<2 x s64>), align 4, addrspace 4) + ; GFX8-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[S_LOAD_DWORDX4_IMM]] ; GFX10-LABEL: name: load_constant_v2s64 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX10-NEXT: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 0, 0 :: (load (<2 x s64>), align 4, addrspace 4) - ; GFX10-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[S_LOAD_DWORDX4_IMM]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX10-NEXT: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[PRED_COPY]], 0, 0 :: (load (<2 x s64>), align 4, addrspace 4) + ; GFX10-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[S_LOAD_DWORDX4_IMM]] %0:sgpr(p4) = COPY $sgpr0_sgpr1 %1:sgpr(<2 x s64>) = G_LOAD %0 :: (load (<2 x s64>), align 4, addrspace 4) $sgpr0_sgpr1_sgpr2_sgpr3 = COPY %1 @@ -390,25 +390,25 @@ ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 ; GFX6-NEXT: [[LOAD:%[0-9]+]]:sgpr_128(<2 x p1>) = G_LOAD [[COPY]](p4) :: (load (<2 x p1>), align 4, addrspace 4) - ; GFX6-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[LOAD]](<2 x p1>) + ; GFX6-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[LOAD]](<2 x p1>) ; GFX7-LABEL: name: load_constant_v2p1 ; GFX7: liveins: $sgpr0_sgpr1 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 ; GFX7-NEXT: [[LOAD:%[0-9]+]]:sgpr_128(<2 x p1>) = G_LOAD [[COPY]](p4) :: (load (<2 x p1>), align 4, addrspace 4) - ; GFX7-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[LOAD]](<2 x p1>) + ; GFX7-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[LOAD]](<2 x p1>) ; GFX8-LABEL: name: load_constant_v2p1 ; GFX8: liveins: $sgpr0_sgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 ; GFX8-NEXT: [[LOAD:%[0-9]+]]:sgpr_128(<2 x p1>) = G_LOAD [[COPY]](p4) :: (load (<2 x p1>), align 4, addrspace 4) - ; GFX8-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[LOAD]](<2 x p1>) + ; GFX8-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[LOAD]](<2 x p1>) ; GFX10-LABEL: name: load_constant_v2p1 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:sgpr_128(<2 x p1>) = G_LOAD [[COPY]](p4) :: (load (<2 x p1>), align 4, addrspace 4) - ; GFX10-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[LOAD]](<2 x p1>) + ; GFX10-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[LOAD]](<2 x p1>) %0:sgpr(p4) = COPY $sgpr0_sgpr1 %1:sgpr(<2 x p1>) = G_LOAD %0 :: (load (<2 x p1>), align 4, addrspace 4) $sgpr0_sgpr1_sgpr2_sgpr3 = COPY %1 @@ -431,25 +431,25 @@ ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: 
[[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 ; GFX6-NEXT: [[LOAD:%[0-9]+]]:sgpr_128(s128) = G_LOAD [[COPY]](p4) :: (load (s128), align 4, addrspace 4) - ; GFX6-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[LOAD]](s128) + ; GFX6-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[LOAD]](s128) ; GFX7-LABEL: name: load_constant_s128_align4 ; GFX7: liveins: $sgpr0_sgpr1 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 ; GFX7-NEXT: [[LOAD:%[0-9]+]]:sgpr_128(s128) = G_LOAD [[COPY]](p4) :: (load (s128), align 4, addrspace 4) - ; GFX7-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[LOAD]](s128) + ; GFX7-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[LOAD]](s128) ; GFX8-LABEL: name: load_constant_s128_align4 ; GFX8: liveins: $sgpr0_sgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 ; GFX8-NEXT: [[LOAD:%[0-9]+]]:sgpr_128(s128) = G_LOAD [[COPY]](p4) :: (load (s128), align 4, addrspace 4) - ; GFX8-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[LOAD]](s128) + ; GFX8-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[LOAD]](s128) ; GFX10-LABEL: name: load_constant_s128_align4 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:sgpr_128(s128) = G_LOAD [[COPY]](p4) :: (load (s128), align 4, addrspace 4) - ; GFX10-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[LOAD]](s128) + ; GFX10-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[LOAD]](s128) %0:sgpr(p4) = COPY $sgpr0_sgpr1 %1:sgpr(s128) = G_LOAD %0 :: (load (s128), align 4, addrspace 4) $sgpr0_sgpr1_sgpr2_sgpr3 = COPY %1 @@ -470,27 +470,27 @@ ; GFX6-LABEL: name: load_constant_p3_from_4 ; GFX6: liveins: $sgpr0_sgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX6-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (load (p3), addrspace 4) - ; GFX6-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]] + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX6-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[PRED_COPY]], 0, 0 :: (load (p3), addrspace 4) + ; GFX6-NEXT: $sgpr0 = PRED_COPY [[S_LOAD_DWORD_IMM]] ; GFX7-LABEL: name: load_constant_p3_from_4 ; GFX7: liveins: $sgpr0_sgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX7-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (load (p3), addrspace 4) - ; GFX7-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]] + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX7-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[PRED_COPY]], 0, 0 :: (load (p3), addrspace 4) + ; GFX7-NEXT: $sgpr0 = PRED_COPY [[S_LOAD_DWORD_IMM]] ; GFX8-LABEL: name: load_constant_p3_from_4 ; GFX8: liveins: $sgpr0_sgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX8-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (load (p3), addrspace 4) - ; GFX8-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]] + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX8-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[PRED_COPY]], 0, 0 :: (load (p3), addrspace 4) + ; GFX8-NEXT: $sgpr0 = PRED_COPY [[S_LOAD_DWORD_IMM]] ; GFX10-LABEL: name: load_constant_p3_from_4 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX10-NEXT: 
[[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (load (p3), addrspace 4) - ; GFX10-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX10-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[PRED_COPY]], 0, 0 :: (load (p3), addrspace 4) + ; GFX10-NEXT: $sgpr0 = PRED_COPY [[S_LOAD_DWORD_IMM]] %0:sgpr(p4) = COPY $sgpr0_sgpr1 %1:sgpr(p3) = G_LOAD %0 :: (load (p3), align 4, addrspace 4) $sgpr0 = COPY %1 @@ -511,27 +511,27 @@ ; GFX6-LABEL: name: load_constant_p4_from_8 ; GFX6: liveins: $sgpr0_sgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX6-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load (p4), addrspace 4) - ; GFX6-NEXT: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]] + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX6-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[PRED_COPY]], 0, 0 :: (load (p4), addrspace 4) + ; GFX6-NEXT: $sgpr0_sgpr1 = PRED_COPY [[S_LOAD_DWORDX2_IMM]] ; GFX7-LABEL: name: load_constant_p4_from_8 ; GFX7: liveins: $sgpr0_sgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX7-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load (p4), addrspace 4) - ; GFX7-NEXT: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]] + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX7-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[PRED_COPY]], 0, 0 :: (load (p4), addrspace 4) + ; GFX7-NEXT: $sgpr0_sgpr1 = PRED_COPY [[S_LOAD_DWORDX2_IMM]] ; GFX8-LABEL: name: load_constant_p4_from_8 ; GFX8: liveins: $sgpr0_sgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX8-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load (p4), addrspace 4) - ; GFX8-NEXT: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]] + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX8-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[PRED_COPY]], 0, 0 :: (load (p4), addrspace 4) + ; GFX8-NEXT: $sgpr0_sgpr1 = PRED_COPY [[S_LOAD_DWORDX2_IMM]] ; GFX10-LABEL: name: load_constant_p4_from_8 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX10-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load (p4), addrspace 4) - ; GFX10-NEXT: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX10-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[PRED_COPY]], 0, 0 :: (load (p4), addrspace 4) + ; GFX10-NEXT: $sgpr0_sgpr1 = PRED_COPY [[S_LOAD_DWORDX2_IMM]] %0:sgpr(p4) = COPY $sgpr0_sgpr1 %1:sgpr(p4) = G_LOAD %0 :: (load (p4), align 8, addrspace 4) $sgpr0_sgpr1 = COPY %1 @@ -554,25 +554,25 @@ ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 ; GFX6-NEXT: [[LOAD:%[0-9]+]]:sreg_64(p999) = G_LOAD [[COPY]](p4) :: (load (p999), addrspace 4) - ; GFX6-NEXT: $sgpr0_sgpr1 = COPY [[LOAD]](p999) + ; GFX6-NEXT: $sgpr0_sgpr1 = PRED_COPY [[LOAD]](p999) ; GFX7-LABEL: name: load_constant_p999_from_8 ; GFX7: liveins: $sgpr0_sgpr1 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 ; GFX7-NEXT: 
[[LOAD:%[0-9]+]]:sreg_64(p999) = G_LOAD [[COPY]](p4) :: (load (p999), addrspace 4) - ; GFX7-NEXT: $sgpr0_sgpr1 = COPY [[LOAD]](p999) + ; GFX7-NEXT: $sgpr0_sgpr1 = PRED_COPY [[LOAD]](p999) ; GFX8-LABEL: name: load_constant_p999_from_8 ; GFX8: liveins: $sgpr0_sgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 ; GFX8-NEXT: [[LOAD:%[0-9]+]]:sreg_64(p999) = G_LOAD [[COPY]](p4) :: (load (p999), addrspace 4) - ; GFX8-NEXT: $sgpr0_sgpr1 = COPY [[LOAD]](p999) + ; GFX8-NEXT: $sgpr0_sgpr1 = PRED_COPY [[LOAD]](p999) ; GFX10-LABEL: name: load_constant_p999_from_8 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:sreg_64(p999) = G_LOAD [[COPY]](p4) :: (load (p999), addrspace 4) - ; GFX10-NEXT: $sgpr0_sgpr1 = COPY [[LOAD]](p999) + ; GFX10-NEXT: $sgpr0_sgpr1 = PRED_COPY [[LOAD]](p999) %0:sgpr(p4) = COPY $sgpr0_sgpr1 %1:sgpr(p999) = G_LOAD %0 :: (load (p999), align 8, addrspace 4) $sgpr0_sgpr1 = COPY %1 @@ -595,25 +595,25 @@ ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 ; GFX6-NEXT: [[LOAD:%[0-9]+]]:sreg_64(<2 x p3>) = G_LOAD [[COPY]](p4) :: (load (<2 x p3>), addrspace 4) - ; GFX6-NEXT: $sgpr0_sgpr1 = COPY [[LOAD]](<2 x p3>) + ; GFX6-NEXT: $sgpr0_sgpr1 = PRED_COPY [[LOAD]](<2 x p3>) ; GFX7-LABEL: name: load_constant_v2p3 ; GFX7: liveins: $sgpr0_sgpr1 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 ; GFX7-NEXT: [[LOAD:%[0-9]+]]:sreg_64(<2 x p3>) = G_LOAD [[COPY]](p4) :: (load (<2 x p3>), addrspace 4) - ; GFX7-NEXT: $sgpr0_sgpr1 = COPY [[LOAD]](<2 x p3>) + ; GFX7-NEXT: $sgpr0_sgpr1 = PRED_COPY [[LOAD]](<2 x p3>) ; GFX8-LABEL: name: load_constant_v2p3 ; GFX8: liveins: $sgpr0_sgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 ; GFX8-NEXT: [[LOAD:%[0-9]+]]:sreg_64(<2 x p3>) = G_LOAD [[COPY]](p4) :: (load (<2 x p3>), addrspace 4) - ; GFX8-NEXT: $sgpr0_sgpr1 = COPY [[LOAD]](<2 x p3>) + ; GFX8-NEXT: $sgpr0_sgpr1 = PRED_COPY [[LOAD]](<2 x p3>) ; GFX10-LABEL: name: load_constant_v2p3 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:sreg_64(<2 x p3>) = G_LOAD [[COPY]](p4) :: (load (<2 x p3>), addrspace 4) - ; GFX10-NEXT: $sgpr0_sgpr1 = COPY [[LOAD]](<2 x p3>) + ; GFX10-NEXT: $sgpr0_sgpr1 = PRED_COPY [[LOAD]](<2 x p3>) %0:sgpr(p4) = COPY $sgpr0_sgpr1 %1:sgpr(<2 x p3>) = G_LOAD %0 :: (load (<2 x p3>), align 8, addrspace 4) $sgpr0_sgpr1 = COPY %1 @@ -634,27 +634,27 @@ ; GFX6-LABEL: name: load_constant_v2s16 ; GFX6: liveins: $sgpr0_sgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX6-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (load (<2 x s16>), addrspace 4) - ; GFX6-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]] + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX6-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[PRED_COPY]], 0, 0 :: (load (<2 x s16>), addrspace 4) + ; GFX6-NEXT: $sgpr0 = PRED_COPY [[S_LOAD_DWORD_IMM]] ; GFX7-LABEL: name: load_constant_v2s16 ; GFX7: liveins: $sgpr0_sgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX7-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (load (<2 x s16>), addrspace 4) - ; GFX7-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]] + ; GFX7-NEXT: 
[[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX7-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[PRED_COPY]], 0, 0 :: (load (<2 x s16>), addrspace 4) + ; GFX7-NEXT: $sgpr0 = PRED_COPY [[S_LOAD_DWORD_IMM]] ; GFX8-LABEL: name: load_constant_v2s16 ; GFX8: liveins: $sgpr0_sgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX8-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (load (<2 x s16>), addrspace 4) - ; GFX8-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]] + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX8-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[PRED_COPY]], 0, 0 :: (load (<2 x s16>), addrspace 4) + ; GFX8-NEXT: $sgpr0 = PRED_COPY [[S_LOAD_DWORD_IMM]] ; GFX10-LABEL: name: load_constant_v2s16 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX10-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (load (<2 x s16>), addrspace 4) - ; GFX10-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX10-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[PRED_COPY]], 0, 0 :: (load (<2 x s16>), addrspace 4) + ; GFX10-NEXT: $sgpr0 = PRED_COPY [[S_LOAD_DWORD_IMM]] %0:sgpr(p4) = COPY $sgpr0_sgpr1 %1:sgpr(<2 x s16>) = G_LOAD %0 :: (load (<2 x s16>), align 4, addrspace 4) $sgpr0 = COPY %1 @@ -675,27 +675,27 @@ ; GFX6-LABEL: name: load_constant_v4s16 ; GFX6: liveins: $sgpr0_sgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX6-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load (<4 x s16>), addrspace 4) - ; GFX6-NEXT: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]] + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX6-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[PRED_COPY]], 0, 0 :: (load (<4 x s16>), addrspace 4) + ; GFX6-NEXT: $sgpr0_sgpr1 = PRED_COPY [[S_LOAD_DWORDX2_IMM]] ; GFX7-LABEL: name: load_constant_v4s16 ; GFX7: liveins: $sgpr0_sgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX7-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load (<4 x s16>), addrspace 4) - ; GFX7-NEXT: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]] + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX7-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[PRED_COPY]], 0, 0 :: (load (<4 x s16>), addrspace 4) + ; GFX7-NEXT: $sgpr0_sgpr1 = PRED_COPY [[S_LOAD_DWORDX2_IMM]] ; GFX8-LABEL: name: load_constant_v4s16 ; GFX8: liveins: $sgpr0_sgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX8-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load (<4 x s16>), addrspace 4) - ; GFX8-NEXT: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]] + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX8-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[PRED_COPY]], 0, 0 :: (load (<4 x s16>), addrspace 4) + ; GFX8-NEXT: $sgpr0_sgpr1 = PRED_COPY [[S_LOAD_DWORDX2_IMM]] ; GFX10-LABEL: name: load_constant_v4s16 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY 
$sgpr0_sgpr1 - ; GFX10-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load (<4 x s16>), addrspace 4) - ; GFX10-NEXT: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX10-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[PRED_COPY]], 0, 0 :: (load (<4 x s16>), addrspace 4) + ; GFX10-NEXT: $sgpr0_sgpr1 = PRED_COPY [[S_LOAD_DWORDX2_IMM]] %0:sgpr(p4) = COPY $sgpr0_sgpr1 %1:sgpr(<4 x s16>) = G_LOAD %0 :: (load (<4 x s16>), align 8, addrspace 4) $sgpr0_sgpr1 = COPY %1 @@ -716,27 +716,27 @@ ; GFX6-LABEL: name: load_constant_v8s16 ; GFX6: liveins: $sgpr0_sgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX6-NEXT: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 0, 0 :: (load (<8 x s16>), align 4, addrspace 4) - ; GFX6-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[S_LOAD_DWORDX4_IMM]] + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX6-NEXT: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[PRED_COPY]], 0, 0 :: (load (<8 x s16>), align 4, addrspace 4) + ; GFX6-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[S_LOAD_DWORDX4_IMM]] ; GFX7-LABEL: name: load_constant_v8s16 ; GFX7: liveins: $sgpr0_sgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX7-NEXT: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 0, 0 :: (load (<8 x s16>), align 4, addrspace 4) - ; GFX7-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[S_LOAD_DWORDX4_IMM]] + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX7-NEXT: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[PRED_COPY]], 0, 0 :: (load (<8 x s16>), align 4, addrspace 4) + ; GFX7-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[S_LOAD_DWORDX4_IMM]] ; GFX8-LABEL: name: load_constant_v8s16 ; GFX8: liveins: $sgpr0_sgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX8-NEXT: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 0, 0 :: (load (<8 x s16>), align 4, addrspace 4) - ; GFX8-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[S_LOAD_DWORDX4_IMM]] + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX8-NEXT: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[PRED_COPY]], 0, 0 :: (load (<8 x s16>), align 4, addrspace 4) + ; GFX8-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[S_LOAD_DWORDX4_IMM]] ; GFX10-LABEL: name: load_constant_v8s16 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX10-NEXT: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 0, 0 :: (load (<8 x s16>), align 4, addrspace 4) - ; GFX10-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[S_LOAD_DWORDX4_IMM]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX10-NEXT: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[PRED_COPY]], 0, 0 :: (load (<8 x s16>), align 4, addrspace 4) + ; GFX10-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[S_LOAD_DWORDX4_IMM]] %0:sgpr(p4) = COPY $sgpr0_sgpr1 %1:sgpr(<8 x s16>) = G_LOAD %0 :: (load (<8 x s16>), align 4, addrspace 4) $sgpr0_sgpr1_sgpr2_sgpr3 = COPY %1 @@ -757,27 +757,27 @@ ; GFX6-LABEL: name: load_constant_v8s32 ; GFX6: liveins: $sgpr0_sgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX6-NEXT: 
[[S_LOAD_DWORDX8_IMM:%[0-9]+]]:sgpr_256 = S_LOAD_DWORDX8_IMM [[COPY]], 0, 0 :: (load (<8 x s32>), align 4, addrspace 4) - ; GFX6-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 = COPY [[S_LOAD_DWORDX8_IMM]] + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX6-NEXT: [[S_LOAD_DWORDX8_IMM:%[0-9]+]]:sgpr_256 = S_LOAD_DWORDX8_IMM [[PRED_COPY]], 0, 0 :: (load (<8 x s32>), align 4, addrspace 4) + ; GFX6-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 = PRED_COPY [[S_LOAD_DWORDX8_IMM]] ; GFX7-LABEL: name: load_constant_v8s32 ; GFX7: liveins: $sgpr0_sgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX7-NEXT: [[S_LOAD_DWORDX8_IMM:%[0-9]+]]:sgpr_256 = S_LOAD_DWORDX8_IMM [[COPY]], 0, 0 :: (load (<8 x s32>), align 4, addrspace 4) - ; GFX7-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 = COPY [[S_LOAD_DWORDX8_IMM]] + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX7-NEXT: [[S_LOAD_DWORDX8_IMM:%[0-9]+]]:sgpr_256 = S_LOAD_DWORDX8_IMM [[PRED_COPY]], 0, 0 :: (load (<8 x s32>), align 4, addrspace 4) + ; GFX7-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 = PRED_COPY [[S_LOAD_DWORDX8_IMM]] ; GFX8-LABEL: name: load_constant_v8s32 ; GFX8: liveins: $sgpr0_sgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX8-NEXT: [[S_LOAD_DWORDX8_IMM:%[0-9]+]]:sgpr_256 = S_LOAD_DWORDX8_IMM [[COPY]], 0, 0 :: (load (<8 x s32>), align 4, addrspace 4) - ; GFX8-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 = COPY [[S_LOAD_DWORDX8_IMM]] + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX8-NEXT: [[S_LOAD_DWORDX8_IMM:%[0-9]+]]:sgpr_256 = S_LOAD_DWORDX8_IMM [[PRED_COPY]], 0, 0 :: (load (<8 x s32>), align 4, addrspace 4) + ; GFX8-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 = PRED_COPY [[S_LOAD_DWORDX8_IMM]] ; GFX10-LABEL: name: load_constant_v8s32 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX10-NEXT: [[S_LOAD_DWORDX8_IMM:%[0-9]+]]:sgpr_256 = S_LOAD_DWORDX8_IMM [[COPY]], 0, 0 :: (load (<8 x s32>), align 4, addrspace 4) - ; GFX10-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 = COPY [[S_LOAD_DWORDX8_IMM]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX10-NEXT: [[S_LOAD_DWORDX8_IMM:%[0-9]+]]:sgpr_256 = S_LOAD_DWORDX8_IMM [[PRED_COPY]], 0, 0 :: (load (<8 x s32>), align 4, addrspace 4) + ; GFX10-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 = PRED_COPY [[S_LOAD_DWORDX8_IMM]] %0:sgpr(p4) = COPY $sgpr0_sgpr1 %1:sgpr(<8 x s32>) = G_LOAD %0 :: (load (<8 x s32>), align 4, addrspace 4) $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 = COPY %1 @@ -798,27 +798,27 @@ ; GFX6-LABEL: name: load_constant_v16s32 ; GFX6: liveins: $sgpr0_sgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX6-NEXT: [[S_LOAD_DWORDX16_IMM:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM [[COPY]], 0, 0 :: (load (<16 x s32>), align 4, addrspace 4) - ; GFX6-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY [[S_LOAD_DWORDX16_IMM]] + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX6-NEXT: [[S_LOAD_DWORDX16_IMM:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM [[PRED_COPY]], 0, 0 :: (load (<16 x s32>), align 4, addrspace 4) + ; GFX6-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = 
PRED_COPY [[S_LOAD_DWORDX16_IMM]] ; GFX7-LABEL: name: load_constant_v16s32 ; GFX7: liveins: $sgpr0_sgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX7-NEXT: [[S_LOAD_DWORDX16_IMM:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM [[COPY]], 0, 0 :: (load (<16 x s32>), align 4, addrspace 4) - ; GFX7-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY [[S_LOAD_DWORDX16_IMM]] + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX7-NEXT: [[S_LOAD_DWORDX16_IMM:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM [[PRED_COPY]], 0, 0 :: (load (<16 x s32>), align 4, addrspace 4) + ; GFX7-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = PRED_COPY [[S_LOAD_DWORDX16_IMM]] ; GFX8-LABEL: name: load_constant_v16s32 ; GFX8: liveins: $sgpr0_sgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX8-NEXT: [[S_LOAD_DWORDX16_IMM:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM [[COPY]], 0, 0 :: (load (<16 x s32>), align 4, addrspace 4) - ; GFX8-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY [[S_LOAD_DWORDX16_IMM]] + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX8-NEXT: [[S_LOAD_DWORDX16_IMM:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM [[PRED_COPY]], 0, 0 :: (load (<16 x s32>), align 4, addrspace 4) + ; GFX8-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = PRED_COPY [[S_LOAD_DWORDX16_IMM]] ; GFX10-LABEL: name: load_constant_v16s32 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX10-NEXT: [[S_LOAD_DWORDX16_IMM:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM [[COPY]], 0, 0 :: (load (<16 x s32>), align 4, addrspace 4) - ; GFX10-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY [[S_LOAD_DWORDX16_IMM]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX10-NEXT: [[S_LOAD_DWORDX16_IMM:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM [[PRED_COPY]], 0, 0 :: (load (<16 x s32>), align 4, addrspace 4) + ; GFX10-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = PRED_COPY [[S_LOAD_DWORDX16_IMM]] %0:sgpr(p4) = COPY $sgpr0_sgpr1 %1:sgpr(<16 x s32>) = G_LOAD %0 :: (load (<16 x s32>), align 4, addrspace 4) $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY %1 @@ -839,27 +839,27 @@ ; GFX6-LABEL: name: load_constant_v8s64 ; GFX6: liveins: $sgpr0_sgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX6-NEXT: [[S_LOAD_DWORDX16_IMM:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM [[COPY]], 0, 0 :: (load (<8 x s64>), align 4, addrspace 4) - ; GFX6-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY [[S_LOAD_DWORDX16_IMM]] + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX6-NEXT: [[S_LOAD_DWORDX16_IMM:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM [[PRED_COPY]], 0, 0 :: (load (<8 x s64>), align 4, addrspace 4) + ; GFX6-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = PRED_COPY [[S_LOAD_DWORDX16_IMM]] ; GFX7-LABEL: name: load_constant_v8s64 ; GFX7: 
liveins: $sgpr0_sgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX7-NEXT: [[S_LOAD_DWORDX16_IMM:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM [[COPY]], 0, 0 :: (load (<8 x s64>), align 4, addrspace 4) - ; GFX7-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY [[S_LOAD_DWORDX16_IMM]] + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX7-NEXT: [[S_LOAD_DWORDX16_IMM:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM [[PRED_COPY]], 0, 0 :: (load (<8 x s64>), align 4, addrspace 4) + ; GFX7-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = PRED_COPY [[S_LOAD_DWORDX16_IMM]] ; GFX8-LABEL: name: load_constant_v8s64 ; GFX8: liveins: $sgpr0_sgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX8-NEXT: [[S_LOAD_DWORDX16_IMM:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM [[COPY]], 0, 0 :: (load (<8 x s64>), align 4, addrspace 4) - ; GFX8-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY [[S_LOAD_DWORDX16_IMM]] + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX8-NEXT: [[S_LOAD_DWORDX16_IMM:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM [[PRED_COPY]], 0, 0 :: (load (<8 x s64>), align 4, addrspace 4) + ; GFX8-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = PRED_COPY [[S_LOAD_DWORDX16_IMM]] ; GFX10-LABEL: name: load_constant_v8s64 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX10-NEXT: [[S_LOAD_DWORDX16_IMM:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM [[COPY]], 0, 0 :: (load (<8 x s64>), align 4, addrspace 4) - ; GFX10-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY [[S_LOAD_DWORDX16_IMM]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX10-NEXT: [[S_LOAD_DWORDX16_IMM:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM [[PRED_COPY]], 0, 0 :: (load (<8 x s64>), align 4, addrspace 4) + ; GFX10-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = PRED_COPY [[S_LOAD_DWORDX16_IMM]] %0:sgpr(p4) = COPY $sgpr0_sgpr1 %1:sgpr(<8 x s64>) = G_LOAD %0 :: (load (<8 x s64>), align 4, addrspace 4) $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY %1 @@ -884,27 +884,27 @@ ; GFX6-LABEL: name: load_constant_s32_from_4_gep_1020 ; GFX6: liveins: $sgpr0_sgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX6-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 255, 0 :: (load (s32), addrspace 4) - ; GFX6-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]] + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX6-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[PRED_COPY]], 255, 0 :: (load (s32), addrspace 4) + ; GFX6-NEXT: $sgpr0 = PRED_COPY [[S_LOAD_DWORD_IMM]] ; GFX7-LABEL: name: load_constant_s32_from_4_gep_1020 ; GFX7: liveins: $sgpr0_sgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX7-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 255, 0 :: (load (s32), addrspace 4) - ; GFX7-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]] + ; 
GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX7-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[PRED_COPY]], 255, 0 :: (load (s32), addrspace 4) + ; GFX7-NEXT: $sgpr0 = PRED_COPY [[S_LOAD_DWORD_IMM]] ; GFX8-LABEL: name: load_constant_s32_from_4_gep_1020 ; GFX8: liveins: $sgpr0_sgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX8-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 1020, 0 :: (load (s32), addrspace 4) - ; GFX8-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]] + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX8-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[PRED_COPY]], 1020, 0 :: (load (s32), addrspace 4) + ; GFX8-NEXT: $sgpr0 = PRED_COPY [[S_LOAD_DWORD_IMM]] ; GFX10-LABEL: name: load_constant_s32_from_4_gep_1020 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX10-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 1020, 0 :: (load (s32), addrspace 4) - ; GFX10-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX10-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[PRED_COPY]], 1020, 0 :: (load (s32), addrspace 4) + ; GFX10-NEXT: $sgpr0 = PRED_COPY [[S_LOAD_DWORD_IMM]] %0:sgpr(p4) = COPY $sgpr0_sgpr1 %1:sgpr(s64) = G_CONSTANT i64 1020 %2:sgpr(p4) = G_PTR_ADD %0, %1 @@ -927,28 +927,28 @@ ; GFX6-LABEL: name: load_constant_s32_from_4_gep_1024 ; GFX6: liveins: $sgpr0_sgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1024 - ; GFX6-NEXT: [[S_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR [[COPY]], [[S_MOV_B32_]], 0 :: (load (s32), addrspace 4) - ; GFX6-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_SGPR]] + ; GFX6-NEXT: [[S_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR [[PRED_COPY]], [[S_MOV_B32_]], 0 :: (load (s32), addrspace 4) + ; GFX6-NEXT: $sgpr0 = PRED_COPY [[S_LOAD_DWORD_SGPR]] ; GFX7-LABEL: name: load_constant_s32_from_4_gep_1024 ; GFX7: liveins: $sgpr0_sgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX7-NEXT: [[S_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM_ci [[COPY]], 256, 0 :: (load (s32), addrspace 4) - ; GFX7-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_IMM_ci]] + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX7-NEXT: [[S_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM_ci [[PRED_COPY]], 256, 0 :: (load (s32), addrspace 4) + ; GFX7-NEXT: $sgpr0 = PRED_COPY [[S_LOAD_DWORD_IMM_ci]] ; GFX8-LABEL: name: load_constant_s32_from_4_gep_1024 ; GFX8: liveins: $sgpr0_sgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX8-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 1024, 0 :: (load (s32), addrspace 4) - ; GFX8-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]] + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX8-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[PRED_COPY]], 1024, 0 :: (load (s32), addrspace 4) + ; GFX8-NEXT: $sgpr0 = PRED_COPY [[S_LOAD_DWORD_IMM]] ; GFX10-LABEL: name: load_constant_s32_from_4_gep_1024 ; GFX10: 
liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX10-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 1024, 0 :: (load (s32), addrspace 4) - ; GFX10-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX10-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[PRED_COPY]], 1024, 0 :: (load (s32), addrspace 4) + ; GFX10-NEXT: $sgpr0 = PRED_COPY [[S_LOAD_DWORD_IMM]] %0:sgpr(p4) = COPY $sgpr0_sgpr1 %1:sgpr(s64) = G_CONSTANT i64 1024 %2:sgpr(p4) = G_PTR_ADD %0, %1 @@ -971,30 +971,30 @@ ; GFX6-LABEL: name: load_constant_s32_from_4_gep_1048575 ; GFX6: liveins: $sgpr0_sgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1048575 - ; GFX6-NEXT: [[S_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR [[COPY]], [[S_MOV_B32_]], 0 :: (load (s32), addrspace 4) - ; GFX6-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_SGPR]] + ; GFX6-NEXT: [[S_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR [[PRED_COPY]], [[S_MOV_B32_]], 0 :: (load (s32), addrspace 4) + ; GFX6-NEXT: $sgpr0 = PRED_COPY [[S_LOAD_DWORD_SGPR]] ; GFX7-LABEL: name: load_constant_s32_from_4_gep_1048575 ; GFX7: liveins: $sgpr0_sgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1048575 - ; GFX7-NEXT: [[S_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR [[COPY]], [[S_MOV_B32_]], 0 :: (load (s32), addrspace 4) - ; GFX7-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_SGPR]] + ; GFX7-NEXT: [[S_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR [[PRED_COPY]], [[S_MOV_B32_]], 0 :: (load (s32), addrspace 4) + ; GFX7-NEXT: $sgpr0 = PRED_COPY [[S_LOAD_DWORD_SGPR]] ; GFX8-LABEL: name: load_constant_s32_from_4_gep_1048575 ; GFX8: liveins: $sgpr0_sgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX8-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 1048575, 0 :: (load (s32), addrspace 4) - ; GFX8-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]] + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX8-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[PRED_COPY]], 1048575, 0 :: (load (s32), addrspace 4) + ; GFX8-NEXT: $sgpr0 = PRED_COPY [[S_LOAD_DWORD_IMM]] ; GFX10-LABEL: name: load_constant_s32_from_4_gep_1048575 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1048575 - ; GFX10-NEXT: [[S_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR [[COPY]], [[S_MOV_B32_]], 0 :: (load (s32), addrspace 4) - ; GFX10-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_SGPR]] + ; GFX10-NEXT: [[S_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR [[PRED_COPY]], [[S_MOV_B32_]], 0 :: (load (s32), addrspace 4) + ; GFX10-NEXT: $sgpr0 = PRED_COPY [[S_LOAD_DWORD_SGPR]] %0:sgpr(p4) = COPY $sgpr0_sgpr1 %1:sgpr(s64) = G_CONSTANT i64 1048575 %2:sgpr(p4) = G_PTR_ADD %0, %1 @@ -1017,30 +1017,30 @@ ; GFX6-LABEL: name: load_constant_s32_from_4_gep_1048576 ; GFX6: 
liveins: $sgpr0_sgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1048576 - ; GFX6-NEXT: [[S_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR [[COPY]], [[S_MOV_B32_]], 0 :: (load (s32), addrspace 4) - ; GFX6-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_SGPR]] + ; GFX6-NEXT: [[S_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR [[PRED_COPY]], [[S_MOV_B32_]], 0 :: (load (s32), addrspace 4) + ; GFX6-NEXT: $sgpr0 = PRED_COPY [[S_LOAD_DWORD_SGPR]] ; GFX7-LABEL: name: load_constant_s32_from_4_gep_1048576 ; GFX7: liveins: $sgpr0_sgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX7-NEXT: [[S_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM_ci [[COPY]], 262144, 0 :: (load (s32), addrspace 4) - ; GFX7-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_IMM_ci]] + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX7-NEXT: [[S_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM_ci [[PRED_COPY]], 262144, 0 :: (load (s32), addrspace 4) + ; GFX7-NEXT: $sgpr0 = PRED_COPY [[S_LOAD_DWORD_IMM_ci]] ; GFX8-LABEL: name: load_constant_s32_from_4_gep_1048576 ; GFX8: liveins: $sgpr0_sgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1048576 - ; GFX8-NEXT: [[S_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR [[COPY]], [[S_MOV_B32_]], 0 :: (load (s32), addrspace 4) - ; GFX8-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_SGPR]] + ; GFX8-NEXT: [[S_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR [[PRED_COPY]], [[S_MOV_B32_]], 0 :: (load (s32), addrspace 4) + ; GFX8-NEXT: $sgpr0 = PRED_COPY [[S_LOAD_DWORD_SGPR]] ; GFX10-LABEL: name: load_constant_s32_from_4_gep_1048576 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1048576 - ; GFX10-NEXT: [[S_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR [[COPY]], [[S_MOV_B32_]], 0 :: (load (s32), addrspace 4) - ; GFX10-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_SGPR]] + ; GFX10-NEXT: [[S_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR [[PRED_COPY]], [[S_MOV_B32_]], 0 :: (load (s32), addrspace 4) + ; GFX10-NEXT: $sgpr0 = PRED_COPY [[S_LOAD_DWORD_SGPR]] %0:sgpr(p4) = COPY $sgpr0_sgpr1 %1:sgpr(s64) = G_CONSTANT i64 1048576 %2:sgpr(p4) = G_PTR_ADD %0, %1 @@ -1063,31 +1063,31 @@ ; GFX6-LABEL: name: load_constant_s32_from_4_gep_1073741823 ; GFX6: liveins: $sgpr0_sgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1073741823 - ; GFX6-NEXT: [[S_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR [[COPY]], [[S_MOV_B32_]], 0 :: (load (s32), addrspace 4) - ; GFX6-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_SGPR]] + ; GFX6-NEXT: [[S_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR [[PRED_COPY]], [[S_MOV_B32_]], 0 :: (load (s32), addrspace 4) + ; GFX6-NEXT: $sgpr0 = PRED_COPY [[S_LOAD_DWORD_SGPR]] ; GFX7-LABEL: name: load_constant_s32_from_4_gep_1073741823 ; GFX7: liveins: 
$sgpr0_sgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1073741823 - ; GFX7-NEXT: [[S_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR [[COPY]], [[S_MOV_B32_]], 0 :: (load (s32), addrspace 4) - ; GFX7-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_SGPR]] + ; GFX7-NEXT: [[S_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR [[PRED_COPY]], [[S_MOV_B32_]], 0 :: (load (s32), addrspace 4) + ; GFX7-NEXT: $sgpr0 = PRED_COPY [[S_LOAD_DWORD_SGPR]] ; GFX8-LABEL: name: load_constant_s32_from_4_gep_1073741823 ; GFX8: liveins: $sgpr0_sgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1073741823 - ; GFX8-NEXT: [[S_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR [[COPY]], [[S_MOV_B32_]], 0 :: (load (s32), addrspace 4) - ; GFX8-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_SGPR]] + ; GFX8-NEXT: [[S_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR [[PRED_COPY]], [[S_MOV_B32_]], 0 :: (load (s32), addrspace 4) + ; GFX8-NEXT: $sgpr0 = PRED_COPY [[S_LOAD_DWORD_SGPR]] ; GFX10-LABEL: name: load_constant_s32_from_4_gep_1073741823 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1073741823 - ; GFX10-NEXT: [[S_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR [[COPY]], [[S_MOV_B32_]], 0 :: (load (s32), addrspace 4) - ; GFX10-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_SGPR]] + ; GFX10-NEXT: [[S_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR [[PRED_COPY]], [[S_MOV_B32_]], 0 :: (load (s32), addrspace 4) + ; GFX10-NEXT: $sgpr0 = PRED_COPY [[S_LOAD_DWORD_SGPR]] %0:sgpr(p4) = COPY $sgpr0_sgpr1 %1:sgpr(s64) = G_CONSTANT i64 1073741823 %2:sgpr(p4) = G_PTR_ADD %0, %1 @@ -1110,51 +1110,51 @@ ; GFX6-LABEL: name: load_constant_s32_from_4_gep_negative_1 ; GFX6: liveins: $sgpr0_sgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 -1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B64_]].sub0 - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B64_]].sub1 - ; GFX6-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc - ; GFX6-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def $scc, implicit $scc + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY [[S_MOV_B64_]].sub0 + ; GFX6-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX6-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY [[S_MOV_B64_]].sub1 + ; GFX6-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PRED_COPY1]], [[PRED_COPY2]], implicit-def $scc + ; GFX6-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[PRED_COPY3]], [[PRED_COPY4]], implicit-def $scc, implicit $scc ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE 
[[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 ; GFX6-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[REG_SEQUENCE]], 0, 0 :: (load (s32), addrspace 4) - ; GFX6-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]] + ; GFX6-NEXT: $sgpr0 = PRED_COPY [[S_LOAD_DWORD_IMM]] ; GFX7-LABEL: name: load_constant_s32_from_4_gep_negative_1 ; GFX7: liveins: $sgpr0_sgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 -1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B64_]].sub0 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B64_]].sub1 - ; GFX7-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc - ; GFX7-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def $scc, implicit $scc + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY [[S_MOV_B64_]].sub0 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY [[S_MOV_B64_]].sub1 + ; GFX7-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PRED_COPY1]], [[PRED_COPY2]], implicit-def $scc + ; GFX7-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[PRED_COPY3]], [[PRED_COPY4]], implicit-def $scc, implicit $scc ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 ; GFX7-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[REG_SEQUENCE]], 0, 0 :: (load (s32), addrspace 4) - ; GFX7-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]] + ; GFX7-NEXT: $sgpr0 = PRED_COPY [[S_LOAD_DWORD_IMM]] ; GFX8-LABEL: name: load_constant_s32_from_4_gep_negative_1 ; GFX8: liveins: $sgpr0_sgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 ; GFX8-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 -1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B64_]].sub0 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B64_]].sub1 - ; GFX8-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc - ; GFX8-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def $scc, implicit $scc + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY [[S_MOV_B64_]].sub0 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY [[S_MOV_B64_]].sub1 + ; GFX8-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PRED_COPY1]], [[PRED_COPY2]], implicit-def $scc + ; GFX8-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[PRED_COPY3]], [[PRED_COPY4]], implicit-def $scc, implicit $scc ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 ; GFX8-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[REG_SEQUENCE]], 0, 0 :: (load (s32), addrspace 4) - ; 
GFX8-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]] + ; GFX8-NEXT: $sgpr0 = PRED_COPY [[S_LOAD_DWORD_IMM]] ; GFX10-LABEL: name: load_constant_s32_from_4_gep_negative_1 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX10-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], -1, 0 :: (load (s32), addrspace 4) - ; GFX10-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX10-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[PRED_COPY]], -1, 0 :: (load (s32), addrspace 4) + ; GFX10-NEXT: $sgpr0 = PRED_COPY [[S_LOAD_DWORD_IMM]] %0:sgpr(p4) = COPY $sgpr0_sgpr1 %1:sgpr(s64) = G_CONSTANT i64 -1 %2:sgpr(p4) = G_PTR_ADD %0, %1 @@ -1177,57 +1177,57 @@ ; GFX6-LABEL: name: load_constant_s32_from_4_gep_negative_524288 ; GFX6: liveins: $sgpr0_sgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4294443008 ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX6-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc - ; GFX6-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def $scc, implicit $scc + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX6-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX6-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX6-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PRED_COPY1]], [[PRED_COPY2]], implicit-def $scc + ; GFX6-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[PRED_COPY3]], [[PRED_COPY4]], implicit-def $scc, implicit $scc ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 ; GFX6-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[REG_SEQUENCE1]], 0, 0 :: (load (s32), addrspace 4) - ; GFX6-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]] + ; GFX6-NEXT: $sgpr0 = PRED_COPY [[S_LOAD_DWORD_IMM]] ; GFX7-LABEL: name: load_constant_s32_from_4_gep_negative_524288 ; GFX7: liveins: $sgpr0_sgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4294443008 ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], 
implicit-def $scc - ; GFX7-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def $scc, implicit $scc + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX7-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PRED_COPY1]], [[PRED_COPY2]], implicit-def $scc + ; GFX7-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[PRED_COPY3]], [[PRED_COPY4]], implicit-def $scc, implicit $scc ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 ; GFX7-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[REG_SEQUENCE1]], 0, 0 :: (load (s32), addrspace 4) - ; GFX7-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]] + ; GFX7-NEXT: $sgpr0 = PRED_COPY [[S_LOAD_DWORD_IMM]] ; GFX8-LABEL: name: load_constant_s32_from_4_gep_negative_524288 ; GFX8: liveins: $sgpr0_sgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4294443008 ; GFX8-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX8-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc - ; GFX8-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def $scc, implicit $scc + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX8-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PRED_COPY1]], [[PRED_COPY2]], implicit-def $scc + ; GFX8-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[PRED_COPY3]], [[PRED_COPY4]], implicit-def $scc, implicit $scc ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 ; GFX8-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[REG_SEQUENCE1]], 0, 0 :: (load (s32), addrspace 4) - ; GFX8-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]] + ; GFX8-NEXT: $sgpr0 = PRED_COPY [[S_LOAD_DWORD_IMM]] ; GFX10-LABEL: name: load_constant_s32_from_4_gep_negative_524288 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX10-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], -524288, 0 :: (load (s32), addrspace 4) - ; GFX10-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX10-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[PRED_COPY]], -524288, 0 :: (load (s32), addrspace 4) + ; GFX10-NEXT: $sgpr0 = PRED_COPY [[S_LOAD_DWORD_IMM]] 
%0:sgpr(p4) = COPY $sgpr0_sgpr1 %1:sgpr(s64) = G_CONSTANT i64 -524288 %2:sgpr(p4) = G_PTR_ADD %0, %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-flat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-flat.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-flat.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-flat.mir @@ -20,33 +20,33 @@ ; GFX7-LABEL: name: load_flat_s32_from_4 ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s32)) - ; GFX7-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s32)) + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_DWORD]] ; GFX8-LABEL: name: load_flat_s32_from_4 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s32)) - ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s32)) + ; GFX8-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_DWORD]] ; GFX9-LABEL: name: load_flat_s32_from_4 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s32)) - ; GFX9-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s32)) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_DWORD]] ; GFX10-LABEL: name: load_flat_s32_from_4 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s32)) - ; GFX10-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s32)) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_DWORD]] ; GFX11-LABEL: name: load_flat_s32_from_4 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s32)) - ; GFX11-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s32)) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_DWORD]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = G_LOAD %0 :: (load (s32), align 4, addrspace 0) $vgpr0 = COPY %1 @@ -67,33 +67,33 @@ ; 
GFX7-LABEL: name: load_flat_s32_from_2 ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[FLAT_LOAD_USHORT:%[0-9]+]]:vgpr_32 = FLAT_LOAD_USHORT [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s16)) - ; GFX7-NEXT: $vgpr0 = COPY [[FLAT_LOAD_USHORT]] + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[FLAT_LOAD_USHORT:%[0-9]+]]:vgpr_32 = FLAT_LOAD_USHORT [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s16)) + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_USHORT]] ; GFX8-LABEL: name: load_flat_s32_from_2 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[FLAT_LOAD_USHORT:%[0-9]+]]:vgpr_32 = FLAT_LOAD_USHORT [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s16)) - ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_USHORT]] + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[FLAT_LOAD_USHORT:%[0-9]+]]:vgpr_32 = FLAT_LOAD_USHORT [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s16)) + ; GFX8-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_USHORT]] ; GFX9-LABEL: name: load_flat_s32_from_2 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[FLAT_LOAD_USHORT:%[0-9]+]]:vgpr_32 = FLAT_LOAD_USHORT [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s16)) - ; GFX9-NEXT: $vgpr0 = COPY [[FLAT_LOAD_USHORT]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[FLAT_LOAD_USHORT:%[0-9]+]]:vgpr_32 = FLAT_LOAD_USHORT [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s16)) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_USHORT]] ; GFX10-LABEL: name: load_flat_s32_from_2 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[FLAT_LOAD_USHORT:%[0-9]+]]:vgpr_32 = FLAT_LOAD_USHORT [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s16)) - ; GFX10-NEXT: $vgpr0 = COPY [[FLAT_LOAD_USHORT]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[FLAT_LOAD_USHORT:%[0-9]+]]:vgpr_32 = FLAT_LOAD_USHORT [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s16)) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_USHORT]] ; GFX11-LABEL: name: load_flat_s32_from_2 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[FLAT_LOAD_USHORT:%[0-9]+]]:vgpr_32 = FLAT_LOAD_USHORT [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s16)) - ; GFX11-NEXT: $vgpr0 = COPY [[FLAT_LOAD_USHORT]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[FLAT_LOAD_USHORT:%[0-9]+]]:vgpr_32 = FLAT_LOAD_USHORT [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s16)) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_USHORT]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = G_LOAD %0 :: (load (s16), align 2, addrspace 0) $vgpr0 = COPY %1 @@ -114,33 +114,33 @@ ; GFX7-LABEL: name: load_flat_s32_from_1 ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) - ; GFX7-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX7-NEXT: 
[[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] ; GFX8-LABEL: name: load_flat_s32_from_1 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) - ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX8-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] ; GFX9-LABEL: name: load_flat_s32_from_1 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) - ; GFX9-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] ; GFX10-LABEL: name: load_flat_s32_from_1 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) - ; GFX10-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] ; GFX11-LABEL: name: load_flat_s32_from_1 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) - ; GFX11-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = G_LOAD %0 :: (load (s8), align 1, addrspace 0) $vgpr0 = COPY %1 @@ -161,33 +161,33 @@ ; GFX7-LABEL: name: load_flat_v2s32 ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s32>)) - ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s32>)) + ; GFX7-NEXT: $vgpr0_vgpr1 = PRED_COPY [[FLAT_LOAD_DWORDX2_]] ; GFX8-LABEL: name: load_flat_v2s32 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: 
[[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s32>)) - ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s32>)) + ; GFX8-NEXT: $vgpr0_vgpr1 = PRED_COPY [[FLAT_LOAD_DWORDX2_]] ; GFX9-LABEL: name: load_flat_v2s32 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s32>)) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s32>)) + ; GFX9-NEXT: $vgpr0_vgpr1 = PRED_COPY [[FLAT_LOAD_DWORDX2_]] ; GFX10-LABEL: name: load_flat_v2s32 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s32>)) - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s32>)) + ; GFX10-NEXT: $vgpr0_vgpr1 = PRED_COPY [[FLAT_LOAD_DWORDX2_]] ; GFX11-LABEL: name: load_flat_v2s32 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s32>)) - ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s32>)) + ; GFX11-NEXT: $vgpr0_vgpr1 = PRED_COPY [[FLAT_LOAD_DWORDX2_]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(<2 x s32>) = G_LOAD %0 :: (load (<2 x s32>), align 8, addrspace 0) $vgpr0_vgpr1 = COPY %1 @@ -208,33 +208,33 @@ ; GFX7-LABEL: name: load_flat_v3s32 ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[FLAT_LOAD_DWORDX3_:%[0-9]+]]:vreg_96 = FLAT_LOAD_DWORDX3 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<3 x s32>), align 4) - ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[FLAT_LOAD_DWORDX3_]] + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[FLAT_LOAD_DWORDX3_:%[0-9]+]]:vreg_96 = FLAT_LOAD_DWORDX3 [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<3 x s32>), align 4) + ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2 = PRED_COPY [[FLAT_LOAD_DWORDX3_]] ; GFX8-LABEL: name: load_flat_v3s32 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[FLAT_LOAD_DWORDX3_:%[0-9]+]]:vreg_96 = FLAT_LOAD_DWORDX3 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: 
(load (<3 x s32>), align 4) - ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[FLAT_LOAD_DWORDX3_]] + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[FLAT_LOAD_DWORDX3_:%[0-9]+]]:vreg_96 = FLAT_LOAD_DWORDX3 [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<3 x s32>), align 4) + ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2 = PRED_COPY [[FLAT_LOAD_DWORDX3_]] ; GFX9-LABEL: name: load_flat_v3s32 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[FLAT_LOAD_DWORDX3_:%[0-9]+]]:vreg_96 = FLAT_LOAD_DWORDX3 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<3 x s32>), align 4) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[FLAT_LOAD_DWORDX3_]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[FLAT_LOAD_DWORDX3_:%[0-9]+]]:vreg_96 = FLAT_LOAD_DWORDX3 [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<3 x s32>), align 4) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = PRED_COPY [[FLAT_LOAD_DWORDX3_]] ; GFX10-LABEL: name: load_flat_v3s32 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[FLAT_LOAD_DWORDX3_:%[0-9]+]]:vreg_96 = FLAT_LOAD_DWORDX3 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<3 x s32>), align 4) - ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[FLAT_LOAD_DWORDX3_]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[FLAT_LOAD_DWORDX3_:%[0-9]+]]:vreg_96 = FLAT_LOAD_DWORDX3 [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<3 x s32>), align 4) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = PRED_COPY [[FLAT_LOAD_DWORDX3_]] ; GFX11-LABEL: name: load_flat_v3s32 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[FLAT_LOAD_DWORDX3_:%[0-9]+]]:vreg_96 = FLAT_LOAD_DWORDX3 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<3 x s32>), align 4) - ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[FLAT_LOAD_DWORDX3_]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[FLAT_LOAD_DWORDX3_:%[0-9]+]]:vreg_96 = FLAT_LOAD_DWORDX3 [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<3 x s32>), align 4) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = PRED_COPY [[FLAT_LOAD_DWORDX3_]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(<3 x s32>) = G_LOAD %0 :: (load (<3 x s32>), align 4, addrspace 0) $vgpr0_vgpr1_vgpr2 = COPY %1 @@ -255,33 +255,33 @@ ; GFX7-LABEL: name: load_flat_v4s32 ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<4 x s32>), align 4) - ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]] + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<4 x s32>), align 4) + ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[FLAT_LOAD_DWORDX4_]] ; GFX8-LABEL: name: load_flat_v4s32 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<4 x s32>), align 4) - ; 
GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]] + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<4 x s32>), align 4) + ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[FLAT_LOAD_DWORDX4_]] ; GFX9-LABEL: name: load_flat_v4s32 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<4 x s32>), align 4) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<4 x s32>), align 4) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[FLAT_LOAD_DWORDX4_]] ; GFX10-LABEL: name: load_flat_v4s32 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<4 x s32>), align 4) - ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<4 x s32>), align 4) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[FLAT_LOAD_DWORDX4_]] ; GFX11-LABEL: name: load_flat_v4s32 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<4 x s32>), align 4) - ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<4 x s32>), align 4) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[FLAT_LOAD_DWORDX4_]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(<4 x s32>) = G_LOAD %0 :: (load (<4 x s32>), align 4, addrspace 0) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 @@ -302,33 +302,33 @@ ; GFX7-LABEL: name: load_flat_s64 ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s64)) - ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s64)) + ; GFX7-NEXT: $vgpr0_vgpr1 = PRED_COPY [[FLAT_LOAD_DWORDX2_]] ; GFX8-LABEL: name: load_flat_s64 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s64)) - ; GFX8-NEXT: $vgpr0_vgpr1 = COPY 
[[FLAT_LOAD_DWORDX2_]] + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s64)) + ; GFX8-NEXT: $vgpr0_vgpr1 = PRED_COPY [[FLAT_LOAD_DWORDX2_]] ; GFX9-LABEL: name: load_flat_s64 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s64)) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s64)) + ; GFX9-NEXT: $vgpr0_vgpr1 = PRED_COPY [[FLAT_LOAD_DWORDX2_]] ; GFX10-LABEL: name: load_flat_s64 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s64)) - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s64)) + ; GFX10-NEXT: $vgpr0_vgpr1 = PRED_COPY [[FLAT_LOAD_DWORDX2_]] ; GFX11-LABEL: name: load_flat_s64 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s64)) - ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s64)) + ; GFX11-NEXT: $vgpr0_vgpr1 = PRED_COPY [[FLAT_LOAD_DWORDX2_]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_LOAD %0 :: (load (s64), align 8, addrspace 0) $vgpr0_vgpr1 = COPY %1 @@ -349,33 +349,33 @@ ; GFX7-LABEL: name: load_flat_v2s64 ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s64>), align 4) - ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]] + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s64>), align 4) + ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[FLAT_LOAD_DWORDX4_]] ; GFX8-LABEL: name: load_flat_v2s64 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s64>), align 4) - ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]] + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[PRED_COPY]], 0, 0, implicit 
$exec, implicit $flat_scr :: (load (<2 x s64>), align 4) + ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[FLAT_LOAD_DWORDX4_]] ; GFX9-LABEL: name: load_flat_v2s64 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s64>), align 4) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s64>), align 4) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[FLAT_LOAD_DWORDX4_]] ; GFX10-LABEL: name: load_flat_v2s64 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s64>), align 4) - ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s64>), align 4) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[FLAT_LOAD_DWORDX4_]] ; GFX11-LABEL: name: load_flat_v2s64 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s64>), align 4) - ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s64>), align 4) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[FLAT_LOAD_DWORDX4_]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(<2 x s64>) = G_LOAD %0 :: (load (<2 x s64>), align 4, addrspace 0) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 @@ -398,31 +398,31 @@ ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vreg_128(<2 x p1>) = G_LOAD [[COPY]](p1) :: (load (<2 x p1>), align 4) - ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x p1>) + ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[LOAD]](<2 x p1>) ; GFX8-LABEL: name: load_flat_v2p1 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX8-NEXT: [[LOAD:%[0-9]+]]:vreg_128(<2 x p1>) = G_LOAD [[COPY]](p1) :: (load (<2 x p1>), align 4) - ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x p1>) + ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[LOAD]](<2 x p1>) ; GFX9-LABEL: name: load_flat_v2p1 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vreg_128(<2 x p1>) = G_LOAD [[COPY]](p1) :: (load (<2 x p1>), align 4) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x p1>) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[LOAD]](<2 x p1>) ; GFX10-LABEL: name: load_flat_v2p1 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: 
[[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vreg_128(<2 x p1>) = G_LOAD [[COPY]](p1) :: (load (<2 x p1>), align 4) - ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x p1>) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[LOAD]](<2 x p1>) ; GFX11-LABEL: name: load_flat_v2p1 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX11-NEXT: [[LOAD:%[0-9]+]]:vreg_128(<2 x p1>) = G_LOAD [[COPY]](p1) :: (load (<2 x p1>), align 4) - ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x p1>) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[LOAD]](<2 x p1>) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(<2 x p1>) = G_LOAD %0 :: (load (<2 x p1>), align 4, addrspace 0) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 @@ -445,31 +445,31 @@ ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vreg_96(s96) = G_LOAD [[COPY]](p1) :: (load (s96), align 4) - ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](s96) + ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2 = PRED_COPY [[LOAD]](s96) ; GFX8-LABEL: name: load_flat_s96 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX8-NEXT: [[LOAD:%[0-9]+]]:vreg_96(s96) = G_LOAD [[COPY]](p1) :: (load (s96), align 4) - ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](s96) + ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2 = PRED_COPY [[LOAD]](s96) ; GFX9-LABEL: name: load_flat_s96 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vreg_96(s96) = G_LOAD [[COPY]](p1) :: (load (s96), align 4) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](s96) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = PRED_COPY [[LOAD]](s96) ; GFX10-LABEL: name: load_flat_s96 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vreg_96(s96) = G_LOAD [[COPY]](p1) :: (load (s96), align 4) - ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](s96) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = PRED_COPY [[LOAD]](s96) ; GFX11-LABEL: name: load_flat_s96 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX11-NEXT: [[LOAD:%[0-9]+]]:vreg_96(s96) = G_LOAD [[COPY]](p1) :: (load (s96), align 4) - ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](s96) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = PRED_COPY [[LOAD]](s96) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s96) = G_LOAD %0 :: (load (s96), align 4, addrspace 0) $vgpr0_vgpr1_vgpr2 = COPY %1 @@ -492,31 +492,31 @@ ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vreg_128(s128) = G_LOAD [[COPY]](p1) :: (load (s128), align 4) - ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128) + ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[LOAD]](s128) ; GFX8-LABEL: name: load_flat_s128 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX8-NEXT: [[LOAD:%[0-9]+]]:vreg_128(s128) = G_LOAD [[COPY]](p1) :: (load (s128), align 4) - ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128) + ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[LOAD]](s128) ; GFX9-LABEL: name: load_flat_s128 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vreg_128(s128) = G_LOAD [[COPY]](p1) :: 
(load (s128), align 4) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[LOAD]](s128) ; GFX10-LABEL: name: load_flat_s128 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vreg_128(s128) = G_LOAD [[COPY]](p1) :: (load (s128), align 4) - ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[LOAD]](s128) ; GFX11-LABEL: name: load_flat_s128 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX11-NEXT: [[LOAD:%[0-9]+]]:vreg_128(s128) = G_LOAD [[COPY]](p1) :: (load (s128), align 4) - ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[LOAD]](s128) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s128) = G_LOAD %0 :: (load (s128), align 4, addrspace 0) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 @@ -537,33 +537,33 @@ ; GFX7-LABEL: name: load_flat_p3_from_4 ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (p3)) - ; GFX7-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (p3)) + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_DWORD]] ; GFX8-LABEL: name: load_flat_p3_from_4 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (p3)) - ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (p3)) + ; GFX8-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_DWORD]] ; GFX9-LABEL: name: load_flat_p3_from_4 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (p3)) - ; GFX9-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (p3)) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_DWORD]] ; GFX10-LABEL: name: load_flat_p3_from_4 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (p3)) - ; GFX10-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (p3)) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_DWORD]] ; GFX11-LABEL: name: load_flat_p3_from_4 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} - ; 
GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (p3)) - ; GFX11-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (p3)) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_DWORD]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(p3) = G_LOAD %0 :: (load (p3), align 4, addrspace 0) $vgpr0 = COPY %1 @@ -584,33 +584,33 @@ ; GFX7-LABEL: name: load_flat_p1_from_8 ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (p1)) - ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (p1)) + ; GFX7-NEXT: $vgpr0_vgpr1 = PRED_COPY [[FLAT_LOAD_DWORDX2_]] ; GFX8-LABEL: name: load_flat_p1_from_8 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (p1)) - ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (p1)) + ; GFX8-NEXT: $vgpr0_vgpr1 = PRED_COPY [[FLAT_LOAD_DWORDX2_]] ; GFX9-LABEL: name: load_flat_p1_from_8 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (p1)) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (p1)) + ; GFX9-NEXT: $vgpr0_vgpr1 = PRED_COPY [[FLAT_LOAD_DWORDX2_]] ; GFX10-LABEL: name: load_flat_p1_from_8 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (p1)) - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (p1)) + ; GFX10-NEXT: $vgpr0_vgpr1 = PRED_COPY [[FLAT_LOAD_DWORDX2_]] ; GFX11-LABEL: name: load_flat_p1_from_8 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (p1)) - ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 
= PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (p1)) + ; GFX11-NEXT: $vgpr0_vgpr1 = PRED_COPY [[FLAT_LOAD_DWORDX2_]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(p1) = G_LOAD %0 :: (load (p1), align 8, addrspace 0) $vgpr0_vgpr1 = COPY %1 @@ -633,31 +633,31 @@ ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p999) = G_LOAD [[COPY]](p1) :: (load (p999)) - ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p999) + ; GFX7-NEXT: $vgpr0_vgpr1 = PRED_COPY [[LOAD]](p999) ; GFX8-LABEL: name: load_flat_p999_from_8 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX8-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p999) = G_LOAD [[COPY]](p1) :: (load (p999)) - ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p999) + ; GFX8-NEXT: $vgpr0_vgpr1 = PRED_COPY [[LOAD]](p999) ; GFX9-LABEL: name: load_flat_p999_from_8 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p999) = G_LOAD [[COPY]](p1) :: (load (p999)) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p999) + ; GFX9-NEXT: $vgpr0_vgpr1 = PRED_COPY [[LOAD]](p999) ; GFX10-LABEL: name: load_flat_p999_from_8 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p999) = G_LOAD [[COPY]](p1) :: (load (p999)) - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p999) + ; GFX10-NEXT: $vgpr0_vgpr1 = PRED_COPY [[LOAD]](p999) ; GFX11-LABEL: name: load_flat_p999_from_8 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX11-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p999) = G_LOAD [[COPY]](p1) :: (load (p999)) - ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p999) + ; GFX11-NEXT: $vgpr0_vgpr1 = PRED_COPY [[LOAD]](p999) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(p999) = G_LOAD %0 :: (load (p999), align 8, addrspace 0) $vgpr0_vgpr1 = COPY %1 @@ -680,31 +680,31 @@ ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>)) - ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) + ; GFX7-NEXT: $vgpr0_vgpr1 = PRED_COPY [[LOAD]](<2 x p3>) ; GFX8-LABEL: name: load_flat_v2p3 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX8-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>)) - ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) + ; GFX8-NEXT: $vgpr0_vgpr1 = PRED_COPY [[LOAD]](<2 x p3>) ; GFX9-LABEL: name: load_flat_v2p3 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>)) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) + ; GFX9-NEXT: $vgpr0_vgpr1 = PRED_COPY [[LOAD]](<2 x p3>) ; GFX10-LABEL: name: load_flat_v2p3 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>)) - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) + ; GFX10-NEXT: $vgpr0_vgpr1 = PRED_COPY [[LOAD]](<2 x p3>) ; GFX11-LABEL: name: load_flat_v2p3 ; GFX11: liveins: $vgpr0_vgpr1 ; 
GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX11-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>)) - ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) + ; GFX11-NEXT: $vgpr0_vgpr1 = PRED_COPY [[LOAD]](<2 x p3>) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(<2 x p3>) = G_LOAD %0 :: (load (<2 x p3>), align 8, addrspace 0) $vgpr0_vgpr1 = COPY %1 @@ -725,33 +725,33 @@ ; GFX7-LABEL: name: load_flat_v2s16 ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s16>)) - ; GFX7-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s16>)) + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_DWORD]] ; GFX8-LABEL: name: load_flat_v2s16 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s16>)) - ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s16>)) + ; GFX8-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_DWORD]] ; GFX9-LABEL: name: load_flat_v2s16 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s16>)) - ; GFX9-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s16>)) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_DWORD]] ; GFX10-LABEL: name: load_flat_v2s16 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s16>)) - ; GFX10-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s16>)) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_DWORD]] ; GFX11-LABEL: name: load_flat_v2s16 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s16>)) - ; GFX11-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s16>)) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_DWORD]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(<2 x s16>) = G_LOAD 
%0 :: (load (<2 x s16>), align 4, addrspace 0) $vgpr0 = COPY %1 @@ -772,33 +772,33 @@ ; GFX7-LABEL: name: load_flat_v4s16 ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<4 x s16>)) - ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<4 x s16>)) + ; GFX7-NEXT: $vgpr0_vgpr1 = PRED_COPY [[FLAT_LOAD_DWORDX2_]] ; GFX8-LABEL: name: load_flat_v4s16 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<4 x s16>)) - ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<4 x s16>)) + ; GFX8-NEXT: $vgpr0_vgpr1 = PRED_COPY [[FLAT_LOAD_DWORDX2_]] ; GFX9-LABEL: name: load_flat_v4s16 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<4 x s16>)) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<4 x s16>)) + ; GFX9-NEXT: $vgpr0_vgpr1 = PRED_COPY [[FLAT_LOAD_DWORDX2_]] ; GFX10-LABEL: name: load_flat_v4s16 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<4 x s16>)) - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<4 x s16>)) + ; GFX10-NEXT: $vgpr0_vgpr1 = PRED_COPY [[FLAT_LOAD_DWORDX2_]] ; GFX11-LABEL: name: load_flat_v4s16 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<4 x s16>)) - ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<4 x s16>)) + ; GFX11-NEXT: $vgpr0_vgpr1 = PRED_COPY [[FLAT_LOAD_DWORDX2_]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(<4 x s16>) = G_LOAD %0 :: (load (<4 x s16>), align 8, addrspace 0) $vgpr0_vgpr1 = COPY %1 @@ -821,31 +821,31 @@ ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX7-NEXT: 
[[LOAD:%[0-9]+]]:vreg_96(<6 x s16>) = G_LOAD [[COPY]](p1) :: (load (<6 x s16>), align 4) - ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<6 x s16>) + ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2 = PRED_COPY [[LOAD]](<6 x s16>) ; GFX8-LABEL: name: load_flat_v6s16 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX8-NEXT: [[LOAD:%[0-9]+]]:vreg_96(<6 x s16>) = G_LOAD [[COPY]](p1) :: (load (<6 x s16>), align 4) - ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<6 x s16>) + ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2 = PRED_COPY [[LOAD]](<6 x s16>) ; GFX9-LABEL: name: load_flat_v6s16 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vreg_96(<6 x s16>) = G_LOAD [[COPY]](p1) :: (load (<6 x s16>), align 4) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<6 x s16>) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = PRED_COPY [[LOAD]](<6 x s16>) ; GFX10-LABEL: name: load_flat_v6s16 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vreg_96(<6 x s16>) = G_LOAD [[COPY]](p1) :: (load (<6 x s16>), align 4) - ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<6 x s16>) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = PRED_COPY [[LOAD]](<6 x s16>) ; GFX11-LABEL: name: load_flat_v6s16 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX11-NEXT: [[LOAD:%[0-9]+]]:vreg_96(<6 x s16>) = G_LOAD [[COPY]](p1) :: (load (<6 x s16>), align 4) - ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<6 x s16>) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = PRED_COPY [[LOAD]](<6 x s16>) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(<6 x s16>) = G_LOAD %0 :: (load (<6 x s16>), align 4, addrspace 0) $vgpr0_vgpr1_vgpr2 = COPY %1 @@ -866,33 +866,33 @@ ; GFX7-LABEL: name: load_flat_v8s16 ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<8 x s16>), align 4) - ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]] + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<8 x s16>), align 4) + ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[FLAT_LOAD_DWORDX4_]] ; GFX8-LABEL: name: load_flat_v8s16 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<8 x s16>), align 4) - ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]] + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<8 x s16>), align 4) + ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[FLAT_LOAD_DWORDX4_]] ; GFX9-LABEL: name: load_flat_v8s16 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<8 x s16>), align 4) - ; 
GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<8 x s16>), align 4) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[FLAT_LOAD_DWORDX4_]] ; GFX10-LABEL: name: load_flat_v8s16 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<8 x s16>), align 4) - ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<8 x s16>), align 4) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[FLAT_LOAD_DWORDX4_]] ; GFX11-LABEL: name: load_flat_v8s16 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<8 x s16>), align 4) - ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<8 x s16>), align 4) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[FLAT_LOAD_DWORDX4_]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(<8 x s16>) = G_LOAD %0 :: (load (<8 x s16>), align 4, addrspace 0) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 @@ -917,63 +917,63 @@ ; GFX7-LABEL: name: load_flat_s32_from_1_gep_2047 ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2047, implicit $exec ; GFX7-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX7-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 
[[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX7-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) - ; GFX7-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] ; GFX8-LABEL: name: load_flat_s32_from_1_gep_2047 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX8-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2047, implicit $exec ; GFX8-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX8-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX8-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) - ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX8-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] ; GFX9-LABEL: name: load_flat_s32_from_1_gep_2047 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 2047, 0, implicit $exec, implicit $flat_scr :: (load (s8)) - ; GFX9-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[PRED_COPY]], 2047, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] ; GFX10-LABEL: name: 
load_flat_s32_from_1_gep_2047 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2047, implicit $exec ; GFX10-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX10-NEXT: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX10-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) - ; GFX10-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] ; GFX11-LABEL: name: load_flat_s32_from_1_gep_2047 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 2047, 0, implicit $exec, implicit $flat_scr :: (load (s8)) - ; GFX11-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[PRED_COPY]], 2047, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_CONSTANT i64 2047 %2:vgpr(p1) = G_PTR_ADD %0, %1 @@ -996,63 +996,63 @@ ; GFX7-LABEL: name: load_flat_s32_from_1_gep_2048 ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2048, implicit $exec ; GFX7-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 
- ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX7-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX7-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) - ; GFX7-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] ; GFX8-LABEL: name: load_flat_s32_from_1_gep_2048 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX8-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2048, implicit $exec ; GFX8-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX8-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX8-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 
[[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) - ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX8-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] ; GFX9-LABEL: name: load_flat_s32_from_1_gep_2048 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 2048, 0, implicit $exec, implicit $flat_scr :: (load (s8)) - ; GFX9-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[PRED_COPY]], 2048, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] ; GFX10-LABEL: name: load_flat_s32_from_1_gep_2048 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2048, implicit $exec ; GFX10-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX10-NEXT: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX10-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) - ; GFX10-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] ; GFX11-LABEL: name: load_flat_s32_from_1_gep_2048 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; 
GFX11-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 2048, 0, implicit $exec, implicit $flat_scr :: (load (s8)) - ; GFX11-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[PRED_COPY]], 2048, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_CONSTANT i64 2048 %2:vgpr(p1) = G_PTR_ADD %0, %1 @@ -1075,83 +1075,83 @@ ; GFX7-LABEL: name: load_flat_s32_from_1_gep_m2047 ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294965249, implicit $exec ; GFX7-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX7-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX7-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) - ; GFX7-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] ; GFX8-LABEL: name: load_flat_s32_from_1_gep_m2047 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX8-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294965249, implicit $exec ; GFX8-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX8-NEXT: 
[[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX8-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX8-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) - ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX8-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] ; GFX9-LABEL: name: load_flat_s32_from_1_gep_m2047 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294965249, implicit $exec ; GFX9-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX9-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX9-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX9-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX9-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE 
[[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX9-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) - ; GFX9-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] ; GFX10-LABEL: name: load_flat_s32_from_1_gep_m2047 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294965249, implicit $exec ; GFX10-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX10-NEXT: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX10-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) - ; GFX10-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] ; GFX11-LABEL: name: load_flat_s32_from_1_gep_m2047 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294965249, implicit $exec ; GFX11-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = 
V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX11-NEXT: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX11-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX11-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX11-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX11-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) - ; GFX11-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_CONSTANT i64 -2047 %2:vgpr(p1) = G_PTR_ADD %0, %1 @@ -1174,83 +1174,83 @@ ; GFX7-LABEL: name: load_flat_s32_from_1_gep_m2048 ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294965248, implicit $exec ; GFX7-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX7-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; 
GFX7-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) - ; GFX7-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] ; GFX8-LABEL: name: load_flat_s32_from_1_gep_m2048 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX8-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294965248, implicit $exec ; GFX8-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX8-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX8-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) - ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX8-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] ; GFX9-LABEL: name: load_flat_s32_from_1_gep_m2048 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294965248, implicit $exec ; GFX9-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX9-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], 
[[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX9-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX9-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX9-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX9-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) - ; GFX9-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] ; GFX10-LABEL: name: load_flat_s32_from_1_gep_m2048 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294965248, implicit $exec ; GFX10-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX10-NEXT: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX10-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) - ; GFX10-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] ; 
GFX11-LABEL: name: load_flat_s32_from_1_gep_m2048 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294965248, implicit $exec ; GFX11-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX11-NEXT: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX11-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX11-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX11-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX11-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) - ; GFX11-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_CONSTANT i64 -2048 %2:vgpr(p1) = G_PTR_ADD %0, %1 @@ -1273,63 +1273,63 @@ ; GFX7-LABEL: name: load_flat_s32_from_1_gep_4095 ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec ; GFX7-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX7-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX7-NEXT: 
[[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX7-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) - ; GFX7-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] ; GFX8-LABEL: name: load_flat_s32_from_1_gep_4095 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX8-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec ; GFX8-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX8-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX8-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) - ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX8-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] ; GFX9-LABEL: name: load_flat_s32_from_1_gep_4095 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: 
[[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 4095, 0, implicit $exec, implicit $flat_scr :: (load (s8)) - ; GFX9-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[PRED_COPY]], 4095, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] ; GFX10-LABEL: name: load_flat_s32_from_1_gep_4095 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec ; GFX10-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX10-NEXT: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX10-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) - ; GFX10-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] ; GFX11-LABEL: name: load_flat_s32_from_1_gep_4095 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 4095, 0, implicit $exec, implicit $flat_scr :: (load (s8)) - ; GFX11-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[PRED_COPY]], 4095, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_CONSTANT i64 4095 %2:vgpr(p1) = G_PTR_ADD %0, %1 @@ -1352,83 +1352,83 @@ ; 
GFX7-LABEL: name: load_flat_s32_from_1_gep_4096 ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec ; GFX7-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX7-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX7-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) - ; GFX7-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] ; GFX8-LABEL: name: load_flat_s32_from_1_gep_4096 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX8-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec ; GFX8-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX8-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; 
GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX8-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) - ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX8-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] ; GFX9-LABEL: name: load_flat_s32_from_1_gep_4096 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec ; GFX9-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX9-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX9-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX9-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX9-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX9-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) - ; GFX9-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] ; GFX10-LABEL: name: load_flat_s32_from_1_gep_4096 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, 
implicit $exec ; GFX10-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX10-NEXT: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX10-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) - ; GFX10-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] ; GFX11-LABEL: name: load_flat_s32_from_1_gep_4096 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec ; GFX11-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX11-NEXT: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX11-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX11-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; 
GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX11-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX11-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) - ; GFX11-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_CONSTANT i64 4096 %2:vgpr(p1) = G_PTR_ADD %0, %1 @@ -1451,83 +1451,83 @@ ; GFX7-LABEL: name: load_flat_s32_from_1_gep_m4095 ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294963201, implicit $exec ; GFX7-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX7-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX7-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) - ; GFX7-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] ; GFX8-LABEL: name: load_flat_s32_from_1_gep_m4095 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX8-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294963201, implicit $exec ; GFX8-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit 
$exec ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX8-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX8-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) - ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX8-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] ; GFX9-LABEL: name: load_flat_s32_from_1_gep_m4095 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294963201, implicit $exec ; GFX9-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX9-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX9-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, 
implicit $exec + ; GFX9-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX9-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX9-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) - ; GFX9-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] ; GFX10-LABEL: name: load_flat_s32_from_1_gep_m4095 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294963201, implicit $exec ; GFX10-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX10-NEXT: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX10-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) - ; GFX10-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] ; GFX11-LABEL: name: load_flat_s32_from_1_gep_m4095 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294963201, implicit $exec ; GFX11-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 
= COPY [[REG_SEQUENCE]].sub0 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX11-NEXT: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX11-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX11-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX11-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX11-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) - ; GFX11-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_CONSTANT i64 -4095 %2:vgpr(p1) = G_PTR_ADD %0, %1 @@ -1550,83 +1550,83 @@ ; GFX7-LABEL: name: load_flat_s32_from_1_gep_m4096 ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294963200, implicit $exec ; GFX7-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX7-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead 
[[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX7-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) - ; GFX7-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] ; GFX8-LABEL: name: load_flat_s32_from_1_gep_m4096 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX8-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294963200, implicit $exec ; GFX8-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX8-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX8-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) - ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX8-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] ; GFX9-LABEL: name: load_flat_s32_from_1_gep_m4096 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294963200, implicit $exec ; GFX9-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX9-NEXT: 
[[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX9-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX9-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX9-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX9-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX9-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) - ; GFX9-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] ; GFX10-LABEL: name: load_flat_s32_from_1_gep_m4096 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294963200, implicit $exec ; GFX10-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX10-NEXT: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], 
%subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX10-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) - ; GFX10-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] ; GFX11-LABEL: name: load_flat_s32_from_1_gep_m4096 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294963200, implicit $exec ; GFX11-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX11-NEXT: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX11-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX11-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX11-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX11-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) - ; GFX11-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_CONSTANT i64 -4096 %2:vgpr(p1) = G_PTR_ADD %0, %1 @@ -1649,83 +1649,83 @@ ; GFX7-LABEL: name: load_flat_s32_from_1_gep_8191 ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8191, implicit $exec ; GFX7-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7-NEXT: 
[[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX7-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX7-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) - ; GFX7-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] ; GFX8-LABEL: name: load_flat_s32_from_1_gep_8191 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX8-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8191, implicit $exec ; GFX8-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX8-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX8-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE 
[[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) - ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX8-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] ; GFX9-LABEL: name: load_flat_s32_from_1_gep_8191 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8191, implicit $exec ; GFX9-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX9-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX9-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX9-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX9-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX9-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) - ; GFX9-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] ; GFX10-LABEL: name: load_flat_s32_from_1_gep_8191 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8191, implicit $exec ; GFX10-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX10-NEXT: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - 
; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX10-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) - ; GFX10-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] ; GFX11-LABEL: name: load_flat_s32_from_1_gep_8191 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8191, implicit $exec ; GFX11-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX11-NEXT: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX11-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX11-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX11-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX11-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) - ; GFX11-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = 
G_CONSTANT i64 8191 %2:vgpr(p1) = G_PTR_ADD %0, %1 @@ -1748,83 +1748,83 @@ ; GFX7-LABEL: name: load_flat_s32_from_1_gep_8192 ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8192, implicit $exec ; GFX7-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX7-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX7-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) - ; GFX7-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] ; GFX8-LABEL: name: load_flat_s32_from_1_gep_8192 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX8-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8192, implicit $exec ; GFX8-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX8-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; 
GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX8-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) - ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX8-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] ; GFX9-LABEL: name: load_flat_s32_from_1_gep_8192 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8192, implicit $exec ; GFX9-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX9-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX9-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX9-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX9-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX9-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) - ; GFX9-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] ; GFX10-LABEL: name: load_flat_s32_from_1_gep_8192 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY 
$vgpr0_vgpr1 ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8192, implicit $exec ; GFX10-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX10-NEXT: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX10-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) - ; GFX10-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] ; GFX11-LABEL: name: load_flat_s32_from_1_gep_8192 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8192, implicit $exec ; GFX11-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX11-NEXT: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX11-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; 
GFX11-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX11-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX11-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) - ; GFX11-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_CONSTANT i64 8192 %2:vgpr(p1) = G_PTR_ADD %0, %1 @@ -1847,83 +1847,83 @@ ; GFX7-LABEL: name: load_flat_s32_from_1_gep_m8191 ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294959105, implicit $exec ; GFX7-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX7-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX7-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) - ; GFX7-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] ; GFX8-LABEL: name: load_flat_s32_from_1_gep_m8191 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX8-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294959105, implicit $exec 
; GFX8-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX8-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX8-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) - ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX8-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] ; GFX9-LABEL: name: load_flat_s32_from_1_gep_m8191 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294959105, implicit $exec ; GFX9-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX9-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX9-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, 
[[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX9-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX9-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX9-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) - ; GFX9-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] ; GFX10-LABEL: name: load_flat_s32_from_1_gep_m8191 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294959105, implicit $exec ; GFX10-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX10-NEXT: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX10-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) - ; GFX10-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] ; GFX11-LABEL: name: load_flat_s32_from_1_gep_m8191 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294959105, implicit $exec ; GFX11-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; 
GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX11-NEXT: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX11-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX11-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX11-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX11-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) - ; GFX11-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_CONSTANT i64 -8191 %2:vgpr(p1) = G_PTR_ADD %0, %1 @@ -1946,83 +1946,83 @@ ; GFX7-LABEL: name: load_flat_s32_from_1_gep_m8192 ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294959104, implicit $exec ; GFX7-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX7-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], 
[[PRED_COPY2]], 0, implicit $exec + ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX7-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) - ; GFX7-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] ; GFX8-LABEL: name: load_flat_s32_from_1_gep_m8192 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX8-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294959104, implicit $exec ; GFX8-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX8-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX8-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) - ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX8-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] ; GFX9-LABEL: name: load_flat_s32_from_1_gep_m8192 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294959104, implicit $exec ; GFX9-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - 
; GFX9-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX9-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX9-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX9-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX9-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX9-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) - ; GFX9-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] ; GFX10-LABEL: name: load_flat_s32_from_1_gep_m8192 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294959104, implicit $exec ; GFX10-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX10-NEXT: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX10-NEXT: 
[[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX10-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) - ; GFX10-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] ; GFX11-LABEL: name: load_flat_s32_from_1_gep_m8192 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294959104, implicit $exec ; GFX11-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX11-NEXT: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX11-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX11-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX11-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX11-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) - ; GFX11-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_CONSTANT i64 -8192 %2:vgpr(p1) = G_PTR_ADD %0, %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-global-saddr.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-global-saddr.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-global-saddr.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-global-saddr.mir @@ -16,24 +16,24 @@ ; GFX9-LABEL: name: load_global_s32_from_sgpr ; GFX9: liveins: $sgpr0_sgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX9-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: 
(load (s32), addrspace 1) - ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]] + ; GFX9-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[PRED_COPY]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_DWORD_SADDR]] ; GFX10-LABEL: name: load_global_s32_from_sgpr ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (load (s32), addrspace 1) - ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]] + ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[PRED_COPY]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_DWORD_SADDR]] ; GFX11-LABEL: name: load_global_s32_from_sgpr ; GFX11: liveins: $sgpr0_sgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX11-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (load (s32), addrspace 1) - ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]] + ; GFX11-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[PRED_COPY]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_DWORD_SADDR]] %0:sgpr(p1) = COPY $sgpr0_sgpr1 %1:vgpr(p1) = COPY %0 %2:vgpr(s32) = G_LOAD %1 :: (load (s32), align 4, addrspace 1) @@ -56,24 +56,24 @@ ; GFX9-LABEL: name: load_global_s32_from_sgpr_zext_vgpr ; GFX9: liveins: $sgpr0_sgpr1, $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load (s32), addrspace 1) - ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_DWORD_SADDR]] ; GFX10-LABEL: name: load_global_s32_from_sgpr_zext_vgpr ; GFX10: liveins: $sgpr0_sgpr1, $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load (s32), addrspace 1) - ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec :: (load (s32), addrspace 1) 
+ ; GFX10-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_DWORD_SADDR]] ; GFX11-LABEL: name: load_global_s32_from_sgpr_zext_vgpr ; GFX11: liveins: $sgpr0_sgpr1, $vgpr0 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load (s32), addrspace 1) - ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_DWORD_SADDR]] %0:sgpr(p1) = COPY $sgpr0_sgpr1 %1:vgpr(s32) = COPY $vgpr0 %2:vgpr(p1) = COPY %0 @@ -99,24 +99,24 @@ ; GFX9-LABEL: name: load_global_s32_from_sgpr_merge_zext_vgpr ; GFX9: liveins: $sgpr0_sgpr1, $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load (s32), addrspace 1) - ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_DWORD_SADDR]] ; GFX10-LABEL: name: load_global_s32_from_sgpr_merge_zext_vgpr ; GFX10: liveins: $sgpr0_sgpr1, $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load (s32), addrspace 1) - ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_DWORD_SADDR]] ; GFX11-LABEL: name: load_global_s32_from_sgpr_merge_zext_vgpr ; GFX11: liveins: $sgpr0_sgpr1, $vgpr0 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load (s32), addrspace 1) - ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_DWORD_SADDR]] %0:sgpr(p1) = COPY $sgpr0_sgpr1 %1:vgpr(s32) = COPY $vgpr0 %2:vgpr(p1) = COPY %0 @@ -142,54 +142,54 @@ ; GFX9-LABEL: 
name: load_global_s32_from_sgpr_merge_not_0_vgpr ; GFX9: liveins: $sgpr0_sgpr1, $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY [[COPY]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64 = PRED_COPY [[PRED_COPY]] ; GFX9-NEXT: %notzero:vgpr_32 = V_MOV_B32_e32 1, implicit $exec - ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, %notzero, %subreg.sub1 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY2]].sub0 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY2]].sub1 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec - ; GFX9-NEXT: %12:vgpr_32, dead %14:sreg_64_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX9-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %12, %subreg.sub1 + ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, %notzero, %subreg.sub1 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY2]].sub0 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY2]].sub1 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], 0, implicit $exec + ; GFX9-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY5]], [[PRED_COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX9-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX9-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load (s32), addrspace 1) - ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_DWORD]] ; GFX10-LABEL: name: load_global_s32_from_sgpr_merge_not_0_vgpr ; GFX10: liveins: $sgpr0_sgpr1, $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY [[COPY]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64 = PRED_COPY [[PRED_COPY]] ; GFX10-NEXT: %notzero:vgpr_32 = V_MOV_B32_e32 1, implicit $exec - ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, %notzero, %subreg.sub1 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY2]].sub0 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY2]].sub1 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, 
[[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec - ; GFX10-NEXT: %12:vgpr_32, dead %14:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %12, %subreg.sub1 + ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, %notzero, %subreg.sub1 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY2]].sub0 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY2]].sub1 + ; GFX10-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], 0, implicit $exec + ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[PRED_COPY5]], [[PRED_COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load (s32), addrspace 1) - ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_DWORD]] ; GFX11-LABEL: name: load_global_s32_from_sgpr_merge_not_0_vgpr ; GFX11: liveins: $sgpr0_sgpr1, $vgpr0 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY [[COPY]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64 = PRED_COPY [[PRED_COPY]] ; GFX11-NEXT: %notzero:vgpr_32 = V_MOV_B32_e32 1, implicit $exec - ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, %notzero, %subreg.sub1 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY2]].sub0 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX11-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY2]].sub1 - ; GFX11-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec - ; GFX11-NEXT: %12:vgpr_32, dead %14:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %12, %subreg.sub1 + ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, %notzero, %subreg.sub1 + ; GFX11-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY2]].sub0 + ; GFX11-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX11-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY2]].sub1 + ; GFX11-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], 0, 
implicit $exec + ; GFX11-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[PRED_COPY5]], [[PRED_COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX11-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load (s32), addrspace 1) - ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_DWORD]] %0:sgpr(p1) = COPY $sgpr0_sgpr1 %1:vgpr(s32) = COPY $vgpr0 %2:vgpr(p1) = COPY %0 @@ -215,44 +215,44 @@ ; GFX9-LABEL: name: load_global_s32_from_sgpr_zext_vgpr_offset4095 ; GFX9: liveins: $sgpr0_sgpr1, $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[COPY1]], 4095, 0, implicit $exec :: (load (s32), addrspace 1) - ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[PRED_COPY]], [[PRED_COPY1]], 4095, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_DWORD_SADDR]] ; GFX10-LABEL: name: load_global_s32_from_sgpr_zext_vgpr_offset4095 ; GFX10: liveins: $sgpr0_sgpr1, $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY [[COPY]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64 = PRED_COPY [[PRED_COPY]] ; GFX10-NEXT: %zero:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX10-NEXT: %zext:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, %zero, %subreg.sub1 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY2]].sub0 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY %zext.sub0 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY2]].sub1 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY %zext.sub1 - ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec - ; GFX10-NEXT: %24:vgpr_32, dead %26:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %24, %subreg.sub1 + ; GFX10-NEXT: %zext:vreg_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, %zero, %subreg.sub1 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY2]].sub0 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY %zext.sub0 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY2]].sub1 + ; GFX10-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY %zext.sub1 + ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], 0, implicit $exec + ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = 
V_ADDC_U32_e64 [[PRED_COPY5]], [[PRED_COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec ; GFX10-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX10-NEXT: [[V_ADD_CO_U32_e64_2:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_3:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY7]], [[COPY8]], 0, implicit $exec - ; GFX10-NEXT: %14:vgpr_32, dead %16:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY9]], [[COPY10]], killed [[V_ADD_CO_U32_e64_3]], 0, implicit $exec - ; GFX10-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_2]], %subreg.sub0, %14, %subreg.sub1 + ; GFX10-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX10-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub0 + ; GFX10-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX10-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub1 + ; GFX10-NEXT: [[V_ADD_CO_U32_e64_2:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_3:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[PRED_COPY7]], [[PRED_COPY8]], 0, implicit $exec + ; GFX10-NEXT: [[V_ADDC_U32_e64_2:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_3:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[PRED_COPY9]], [[PRED_COPY10]], killed [[V_ADD_CO_U32_e64_3]], 0, implicit $exec + ; GFX10-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_2]], %subreg.sub0, [[V_ADDC_U32_e64_2]], %subreg.sub1 ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[REG_SEQUENCE2]], 0, 0, implicit $exec :: (load (s32), addrspace 1) - ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_DWORD]] ; GFX11-LABEL: name: load_global_s32_from_sgpr_zext_vgpr_offset4095 ; GFX11: liveins: $sgpr0_sgpr1, $vgpr0 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[COPY1]], 4095, 0, implicit $exec :: (load (s32), addrspace 1) - ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[PRED_COPY]], [[PRED_COPY1]], 4095, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_DWORD_SADDR]] %0:sgpr(p1) = COPY $sgpr0_sgpr1 %1:vgpr(s32) = COPY $vgpr0 %2:vgpr(p1) = COPY %0 @@ -280,44 +280,44 @@ ; GFX9-LABEL: name: load_global_s32_from_sgpr_zext_vgpr_offset_neg4096 ; GFX9: liveins: $sgpr0_sgpr1, $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: 
[[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[COPY1]], -4096, 0, implicit $exec :: (load (s32), addrspace 1) - ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[PRED_COPY]], [[PRED_COPY1]], -4096, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_DWORD_SADDR]] ; GFX10-LABEL: name: load_global_s32_from_sgpr_zext_vgpr_offset_neg4096 ; GFX10: liveins: $sgpr0_sgpr1, $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY [[COPY]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64 = PRED_COPY [[PRED_COPY]] ; GFX10-NEXT: %zero:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX10-NEXT: %zext:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, %zero, %subreg.sub1 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY2]].sub0 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY %zext.sub0 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY2]].sub1 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY %zext.sub1 - ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec - ; GFX10-NEXT: %24:vgpr_32, dead %26:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %24, %subreg.sub1 + ; GFX10-NEXT: %zext:vreg_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, %zero, %subreg.sub1 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY2]].sub0 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY %zext.sub0 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY2]].sub1 + ; GFX10-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY %zext.sub1 + ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], 0, implicit $exec + ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[PRED_COPY5]], [[PRED_COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294963200, implicit $exec ; GFX10-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX10-NEXT: [[V_ADD_CO_U32_e64_2:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_3:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY7]], 
[[COPY8]], 0, implicit $exec - ; GFX10-NEXT: %14:vgpr_32, dead %16:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY9]], [[COPY10]], killed [[V_ADD_CO_U32_e64_3]], 0, implicit $exec - ; GFX10-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_2]], %subreg.sub0, %14, %subreg.sub1 + ; GFX10-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX10-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub0 + ; GFX10-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX10-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub1 + ; GFX10-NEXT: [[V_ADD_CO_U32_e64_2:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_3:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[PRED_COPY7]], [[PRED_COPY8]], 0, implicit $exec + ; GFX10-NEXT: [[V_ADDC_U32_e64_2:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_3:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[PRED_COPY9]], [[PRED_COPY10]], killed [[V_ADD_CO_U32_e64_3]], 0, implicit $exec + ; GFX10-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_2]], %subreg.sub0, [[V_ADDC_U32_e64_2]], %subreg.sub1 ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[REG_SEQUENCE2]], 0, 0, implicit $exec :: (load (s32), addrspace 1) - ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_DWORD]] ; GFX11-LABEL: name: load_global_s32_from_sgpr_zext_vgpr_offset_neg4096 ; GFX11: liveins: $sgpr0_sgpr1, $vgpr0 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[COPY1]], -4096, 0, implicit $exec :: (load (s32), addrspace 1) - ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[PRED_COPY]], [[PRED_COPY1]], -4096, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_DWORD_SADDR]] %0:sgpr(p1) = COPY $sgpr0_sgpr1 %1:vgpr(s32) = COPY $vgpr0 %2:vgpr(p1) = COPY %0 @@ -343,24 +343,24 @@ ; GFX9-LABEL: name: load_global_s32_from_sgpr_base_offset_4096 ; GFX9: liveins: $sgpr0_sgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec - ; GFX9-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (load (s32), addrspace 1) - ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]] + ; GFX9-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[PRED_COPY]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_DWORD_SADDR]] ; GFX10-LABEL: name: load_global_s32_from_sgpr_base_offset_4096 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec - ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], 
[[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (load (s32), addrspace 1) - ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]] + ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[PRED_COPY]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_DWORD_SADDR]] ; GFX11-LABEL: name: load_global_s32_from_sgpr_base_offset_4096 ; GFX11: liveins: $sgpr0_sgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec - ; GFX11-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (load (s32), addrspace 1) - ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]] + ; GFX11-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[PRED_COPY]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_DWORD_SADDR]] %0:sgpr(p1) = COPY $sgpr0_sgpr1 %1:sgpr(s64) = G_CONSTANT i64 4096 %2:sgpr(p1) = G_PTR_ADD %0, %1 @@ -383,24 +383,24 @@ ; GFX9-LABEL: name: load_global_s32_from_sgpr_base_offset_4097 ; GFX9: liveins: $sgpr0_sgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec - ; GFX9-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 1, 0, implicit $exec :: (load (s32), addrspace 1) - ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]] + ; GFX9-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[PRED_COPY]], [[V_MOV_B32_e32_]], 1, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_DWORD_SADDR]] ; GFX10-LABEL: name: load_global_s32_from_sgpr_base_offset_4097 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec - ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 1, 0, implicit $exec :: (load (s32), addrspace 1) - ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]] + ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[PRED_COPY]], [[V_MOV_B32_e32_]], 1, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_DWORD_SADDR]] ; GFX11-LABEL: name: load_global_s32_from_sgpr_base_offset_4097 ; GFX11: liveins: $sgpr0_sgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec - ; GFX11-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 1, 0, implicit $exec :: (load (s32), addrspace 1) - ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]] + ; GFX11-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[PRED_COPY]], [[V_MOV_B32_e32_]], 1, 0, implicit $exec :: 
(load (s32), addrspace 1) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_DWORD_SADDR]] %0:sgpr(p1) = COPY $sgpr0_sgpr1 %1:sgpr(s64) = G_CONSTANT i64 4097 %2:sgpr(p1) = G_PTR_ADD %0, %1 @@ -423,54 +423,54 @@ ; GFX9-LABEL: name: load_global_s32_from_sgpr_base_offset_neg4097 ; GFX9: liveins: $sgpr0_sgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4294963199 ; GFX9-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX9-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc - ; GFX9-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def $scc, implicit $scc + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX9-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PRED_COPY1]], [[PRED_COPY2]], implicit-def $scc + ; GFX9-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[PRED_COPY3]], [[PRED_COPY4]], implicit-def $scc, implicit $scc ; GFX9-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX9-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[REG_SEQUENCE1]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (load (s32), addrspace 1) - ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]] + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_DWORD_SADDR]] ; GFX10-LABEL: name: load_global_s32_from_sgpr_base_offset_neg4097 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4294963199 ; GFX10-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX10-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc - ; GFX10-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def $scc, implicit $scc + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX10-NEXT: 
[[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PRED_COPY1]], [[PRED_COPY2]], implicit-def $scc + ; GFX10-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[PRED_COPY3]], [[PRED_COPY4]], implicit-def $scc, implicit $scc ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]] - ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load (s32), addrspace 1) - ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE1]] + ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[PRED_COPY5]], 0, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_DWORD]] ; GFX11-LABEL: name: load_global_s32_from_sgpr_base_offset_neg4097 ; GFX11: liveins: $sgpr0_sgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 ; GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4294963199 ; GFX11-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX11-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc - ; GFX11-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def $scc, implicit $scc + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX11-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX11-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX11-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PRED_COPY1]], [[PRED_COPY2]], implicit-def $scc + ; GFX11-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[PRED_COPY3]], [[PRED_COPY4]], implicit-def $scc, implicit $scc ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 - ; GFX11-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]] - ; GFX11-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load (s32), addrspace 1) - ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] + ; GFX11-NEXT: [[PRED_COPY5:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE1]] + ; GFX11-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[PRED_COPY5]], 0, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_DWORD]] %0:sgpr(p1) = COPY $sgpr0_sgpr1 %1:sgpr(s64) = G_CONSTANT i64 -4097 %2:sgpr(p1) = G_PTR_ADD %0, %1 @@ -493,24 +493,24 @@ ; GFX9-LABEL: name: load_global_s32_from_sgpr_base_offset_2049 ; GFX9: liveins: $sgpr0_sgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX9-NEXT: 
[[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 2049, 0, implicit $exec :: (load (s32), addrspace 1) - ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]] + ; GFX9-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[PRED_COPY]], [[V_MOV_B32_e32_]], 2049, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_DWORD_SADDR]] ; GFX10-LABEL: name: load_global_s32_from_sgpr_base_offset_2049 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2048, implicit $exec - ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 1, 0, implicit $exec :: (load (s32), addrspace 1) - ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]] + ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[PRED_COPY]], [[V_MOV_B32_e32_]], 1, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_DWORD_SADDR]] ; GFX11-LABEL: name: load_global_s32_from_sgpr_base_offset_2049 ; GFX11: liveins: $sgpr0_sgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX11-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 2049, 0, implicit $exec :: (load (s32), addrspace 1) - ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]] + ; GFX11-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[PRED_COPY]], [[V_MOV_B32_e32_]], 2049, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_DWORD_SADDR]] %0:sgpr(p1) = COPY $sgpr0_sgpr1 %1:sgpr(s64) = G_CONSTANT i64 2049 %2:sgpr(p1) = G_PTR_ADD %0, %1 @@ -533,34 +533,34 @@ ; GFX9-LABEL: name: load_global_s32_from_sgpr_base_offset_neg2049 ; GFX9: liveins: $sgpr0_sgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX9-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], -2049, 0, implicit $exec :: (load (s32), addrspace 1) - ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]] + ; GFX9-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[PRED_COPY]], [[V_MOV_B32_e32_]], -2049, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_DWORD_SADDR]] ; GFX10-LABEL: name: load_global_s32_from_sgpr_base_offset_neg2049 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4294965247 ; GFX10-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = 
COPY [[REG_SEQUENCE]].sub0 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX10-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc - ; GFX10-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def $scc, implicit $scc + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX10-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PRED_COPY1]], [[PRED_COPY2]], implicit-def $scc + ; GFX10-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[PRED_COPY3]], [[PRED_COPY4]], implicit-def $scc, implicit $scc ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]] - ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load (s32), addrspace 1) - ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE1]] + ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[PRED_COPY5]], 0, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_DWORD]] ; GFX11-LABEL: name: load_global_s32_from_sgpr_base_offset_neg2049 ; GFX11: liveins: $sgpr0_sgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX11-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], -2049, 0, implicit $exec :: (load (s32), addrspace 1) - ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]] + ; GFX11-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[PRED_COPY]], [[V_MOV_B32_e32_]], -2049, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_DWORD_SADDR]] %0:sgpr(p1) = COPY $sgpr0_sgpr1 %1:sgpr(s64) = G_CONSTANT i64 -2049 %2:sgpr(p1) = G_PTR_ADD %0, %1 @@ -582,24 +582,24 @@ ; GFX9-LABEL: name: load_global_s32_from_sgpr_base_offset_4294967295 ; GFX9: liveins: $sgpr0_sgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294963200, implicit $exec - ; GFX9-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 4095, 0, implicit $exec :: (load (s32), addrspace 1) - ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]] + ; GFX9-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[PRED_COPY]], [[V_MOV_B32_e32_]], 4095, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_DWORD_SADDR]] ; GFX10-LABEL: name: load_global_s32_from_sgpr_base_offset_4294967295 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY 
$sgpr0_sgpr1 ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294965248, implicit $exec - ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 2047, 0, implicit $exec :: (load (s32), addrspace 1) - ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]] + ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[PRED_COPY]], [[V_MOV_B32_e32_]], 2047, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_DWORD_SADDR]] ; GFX11-LABEL: name: load_global_s32_from_sgpr_base_offset_4294967295 ; GFX11: liveins: $sgpr0_sgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294963200, implicit $exec - ; GFX11-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 4095, 0, implicit $exec :: (load (s32), addrspace 1) - ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]] + ; GFX11-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[PRED_COPY]], [[V_MOV_B32_e32_]], 4095, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_DWORD_SADDR]] %0:sgpr(p1) = COPY $sgpr0_sgpr1 %1:sgpr(s64) = G_CONSTANT i64 4294967295 %2:sgpr(p1) = G_PTR_ADD %0, %1 @@ -621,54 +621,54 @@ ; GFX9-LABEL: name: load_global_s32_from_sgpr_base_offset_4294967296 ; GFX9: liveins: $sgpr0_sgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX9-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 1 ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX9-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc - ; GFX9-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def $scc, implicit $scc + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX9-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PRED_COPY1]], [[PRED_COPY2]], implicit-def $scc + ; GFX9-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[PRED_COPY3]], [[PRED_COPY4]], implicit-def $scc, implicit $scc ; GFX9-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]] - ; GFX9-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load (s32), addrspace 1) - ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE1]] + ; GFX9-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD 
[[PRED_COPY5]], 0, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_DWORD]] ; GFX10-LABEL: name: load_global_s32_from_sgpr_base_offset_4294967296 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX10-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 1 ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX10-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc - ; GFX10-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def $scc, implicit $scc + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX10-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PRED_COPY1]], [[PRED_COPY2]], implicit-def $scc + ; GFX10-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[PRED_COPY3]], [[PRED_COPY4]], implicit-def $scc, implicit $scc ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]] - ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load (s32), addrspace 1) - ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE1]] + ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[PRED_COPY5]], 0, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_DWORD]] ; GFX11-LABEL: name: load_global_s32_from_sgpr_base_offset_4294967296 ; GFX11: liveins: $sgpr0_sgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 ; GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX11-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 1 ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX11-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc - ; GFX11-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def $scc, implicit $scc + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX11-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX11-NEXT: 
[[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX11-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PRED_COPY1]], [[PRED_COPY2]], implicit-def $scc + ; GFX11-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[PRED_COPY3]], [[PRED_COPY4]], implicit-def $scc, implicit $scc ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 - ; GFX11-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]] - ; GFX11-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load (s32), addrspace 1) - ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] + ; GFX11-NEXT: [[PRED_COPY5:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE1]] + ; GFX11-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[PRED_COPY5]], 0, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_DWORD]] %0:sgpr(p1) = COPY $sgpr0_sgpr1 %1:sgpr(s64) = G_CONSTANT i64 4294967296 %2:sgpr(p1) = G_PTR_ADD %0, %1 @@ -691,54 +691,54 @@ ; GFX9-LABEL: name: load_global_s32_from_sgpr_base_offset_4294971390 ; GFX9: liveins: $sgpr0_sgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4094 ; GFX9-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 1 ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX9-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc - ; GFX9-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def $scc, implicit $scc + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX9-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PRED_COPY1]], [[PRED_COPY2]], implicit-def $scc + ; GFX9-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[PRED_COPY3]], [[PRED_COPY4]], implicit-def $scc, implicit $scc ; GFX9-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX9-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[REG_SEQUENCE1]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (load (s32), addrspace 1) - ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]] + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_DWORD_SADDR]] ; GFX10-LABEL: name: load_global_s32_from_sgpr_base_offset_4294971390 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4094 ; GFX10-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 1 ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], 
%subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX10-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc - ; GFX10-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def $scc, implicit $scc + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX10-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PRED_COPY1]], [[PRED_COPY2]], implicit-def $scc + ; GFX10-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[PRED_COPY3]], [[PRED_COPY4]], implicit-def $scc, implicit $scc ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]] - ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load (s32), addrspace 1) - ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE1]] + ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[PRED_COPY5]], 0, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_DWORD]] ; GFX11-LABEL: name: load_global_s32_from_sgpr_base_offset_4294971390 ; GFX11: liveins: $sgpr0_sgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 ; GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4094 ; GFX11-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 1 ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX11-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc - ; GFX11-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def $scc, implicit $scc + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX11-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX11-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX11-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PRED_COPY1]], [[PRED_COPY2]], implicit-def $scc + ; GFX11-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[PRED_COPY3]], [[PRED_COPY4]], implicit-def $scc, implicit $scc ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 - ; GFX11-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]] - ; GFX11-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 
0, implicit $exec :: (load (s32), addrspace 1) - ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] + ; GFX11-NEXT: [[PRED_COPY5:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE1]] + ; GFX11-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[PRED_COPY5]], 0, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_DWORD]] %0:sgpr(p1) = COPY $sgpr0_sgpr1 %1:sgpr(s64) = G_CONSTANT i64 4294971390 %2:sgpr(p1) = G_PTR_ADD %0, %1 @@ -761,54 +761,54 @@ ; GFX9-LABEL: name: load_global_s32_from_sgpr_base_offset_neg4294967295 ; GFX9: liveins: $sgpr0_sgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1 ; GFX9-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX9-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc - ; GFX9-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def $scc, implicit $scc + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX9-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PRED_COPY1]], [[PRED_COPY2]], implicit-def $scc + ; GFX9-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[PRED_COPY3]], [[PRED_COPY4]], implicit-def $scc, implicit $scc ; GFX9-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]] - ; GFX9-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load (s32), addrspace 1) - ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE1]] + ; GFX9-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[PRED_COPY5]], 0, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_DWORD]] ; GFX10-LABEL: name: load_global_s32_from_sgpr_base_offset_neg4294967295 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1 ; GFX10-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX10-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc - ; GFX10-NEXT: 
[[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def $scc, implicit $scc + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX10-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PRED_COPY1]], [[PRED_COPY2]], implicit-def $scc + ; GFX10-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[PRED_COPY3]], [[PRED_COPY4]], implicit-def $scc, implicit $scc ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]] - ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load (s32), addrspace 1) - ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE1]] + ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[PRED_COPY5]], 0, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_DWORD]] ; GFX11-LABEL: name: load_global_s32_from_sgpr_base_offset_neg4294967295 ; GFX11: liveins: $sgpr0_sgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 ; GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1 ; GFX11-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX11-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc - ; GFX11-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def $scc, implicit $scc + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX11-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX11-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX11-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PRED_COPY1]], [[PRED_COPY2]], implicit-def $scc + ; GFX11-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[PRED_COPY3]], [[PRED_COPY4]], implicit-def $scc, implicit $scc ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 - ; GFX11-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]] - ; GFX11-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load (s32), addrspace 1) - ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] + ; GFX11-NEXT: [[PRED_COPY5:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE1]] + ; GFX11-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[PRED_COPY5]], 0, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_DWORD]] %0:sgpr(p1) = COPY $sgpr0_sgpr1 %1:sgpr(s64) = 
G_CONSTANT i64 -4294967295 %2:sgpr(p1) = G_PTR_ADD %0, %1 @@ -830,54 +830,54 @@ ; GFX9-LABEL: name: load_global_s32_from_sgpr_base_offset_neg4294967296 ; GFX9: liveins: $sgpr0_sgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX9-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX9-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc - ; GFX9-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def $scc, implicit $scc + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX9-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PRED_COPY1]], [[PRED_COPY2]], implicit-def $scc + ; GFX9-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[PRED_COPY3]], [[PRED_COPY4]], implicit-def $scc, implicit $scc ; GFX9-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]] - ; GFX9-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load (s32), addrspace 1) - ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE1]] + ; GFX9-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[PRED_COPY5]], 0, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_DWORD]] ; GFX10-LABEL: name: load_global_s32_from_sgpr_base_offset_neg4294967296 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX10-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX10-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc - ; GFX10-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def $scc, implicit $scc + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; 
GFX10-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PRED_COPY1]], [[PRED_COPY2]], implicit-def $scc + ; GFX10-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[PRED_COPY3]], [[PRED_COPY4]], implicit-def $scc, implicit $scc ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]] - ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load (s32), addrspace 1) - ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE1]] + ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[PRED_COPY5]], 0, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_DWORD]] ; GFX11-LABEL: name: load_global_s32_from_sgpr_base_offset_neg4294967296 ; GFX11: liveins: $sgpr0_sgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 ; GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX11-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX11-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc - ; GFX11-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def $scc, implicit $scc + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX11-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX11-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX11-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PRED_COPY1]], [[PRED_COPY2]], implicit-def $scc + ; GFX11-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[PRED_COPY3]], [[PRED_COPY4]], implicit-def $scc, implicit $scc ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 - ; GFX11-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]] - ; GFX11-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load (s32), addrspace 1) - ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] + ; GFX11-NEXT: [[PRED_COPY5:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE1]] + ; GFX11-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[PRED_COPY5]], 0, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_DWORD]] %0:sgpr(p1) = COPY $sgpr0_sgpr1 %1:sgpr(s64) = G_CONSTANT i64 -4294967296 %2:sgpr(p1) = G_PTR_ADD %0, %1 @@ -897,19 +897,19 @@ bb.0: ; GFX9-LABEL: name: load_global_s32_from_copy_undef_sgpr ; GFX9: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY [[DEF]] - ; GFX9-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load (s32), addrspace 1) - ; GFX9-NEXT: $vgpr0 = COPY 
[[GLOBAL_LOAD_DWORD]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY [[DEF]] + ; GFX9-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[PRED_COPY]], 0, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_DWORD]] ; GFX10-LABEL: name: load_global_s32_from_copy_undef_sgpr ; GFX10: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY [[DEF]] - ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load (s32), addrspace 1) - ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY [[DEF]] + ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[PRED_COPY]], 0, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_DWORD]] ; GFX11-LABEL: name: load_global_s32_from_copy_undef_sgpr ; GFX11: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY [[DEF]] - ; GFX11-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load (s32), addrspace 1) - ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY [[DEF]] + ; GFX11-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[PRED_COPY]], 0, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_DWORD]] %0:sgpr(p1) = G_IMPLICIT_DEF %1:vgpr(p1) = COPY %0 %2:vgpr(s32) = G_LOAD %1 :: (load (s32), align 4, addrspace 1) @@ -928,15 +928,15 @@ ; GFX9-LABEL: name: load_global_s32_from_undef_vgpr ; GFX9: [[DEF:%[0-9]+]]:vreg_64 = IMPLICIT_DEF ; GFX9-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[DEF]], 0, 0, implicit $exec :: (load (s32), addrspace 1) - ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_DWORD]] ; GFX10-LABEL: name: load_global_s32_from_undef_vgpr ; GFX10: [[DEF:%[0-9]+]]:vreg_64 = IMPLICIT_DEF ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[DEF]], 0, 0, implicit $exec :: (load (s32), addrspace 1) - ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_DWORD]] ; GFX11-LABEL: name: load_global_s32_from_undef_vgpr ; GFX11: [[DEF:%[0-9]+]]:vreg_64 = IMPLICIT_DEF ; GFX11-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[DEF]], 0, 0, implicit $exec :: (load (s32), addrspace 1) - ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_DWORD]] %0:vgpr(p1) = G_IMPLICIT_DEF %1:vgpr(s32) = G_LOAD %0 :: (load (s32), align 4, addrspace 1) $vgpr0 = COPY %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-global.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-global.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-global.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-global.mir @@ -23,55 +23,55 @@ ; GFX6-LABEL: name: load_global_s32_from_4 ; GFX6: liveins: $vgpr0_vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: 
[[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (s32), addrspace 1) - ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_ADDR64_]] + ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 [[PRED_COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_ADDR64_]] ; GFX7-LABEL: name: load_global_s32_from_4 ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (s32), addrspace 1) - ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_ADDR64_]] + ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 [[PRED_COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_ADDR64_]] ; GFX7-FLAT-LABEL: name: load_global_s32_from_4 ; GFX7-FLAT: liveins: $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: {{ $}} - ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-FLAT-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s32), addrspace 1) - ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] + ; GFX7-FLAT-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-FLAT-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s32), addrspace 1) + ; GFX7-FLAT-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_DWORD]] ; GFX8-LABEL: name: load_global_s32_from_4 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s32), addrspace 1) - ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s32), addrspace 1) + ; GFX8-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_DWORD]] ; GFX9-LABEL: name: load_global_s32_from_4 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load (s32), addrspace 1) - ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = 
GLOBAL_LOAD_DWORD [[PRED_COPY]], 0, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_DWORD]] ; GFX10-LABEL: name: load_global_s32_from_4 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load (s32), addrspace 1) - ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[PRED_COPY]], 0, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_DWORD]] ; GFX11-LABEL: name: load_global_s32_from_4 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load (s32), addrspace 1) - ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[PRED_COPY]], 0, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_DWORD]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = G_LOAD %0 :: (load (s32), align 4, addrspace 1) $vgpr0 = COPY %1 @@ -92,55 +92,55 @@ ; GFX6-LABEL: name: load_global_s32_from_2 ; GFX6: liveins: $vgpr0_vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX6-NEXT: [[BUFFER_LOAD_USHORT_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (s16), addrspace 1) - ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_USHORT_ADDR64_]] + ; GFX6-NEXT: [[BUFFER_LOAD_USHORT_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_ADDR64 [[PRED_COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (s16), addrspace 1) + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_USHORT_ADDR64_]] ; GFX7-LABEL: name: load_global_s32_from_2 ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX7-NEXT: [[BUFFER_LOAD_USHORT_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (s16), addrspace 1) - ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_USHORT_ADDR64_]] + ; GFX7-NEXT: 
[[BUFFER_LOAD_USHORT_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_ADDR64 [[PRED_COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (s16), addrspace 1) + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_USHORT_ADDR64_]] ; GFX7-FLAT-LABEL: name: load_global_s32_from_2 ; GFX7-FLAT: liveins: $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: {{ $}} - ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-FLAT-NEXT: [[FLAT_LOAD_USHORT:%[0-9]+]]:vgpr_32 = FLAT_LOAD_USHORT [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s16), addrspace 1) - ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[FLAT_LOAD_USHORT]] + ; GFX7-FLAT-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-FLAT-NEXT: [[FLAT_LOAD_USHORT:%[0-9]+]]:vgpr_32 = FLAT_LOAD_USHORT [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s16), addrspace 1) + ; GFX7-FLAT-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_USHORT]] ; GFX8-LABEL: name: load_global_s32_from_2 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[FLAT_LOAD_USHORT:%[0-9]+]]:vgpr_32 = FLAT_LOAD_USHORT [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s16), addrspace 1) - ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_USHORT]] + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[FLAT_LOAD_USHORT:%[0-9]+]]:vgpr_32 = FLAT_LOAD_USHORT [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s16), addrspace 1) + ; GFX8-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_USHORT]] ; GFX9-LABEL: name: load_global_s32_from_2 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[GLOBAL_LOAD_USHORT:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_USHORT [[COPY]], 0, 0, implicit $exec :: (load (s16), addrspace 1) - ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_USHORT]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[GLOBAL_LOAD_USHORT:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_USHORT [[PRED_COPY]], 0, 0, implicit $exec :: (load (s16), addrspace 1) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_USHORT]] ; GFX10-LABEL: name: load_global_s32_from_2 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[GLOBAL_LOAD_USHORT:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_USHORT [[COPY]], 0, 0, implicit $exec :: (load (s16), addrspace 1) - ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_USHORT]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[GLOBAL_LOAD_USHORT:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_USHORT [[PRED_COPY]], 0, 0, implicit $exec :: (load (s16), addrspace 1) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_USHORT]] ; GFX11-LABEL: name: load_global_s32_from_2 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[GLOBAL_LOAD_USHORT:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_USHORT [[COPY]], 0, 0, implicit $exec :: (load (s16), addrspace 1) - ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_USHORT]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[GLOBAL_LOAD_USHORT:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_USHORT [[PRED_COPY]], 0, 0, implicit $exec :: (load (s16), addrspace 1) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_USHORT]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = G_LOAD %0 :: (load (s16), align 2, addrspace 1) $vgpr0 = COPY %1 @@ -161,55 +161,55 @@ ; GFX6-LABEL: name: 
load_global_s32_from_1 ; GFX6: liveins: $vgpr0_vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1) - ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] + ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[PRED_COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] ; GFX7-LABEL: name: load_global_s32_from_1 ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX7-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1) - ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] + ; GFX7-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[PRED_COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] ; GFX7-FLAT-LABEL: name: load_global_s32_from_1 ; GFX7-FLAT: liveins: $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: {{ $}} - ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-FLAT-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1) - ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX7-FLAT-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-FLAT-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1) + ; GFX7-FLAT-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] ; GFX8-LABEL: name: load_global_s32_from_1 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1) - ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1) + ; GFX8-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] ; GFX9-LABEL: 
name: load_global_s32_from_1 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 0, 0, implicit $exec :: (load (s8), addrspace 1) - ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[PRED_COPY]], 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_UBYTE]] ; GFX10-LABEL: name: load_global_s32_from_1 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 0, 0, implicit $exec :: (load (s8), addrspace 1) - ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[PRED_COPY]], 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_UBYTE]] ; GFX11-LABEL: name: load_global_s32_from_1 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 0, 0, implicit $exec :: (load (s8), addrspace 1) - ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[PRED_COPY]], 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = G_LOAD %0 :: (load (s8), align 1, addrspace 1) $vgpr0 = COPY %1 @@ -230,55 +230,55 @@ ; GFX6-LABEL: name: load_global_v2s32 ; GFX6: liveins: $vgpr0_vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX2_ADDR64_:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (<2 x s32>), addrspace 1) - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[BUFFER_LOAD_DWORDX2_ADDR64_]] + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX2_ADDR64_:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64 [[PRED_COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (<2 x s32>), addrspace 1) + ; GFX6-NEXT: $vgpr0_vgpr1 = PRED_COPY [[BUFFER_LOAD_DWORDX2_ADDR64_]] ; GFX7-LABEL: name: load_global_v2s32 ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], 
%subreg.sub1 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX2_ADDR64_:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (<2 x s32>), addrspace 1) - ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[BUFFER_LOAD_DWORDX2_ADDR64_]] + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX2_ADDR64_:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64 [[PRED_COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (<2 x s32>), addrspace 1) + ; GFX7-NEXT: $vgpr0_vgpr1 = PRED_COPY [[BUFFER_LOAD_DWORDX2_ADDR64_]] ; GFX7-FLAT-LABEL: name: load_global_v2s32 ; GFX7-FLAT: liveins: $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: {{ $}} - ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-FLAT-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s32>), addrspace 1) - ; GFX7-FLAT-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] + ; GFX7-FLAT-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-FLAT-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s32>), addrspace 1) + ; GFX7-FLAT-NEXT: $vgpr0_vgpr1 = PRED_COPY [[FLAT_LOAD_DWORDX2_]] ; GFX8-LABEL: name: load_global_v2s32 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s32>), addrspace 1) - ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s32>), addrspace 1) + ; GFX8-NEXT: $vgpr0_vgpr1 = PRED_COPY [[FLAT_LOAD_DWORDX2_]] ; GFX9-LABEL: name: load_global_v2s32 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (<2 x s32>), addrspace 1) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[PRED_COPY]], 0, 0, implicit $exec :: (load (<2 x s32>), addrspace 1) + ; GFX9-NEXT: $vgpr0_vgpr1 = PRED_COPY [[GLOBAL_LOAD_DWORDX2_]] ; GFX10-LABEL: name: load_global_v2s32 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (<2 x s32>), addrspace 1) - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[PRED_COPY]], 0, 0, implicit $exec :: (load (<2 x s32>), addrspace 1) + ; GFX10-NEXT: $vgpr0_vgpr1 = PRED_COPY [[GLOBAL_LOAD_DWORDX2_]] ; GFX11-LABEL: name: load_global_v2s32 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: 
[[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (<2 x s32>), addrspace 1) - ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[PRED_COPY]], 0, 0, implicit $exec :: (load (<2 x s32>), addrspace 1) + ; GFX11-NEXT: $vgpr0_vgpr1 = PRED_COPY [[GLOBAL_LOAD_DWORDX2_]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(<2 x s32>) = G_LOAD %0 :: (load (<2 x s32>), align 8, addrspace 1) $vgpr0_vgpr1 = COPY %1 @@ -299,55 +299,55 @@ ; GFX6-LABEL: name: load_global_v4s32 ; GFX6: liveins: $vgpr0_vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_ADDR64_:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (<4 x s32>), align 4, addrspace 1) - ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUFFER_LOAD_DWORDX4_ADDR64_]] + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_ADDR64_:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_ADDR64 [[PRED_COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (<4 x s32>), align 4, addrspace 1) + ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[BUFFER_LOAD_DWORDX4_ADDR64_]] ; GFX7-LABEL: name: load_global_v4s32 ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_ADDR64_:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (<4 x s32>), align 4, addrspace 1) - ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUFFER_LOAD_DWORDX4_ADDR64_]] + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_ADDR64_:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_ADDR64 [[PRED_COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (<4 x s32>), align 4, addrspace 1) + ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[BUFFER_LOAD_DWORDX4_ADDR64_]] ; GFX7-FLAT-LABEL: name: load_global_v4s32 ; GFX7-FLAT: liveins: $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: {{ $}} - ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-FLAT-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<4 x s32>), align 4, addrspace 1) - ; GFX7-FLAT-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]] + ; GFX7-FLAT-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-FLAT-NEXT: 
[[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<4 x s32>), align 4, addrspace 1) + ; GFX7-FLAT-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[FLAT_LOAD_DWORDX4_]] ; GFX8-LABEL: name: load_global_v4s32 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<4 x s32>), align 4, addrspace 1) - ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]] + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<4 x s32>), align 4, addrspace 1) + ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[FLAT_LOAD_DWORDX4_]] ; GFX9-LABEL: name: load_global_v4s32 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load (<4 x s32>), align 4, addrspace 1) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[GLOBAL_LOAD_DWORDX4_]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[PRED_COPY]], 0, 0, implicit $exec :: (load (<4 x s32>), align 4, addrspace 1) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[GLOBAL_LOAD_DWORDX4_]] ; GFX10-LABEL: name: load_global_v4s32 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load (<4 x s32>), align 4, addrspace 1) - ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[GLOBAL_LOAD_DWORDX4_]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[PRED_COPY]], 0, 0, implicit $exec :: (load (<4 x s32>), align 4, addrspace 1) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[GLOBAL_LOAD_DWORDX4_]] ; GFX11-LABEL: name: load_global_v4s32 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load (<4 x s32>), align 4, addrspace 1) - ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[GLOBAL_LOAD_DWORDX4_]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[PRED_COPY]], 0, 0, implicit $exec :: (load (<4 x s32>), align 4, addrspace 1) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[GLOBAL_LOAD_DWORDX4_]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(<4 x s32>) = G_LOAD %0 :: (load (<4 x s32>), align 4, addrspace 1) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 @@ -370,43 +370,43 @@ ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX6-NEXT: [[LOAD:%[0-9]+]]:vreg_64(s64) = G_LOAD [[COPY]](p1) :: (load (s64), addrspace 1) - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; GFX6-NEXT: $vgpr0_vgpr1 = PRED_COPY [[LOAD]](s64) ; GFX7-LABEL: name: load_global_s64 ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: 
[[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s64), addrspace 1) - ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s64), addrspace 1) + ; GFX7-NEXT: $vgpr0_vgpr1 = PRED_COPY [[FLAT_LOAD_DWORDX2_]] ; GFX7-FLAT-LABEL: name: load_global_s64 ; GFX7-FLAT: liveins: $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: {{ $}} - ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-FLAT-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s64), addrspace 1) - ; GFX7-FLAT-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] + ; GFX7-FLAT-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-FLAT-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s64), addrspace 1) + ; GFX7-FLAT-NEXT: $vgpr0_vgpr1 = PRED_COPY [[FLAT_LOAD_DWORDX2_]] ; GFX8-LABEL: name: load_global_s64 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s64), addrspace 1) - ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s64), addrspace 1) + ; GFX8-NEXT: $vgpr0_vgpr1 = PRED_COPY [[FLAT_LOAD_DWORDX2_]] ; GFX9-LABEL: name: load_global_s64 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (s64), addrspace 1) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[PRED_COPY]], 0, 0, implicit $exec :: (load (s64), addrspace 1) + ; GFX9-NEXT: $vgpr0_vgpr1 = PRED_COPY [[GLOBAL_LOAD_DWORDX2_]] ; GFX10-LABEL: name: load_global_s64 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (s64), addrspace 1) - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[PRED_COPY]], 0, 0, implicit $exec :: (load (s64), addrspace 1) + ; GFX10-NEXT: $vgpr0_vgpr1 = PRED_COPY [[GLOBAL_LOAD_DWORDX2_]] ; GFX11-LABEL: name: load_global_s64 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (s64), addrspace 1) - ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY 
$vgpr0_vgpr1 + ; GFX11-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[PRED_COPY]], 0, 0, implicit $exec :: (load (s64), addrspace 1) + ; GFX11-NEXT: $vgpr0_vgpr1 = PRED_COPY [[GLOBAL_LOAD_DWORDX2_]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_LOAD %0 :: (load (s64), align 8, addrspace 1) $vgpr0_vgpr1 = COPY %1 @@ -429,43 +429,43 @@ ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX6-NEXT: [[LOAD:%[0-9]+]]:vreg_128(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), align 4, addrspace 1) - ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) + ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[LOAD]](<2 x s64>) ; GFX7-LABEL: name: load_global_v2s64 ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s64>), align 4, addrspace 1) - ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]] + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s64>), align 4, addrspace 1) + ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[FLAT_LOAD_DWORDX4_]] ; GFX7-FLAT-LABEL: name: load_global_v2s64 ; GFX7-FLAT: liveins: $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: {{ $}} - ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-FLAT-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s64>), align 4, addrspace 1) - ; GFX7-FLAT-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]] + ; GFX7-FLAT-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-FLAT-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s64>), align 4, addrspace 1) + ; GFX7-FLAT-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[FLAT_LOAD_DWORDX4_]] ; GFX8-LABEL: name: load_global_v2s64 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s64>), align 4, addrspace 1) - ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]] + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s64>), align 4, addrspace 1) + ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[FLAT_LOAD_DWORDX4_]] ; GFX9-LABEL: name: load_global_v2s64 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load (<2 x s64>), align 4, addrspace 1) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[GLOBAL_LOAD_DWORDX4_]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[PRED_COPY]], 0, 0, implicit $exec :: (load (<2 x s64>), align 4, addrspace 1) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[GLOBAL_LOAD_DWORDX4_]] ; 
GFX10-LABEL: name: load_global_v2s64 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load (<2 x s64>), align 4, addrspace 1) - ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[GLOBAL_LOAD_DWORDX4_]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[PRED_COPY]], 0, 0, implicit $exec :: (load (<2 x s64>), align 4, addrspace 1) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[GLOBAL_LOAD_DWORDX4_]] ; GFX11-LABEL: name: load_global_v2s64 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load (<2 x s64>), align 4, addrspace 1) - ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[GLOBAL_LOAD_DWORDX4_]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[PRED_COPY]], 0, 0, implicit $exec :: (load (<2 x s64>), align 4, addrspace 1) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[GLOBAL_LOAD_DWORDX4_]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(<2 x s64>) = G_LOAD %0 :: (load (<2 x s64>), align 4, addrspace 1) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 @@ -488,43 +488,43 @@ ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX6-NEXT: [[LOAD:%[0-9]+]]:vreg_128(<2 x p1>) = G_LOAD [[COPY]](p1) :: (load (<2 x p1>), align 4, addrspace 1) - ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x p1>) + ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[LOAD]](<2 x p1>) ; GFX7-LABEL: name: load_global_v2p1 ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vreg_128(<2 x p1>) = G_LOAD [[COPY]](p1) :: (load (<2 x p1>), align 4, addrspace 1) - ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x p1>) + ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[LOAD]](<2 x p1>) ; GFX7-FLAT-LABEL: name: load_global_v2p1 ; GFX7-FLAT: liveins: $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: {{ $}} ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: [[LOAD:%[0-9]+]]:vreg_128(<2 x p1>) = G_LOAD [[COPY]](p1) :: (load (<2 x p1>), align 4, addrspace 1) - ; GFX7-FLAT-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x p1>) + ; GFX7-FLAT-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[LOAD]](<2 x p1>) ; GFX8-LABEL: name: load_global_v2p1 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX8-NEXT: [[LOAD:%[0-9]+]]:vreg_128(<2 x p1>) = G_LOAD [[COPY]](p1) :: (load (<2 x p1>), align 4, addrspace 1) - ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x p1>) + ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[LOAD]](<2 x p1>) ; GFX9-LABEL: name: load_global_v2p1 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vreg_128(<2 x p1>) = G_LOAD [[COPY]](p1) :: (load (<2 x p1>), align 4, addrspace 1) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x p1>) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[LOAD]](<2 x p1>) ; GFX10-LABEL: name: load_global_v2p1 ; GFX10: liveins: 
$vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vreg_128(<2 x p1>) = G_LOAD [[COPY]](p1) :: (load (<2 x p1>), align 4, addrspace 1) - ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x p1>) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[LOAD]](<2 x p1>) ; GFX11-LABEL: name: load_global_v2p1 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX11-NEXT: [[LOAD:%[0-9]+]]:vreg_128(<2 x p1>) = G_LOAD [[COPY]](p1) :: (load (<2 x p1>), align 4, addrspace 1) - ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x p1>) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[LOAD]](<2 x p1>) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(<2 x p1>) = G_LOAD %0 :: (load (<2 x p1>), align 4, addrspace 1) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 @@ -547,43 +547,43 @@ ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX6-NEXT: [[LOAD:%[0-9]+]]:vreg_128(s128) = G_LOAD [[COPY]](p1) :: (load (s128), align 4, addrspace 1) - ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128) + ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[LOAD]](s128) ; GFX7-LABEL: name: load_global_s128 ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vreg_128(s128) = G_LOAD [[COPY]](p1) :: (load (s128), align 4, addrspace 1) - ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128) + ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[LOAD]](s128) ; GFX7-FLAT-LABEL: name: load_global_s128 ; GFX7-FLAT: liveins: $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: {{ $}} ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: [[LOAD:%[0-9]+]]:vreg_128(s128) = G_LOAD [[COPY]](p1) :: (load (s128), align 4, addrspace 1) - ; GFX7-FLAT-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128) + ; GFX7-FLAT-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[LOAD]](s128) ; GFX8-LABEL: name: load_global_s128 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX8-NEXT: [[LOAD:%[0-9]+]]:vreg_128(s128) = G_LOAD [[COPY]](p1) :: (load (s128), align 4, addrspace 1) - ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128) + ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[LOAD]](s128) ; GFX9-LABEL: name: load_global_s128 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vreg_128(s128) = G_LOAD [[COPY]](p1) :: (load (s128), align 4, addrspace 1) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[LOAD]](s128) ; GFX10-LABEL: name: load_global_s128 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vreg_128(s128) = G_LOAD [[COPY]](p1) :: (load (s128), align 4, addrspace 1) - ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[LOAD]](s128) ; GFX11-LABEL: name: load_global_s128 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX11-NEXT: [[LOAD:%[0-9]+]]:vreg_128(s128) = G_LOAD [[COPY]](p1) :: (load (s128), align 4, addrspace 1) - ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY 
[[LOAD]](s128) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s128) = G_LOAD %0 :: (load (s128), align 4, addrspace 1) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 @@ -606,43 +606,43 @@ ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX6-NEXT: [[LOAD:%[0-9]+]]:vgpr_32(p3) = G_LOAD [[COPY]](p1) :: (load (p3), addrspace 1) - ; GFX6-NEXT: $vgpr0 = COPY [[LOAD]](p3) + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[LOAD]](p3) ; GFX7-LABEL: name: load_global_p3_from_4 ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (p3), addrspace 1) - ; GFX7-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (p3), addrspace 1) + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_DWORD]] ; GFX7-FLAT-LABEL: name: load_global_p3_from_4 ; GFX7-FLAT: liveins: $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: {{ $}} - ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-FLAT-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (p3), addrspace 1) - ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] + ; GFX7-FLAT-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-FLAT-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (p3), addrspace 1) + ; GFX7-FLAT-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_DWORD]] ; GFX8-LABEL: name: load_global_p3_from_4 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (p3), addrspace 1) - ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (p3), addrspace 1) + ; GFX8-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_DWORD]] ; GFX9-LABEL: name: load_global_p3_from_4 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load (p3), addrspace 1) - ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[PRED_COPY]], 0, 0, implicit $exec :: (load (p3), addrspace 1) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_DWORD]] ; GFX10-LABEL: name: load_global_p3_from_4 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load (p3), addrspace 1) - ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[PRED_COPY]], 0, 0, implicit $exec :: (load (p3), addrspace 1) + ; GFX10-NEXT: $vgpr0 = PRED_COPY 
[[GLOBAL_LOAD_DWORD]] ; GFX11-LABEL: name: load_global_p3_from_4 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load (p3), addrspace 1) - ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[PRED_COPY]], 0, 0, implicit $exec :: (load (p3), addrspace 1) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_DWORD]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(p3) = G_LOAD %0 :: (load (p3), align 4, addrspace 1) $vgpr0 = COPY %1 @@ -665,43 +665,43 @@ ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX6-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p1) = G_LOAD [[COPY]](p1) :: (load (p1), addrspace 1) - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1) + ; GFX6-NEXT: $vgpr0_vgpr1 = PRED_COPY [[LOAD]](p1) ; GFX7-LABEL: name: load_global_p1_from_8 ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (p1), addrspace 1) - ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (p1), addrspace 1) + ; GFX7-NEXT: $vgpr0_vgpr1 = PRED_COPY [[FLAT_LOAD_DWORDX2_]] ; GFX7-FLAT-LABEL: name: load_global_p1_from_8 ; GFX7-FLAT: liveins: $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: {{ $}} - ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-FLAT-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (p1), addrspace 1) - ; GFX7-FLAT-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] + ; GFX7-FLAT-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-FLAT-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (p1), addrspace 1) + ; GFX7-FLAT-NEXT: $vgpr0_vgpr1 = PRED_COPY [[FLAT_LOAD_DWORDX2_]] ; GFX8-LABEL: name: load_global_p1_from_8 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (p1), addrspace 1) - ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (p1), addrspace 1) + ; GFX8-NEXT: $vgpr0_vgpr1 = PRED_COPY [[FLAT_LOAD_DWORDX2_]] ; GFX9-LABEL: name: load_global_p1_from_8 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (p1), addrspace 1) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[PRED_COPY]], 0, 
0, implicit $exec :: (load (p1), addrspace 1) + ; GFX9-NEXT: $vgpr0_vgpr1 = PRED_COPY [[GLOBAL_LOAD_DWORDX2_]] ; GFX10-LABEL: name: load_global_p1_from_8 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (p1), addrspace 1) - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[PRED_COPY]], 0, 0, implicit $exec :: (load (p1), addrspace 1) + ; GFX10-NEXT: $vgpr0_vgpr1 = PRED_COPY [[GLOBAL_LOAD_DWORDX2_]] ; GFX11-LABEL: name: load_global_p1_from_8 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (p1), addrspace 1) - ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[PRED_COPY]], 0, 0, implicit $exec :: (load (p1), addrspace 1) + ; GFX11-NEXT: $vgpr0_vgpr1 = PRED_COPY [[GLOBAL_LOAD_DWORDX2_]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(p1) = G_LOAD %0 :: (load (p1), align 8, addrspace 1) $vgpr0_vgpr1 = COPY %1 @@ -724,43 +724,43 @@ ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX6-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p999) = G_LOAD [[COPY]](p1) :: (load (p999), addrspace 1) - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p999) + ; GFX6-NEXT: $vgpr0_vgpr1 = PRED_COPY [[LOAD]](p999) ; GFX7-LABEL: name: load_global_p999_from_8 ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p999) = G_LOAD [[COPY]](p1) :: (load (p999), addrspace 1) - ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p999) + ; GFX7-NEXT: $vgpr0_vgpr1 = PRED_COPY [[LOAD]](p999) ; GFX7-FLAT-LABEL: name: load_global_p999_from_8 ; GFX7-FLAT: liveins: $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: {{ $}} ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p999) = G_LOAD [[COPY]](p1) :: (load (p999), addrspace 1) - ; GFX7-FLAT-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p999) + ; GFX7-FLAT-NEXT: $vgpr0_vgpr1 = PRED_COPY [[LOAD]](p999) ; GFX8-LABEL: name: load_global_p999_from_8 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX8-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p999) = G_LOAD [[COPY]](p1) :: (load (p999), addrspace 1) - ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p999) + ; GFX8-NEXT: $vgpr0_vgpr1 = PRED_COPY [[LOAD]](p999) ; GFX9-LABEL: name: load_global_p999_from_8 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p999) = G_LOAD [[COPY]](p1) :: (load (p999), addrspace 1) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p999) + ; GFX9-NEXT: $vgpr0_vgpr1 = PRED_COPY [[LOAD]](p999) ; GFX10-LABEL: name: load_global_p999_from_8 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p999) = G_LOAD [[COPY]](p1) :: (load (p999), addrspace 1) - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p999) 
+ ; GFX10-NEXT: $vgpr0_vgpr1 = PRED_COPY [[LOAD]](p999) ; GFX11-LABEL: name: load_global_p999_from_8 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX11-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p999) = G_LOAD [[COPY]](p1) :: (load (p999), addrspace 1) - ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p999) + ; GFX11-NEXT: $vgpr0_vgpr1 = PRED_COPY [[LOAD]](p999) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(p999) = G_LOAD %0 :: (load (p999), align 8, addrspace 1) $vgpr0_vgpr1 = COPY %1 @@ -783,43 +783,43 @@ ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX6-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>), addrspace 1) - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) + ; GFX6-NEXT: $vgpr0_vgpr1 = PRED_COPY [[LOAD]](<2 x p3>) ; GFX7-LABEL: name: load_global_v2p3 ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>), addrspace 1) - ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) + ; GFX7-NEXT: $vgpr0_vgpr1 = PRED_COPY [[LOAD]](<2 x p3>) ; GFX7-FLAT-LABEL: name: load_global_v2p3 ; GFX7-FLAT: liveins: $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: {{ $}} ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>), addrspace 1) - ; GFX7-FLAT-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) + ; GFX7-FLAT-NEXT: $vgpr0_vgpr1 = PRED_COPY [[LOAD]](<2 x p3>) ; GFX8-LABEL: name: load_global_v2p3 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX8-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>), addrspace 1) - ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) + ; GFX8-NEXT: $vgpr0_vgpr1 = PRED_COPY [[LOAD]](<2 x p3>) ; GFX9-LABEL: name: load_global_v2p3 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>), addrspace 1) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) + ; GFX9-NEXT: $vgpr0_vgpr1 = PRED_COPY [[LOAD]](<2 x p3>) ; GFX10-LABEL: name: load_global_v2p3 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>), addrspace 1) - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) + ; GFX10-NEXT: $vgpr0_vgpr1 = PRED_COPY [[LOAD]](<2 x p3>) ; GFX11-LABEL: name: load_global_v2p3 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX11-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>), addrspace 1) - ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) + ; GFX11-NEXT: $vgpr0_vgpr1 = PRED_COPY [[LOAD]](<2 x p3>) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(<2 x p3>) = G_LOAD %0 :: (load (<2 x p3>), align 8, addrspace 1) $vgpr0_vgpr1 = COPY %1 @@ -842,43 +842,43 @@ ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX6-NEXT: [[LOAD:%[0-9]+]]:vgpr_32(<2 x s16>) = G_LOAD [[COPY]](p1) :: (load (<2 x s16>), addrspace 1) - ; GFX6-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[LOAD]](<2 x s16>) ; GFX7-LABEL: name: 
load_global_v2s16 ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s16>), addrspace 1) - ; GFX7-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s16>), addrspace 1) + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_DWORD]] ; GFX7-FLAT-LABEL: name: load_global_v2s16 ; GFX7-FLAT: liveins: $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: {{ $}} - ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-FLAT-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s16>), addrspace 1) - ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] + ; GFX7-FLAT-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-FLAT-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s16>), addrspace 1) + ; GFX7-FLAT-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_DWORD]] ; GFX8-LABEL: name: load_global_v2s16 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s16>), addrspace 1) - ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s16>), addrspace 1) + ; GFX8-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_DWORD]] ; GFX9-LABEL: name: load_global_v2s16 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load (<2 x s16>), addrspace 1) - ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[PRED_COPY]], 0, 0, implicit $exec :: (load (<2 x s16>), addrspace 1) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_DWORD]] ; GFX10-LABEL: name: load_global_v2s16 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load (<2 x s16>), addrspace 1) - ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[PRED_COPY]], 0, 0, implicit $exec :: (load (<2 x s16>), addrspace 1) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_DWORD]] ; GFX11-LABEL: name: load_global_v2s16 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load (<2 x s16>), addrspace 1) - ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = 
PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[PRED_COPY]], 0, 0, implicit $exec :: (load (<2 x s16>), addrspace 1) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_DWORD]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(<2 x s16>) = G_LOAD %0 :: (load (<2 x s16>), align 4, addrspace 1) $vgpr0 = COPY %1 @@ -901,43 +901,43 @@ ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX6-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), addrspace 1) - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; GFX6-NEXT: $vgpr0_vgpr1 = PRED_COPY [[LOAD]](<4 x s16>) ; GFX7-LABEL: name: load_global_v4s16 ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<4 x s16>), addrspace 1) - ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<4 x s16>), addrspace 1) + ; GFX7-NEXT: $vgpr0_vgpr1 = PRED_COPY [[FLAT_LOAD_DWORDX2_]] ; GFX7-FLAT-LABEL: name: load_global_v4s16 ; GFX7-FLAT: liveins: $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: {{ $}} - ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-FLAT-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<4 x s16>), addrspace 1) - ; GFX7-FLAT-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] + ; GFX7-FLAT-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-FLAT-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<4 x s16>), addrspace 1) + ; GFX7-FLAT-NEXT: $vgpr0_vgpr1 = PRED_COPY [[FLAT_LOAD_DWORDX2_]] ; GFX8-LABEL: name: load_global_v4s16 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<4 x s16>), addrspace 1) - ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<4 x s16>), addrspace 1) + ; GFX8-NEXT: $vgpr0_vgpr1 = PRED_COPY [[FLAT_LOAD_DWORDX2_]] ; GFX9-LABEL: name: load_global_v4s16 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (<4 x s16>), addrspace 1) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[PRED_COPY]], 0, 0, implicit $exec :: (load (<4 x s16>), addrspace 1) + ; GFX9-NEXT: $vgpr0_vgpr1 = PRED_COPY [[GLOBAL_LOAD_DWORDX2_]] ; GFX10-LABEL: name: load_global_v4s16 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = 
GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (<4 x s16>), addrspace 1) - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[PRED_COPY]], 0, 0, implicit $exec :: (load (<4 x s16>), addrspace 1) + ; GFX10-NEXT: $vgpr0_vgpr1 = PRED_COPY [[GLOBAL_LOAD_DWORDX2_]] ; GFX11-LABEL: name: load_global_v4s16 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (<4 x s16>), addrspace 1) - ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[PRED_COPY]], 0, 0, implicit $exec :: (load (<4 x s16>), addrspace 1) + ; GFX11-NEXT: $vgpr0_vgpr1 = PRED_COPY [[GLOBAL_LOAD_DWORDX2_]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(<4 x s16>) = G_LOAD %0 :: (load (<4 x s16>), align 8, addrspace 1) $vgpr0_vgpr1 = COPY %1 @@ -960,43 +960,43 @@ ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX6-NEXT: [[LOAD:%[0-9]+]]:vreg_128(<8 x s16>) = G_LOAD [[COPY]](p1) :: (load (<8 x s16>), align 4, addrspace 1) - ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<8 x s16>) + ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[LOAD]](<8 x s16>) ; GFX7-LABEL: name: load_global_v8s16 ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<8 x s16>), align 4, addrspace 1) - ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]] + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<8 x s16>), align 4, addrspace 1) + ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[FLAT_LOAD_DWORDX4_]] ; GFX7-FLAT-LABEL: name: load_global_v8s16 ; GFX7-FLAT: liveins: $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: {{ $}} - ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-FLAT-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<8 x s16>), align 4, addrspace 1) - ; GFX7-FLAT-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]] + ; GFX7-FLAT-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-FLAT-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<8 x s16>), align 4, addrspace 1) + ; GFX7-FLAT-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[FLAT_LOAD_DWORDX4_]] ; GFX8-LABEL: name: load_global_v8s16 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<8 x s16>), align 4, addrspace 1) - ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]] + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = 
FLAT_LOAD_DWORDX4 [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<8 x s16>), align 4, addrspace 1) + ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[FLAT_LOAD_DWORDX4_]] ; GFX9-LABEL: name: load_global_v8s16 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load (<8 x s16>), align 4, addrspace 1) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[GLOBAL_LOAD_DWORDX4_]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[PRED_COPY]], 0, 0, implicit $exec :: (load (<8 x s16>), align 4, addrspace 1) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[GLOBAL_LOAD_DWORDX4_]] ; GFX10-LABEL: name: load_global_v8s16 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load (<8 x s16>), align 4, addrspace 1) - ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[GLOBAL_LOAD_DWORDX4_]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[PRED_COPY]], 0, 0, implicit $exec :: (load (<8 x s16>), align 4, addrspace 1) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[GLOBAL_LOAD_DWORDX4_]] ; GFX11-LABEL: name: load_global_v8s16 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load (<8 x s16>), align 4, addrspace 1) - ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[GLOBAL_LOAD_DWORDX4_]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[PRED_COPY]], 0, 0, implicit $exec :: (load (<8 x s16>), align 4, addrspace 1) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[GLOBAL_LOAD_DWORDX4_]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(<8 x s16>) = G_LOAD %0 :: (load (<8 x s16>), align 4, addrspace 1) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 @@ -1021,75 +1021,75 @@ ; GFX6-LABEL: name: load_global_s32_from_1_gep_2047 ; GFX6: liveins: $vgpr0_vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 2047, 0, 0, implicit $exec :: (load (s8), addrspace 1) - ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] + ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[PRED_COPY]], [[REG_SEQUENCE1]], 0, 2047, 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX6-NEXT: $vgpr0 = PRED_COPY 
[[BUFFER_LOAD_UBYTE_ADDR64_]] ; GFX7-LABEL: name: load_global_s32_from_1_gep_2047 ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX7-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 2047, 0, 0, implicit $exec :: (load (s8), addrspace 1) - ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] + ; GFX7-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[PRED_COPY]], [[REG_SEQUENCE1]], 0, 2047, 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] ; GFX7-FLAT-LABEL: name: load_global_s32_from_1_gep_2047 ; GFX7-FLAT: liveins: $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: {{ $}} - ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7-FLAT-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2047, implicit $exec ; GFX7-FLAT-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX7-FLAT-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-FLAT-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX7-FLAT-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-FLAT-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7-FLAT-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX7-FLAT-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX7-FLAT-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX7-FLAT-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX7-FLAT-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX7-FLAT-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX7-FLAT-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX7-FLAT-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX7-FLAT-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7-FLAT-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX7-FLAT-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1) - ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX7-FLAT-NEXT: $vgpr0 = 
PRED_COPY [[FLAT_LOAD_UBYTE]] ; GFX8-LABEL: name: load_global_s32_from_1_gep_2047 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX8-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2047, implicit $exec ; GFX8-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX8-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX8-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1) - ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX8-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] ; GFX9-LABEL: name: load_global_s32_from_1_gep_2047 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 2047, 0, implicit $exec :: (load (s8), addrspace 1) - ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[PRED_COPY]], 2047, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_UBYTE]] ; GFX10-LABEL: name: load_global_s32_from_1_gep_2047 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 2047, 0, implicit $exec :: (load (s8), addrspace 1) - ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[PRED_COPY]], 2047, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_UBYTE]] ; 
GFX11-LABEL: name: load_global_s32_from_1_gep_2047 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 2047, 0, implicit $exec :: (load (s8), addrspace 1) - ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[PRED_COPY]], 2047, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_CONSTANT i64 2047 %2:vgpr(p1) = G_PTR_ADD %0, %1 @@ -1112,85 +1112,85 @@ ; GFX6-LABEL: name: load_global_s32_from_1_gep_2048 ; GFX6: liveins: $vgpr0_vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 2048, 0, 0, implicit $exec :: (load (s8), addrspace 1) - ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] + ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[PRED_COPY]], [[REG_SEQUENCE1]], 0, 2048, 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] ; GFX7-LABEL: name: load_global_s32_from_1_gep_2048 ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX7-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 2048, 0, 0, implicit $exec :: (load (s8), addrspace 1) - ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] + ; GFX7-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[PRED_COPY]], [[REG_SEQUENCE1]], 0, 2048, 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] ; GFX7-FLAT-LABEL: name: load_global_s32_from_1_gep_2048 ; GFX7-FLAT: liveins: $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: {{ $}} - ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7-FLAT-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2048, implicit $exec ; GFX7-FLAT-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], 
%subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX7-FLAT-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-FLAT-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX7-FLAT-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-FLAT-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7-FLAT-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX7-FLAT-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX7-FLAT-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX7-FLAT-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX7-FLAT-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX7-FLAT-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX7-FLAT-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX7-FLAT-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX7-FLAT-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7-FLAT-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX7-FLAT-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1) - ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX7-FLAT-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] ; GFX8-LABEL: name: load_global_s32_from_1_gep_2048 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX8-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2048, implicit $exec ; GFX8-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX8-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 
0, implicit $exec + ; GFX8-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1) - ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX8-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] ; GFX9-LABEL: name: load_global_s32_from_1_gep_2048 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 2048, 0, implicit $exec :: (load (s8), addrspace 1) - ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[PRED_COPY]], 2048, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_UBYTE]] ; GFX10-LABEL: name: load_global_s32_from_1_gep_2048 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2048, implicit $exec ; GFX10-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX10-NEXT: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load (s8), addrspace 1) - ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_UBYTE]] ; GFX11-LABEL: name: 
load_global_s32_from_1_gep_2048 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 2048, 0, implicit $exec :: (load (s8), addrspace 1) - ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[PRED_COPY]], 2048, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_CONSTANT i64 2048 %2:vgpr(p1) = G_PTR_ADD %0, %1 @@ -1213,95 +1213,95 @@ ; GFX6-LABEL: name: load_global_s32_from_1_gep_m2047 ; GFX6: liveins: $vgpr0_vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX6-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294965249, implicit $exec ; GFX6-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX6-NEXT: %14:vgpr_32, dead %16:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %14, %subreg.sub1 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX6-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX6-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX6-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE2]], %subreg.sub2_sub3 ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1) - ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] ; GFX7-LABEL: name: load_global_s32_from_1_gep_m2047 ; GFX7: 
liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294965249, implicit $exec ; GFX7-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX7-NEXT: %14:vgpr_32, dead %16:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %14, %subreg.sub1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE2]], %subreg.sub2_sub3 ; GFX7-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1) - ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] ; GFX7-FLAT-LABEL: name: load_global_s32_from_1_gep_m2047 ; GFX7-FLAT: liveins: $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: {{ $}} - ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7-FLAT-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294965249, implicit $exec ; GFX7-FLAT-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX7-FLAT-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-FLAT-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX7-FLAT-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-FLAT-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY 
[[REG_SEQUENCE]].sub1 - ; GFX7-FLAT-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX7-FLAT-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX7-FLAT-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX7-FLAT-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX7-FLAT-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX7-FLAT-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX7-FLAT-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX7-FLAT-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX7-FLAT-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7-FLAT-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX7-FLAT-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1) - ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX7-FLAT-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] ; GFX8-LABEL: name: load_global_s32_from_1_gep_m2047 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX8-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294965249, implicit $exec ; GFX8-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX8-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX8-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE 
[[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1) - ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX8-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] ; GFX9-LABEL: name: load_global_s32_from_1_gep_m2047 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -2047, 0, implicit $exec :: (load (s8), addrspace 1) - ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[PRED_COPY]], -2047, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_UBYTE]] ; GFX10-LABEL: name: load_global_s32_from_1_gep_m2047 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -2047, 0, implicit $exec :: (load (s8), addrspace 1) - ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[PRED_COPY]], -2047, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_UBYTE]] ; GFX11-LABEL: name: load_global_s32_from_1_gep_m2047 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -2047, 0, implicit $exec :: (load (s8), addrspace 1) - ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[PRED_COPY]], -2047, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_CONSTANT i64 -2047 %2:vgpr(p1) = G_PTR_ADD %0, %1 @@ -1324,95 +1324,95 @@ ; GFX6-LABEL: name: load_global_s32_from_1_gep_m2048 ; GFX6: liveins: $vgpr0_vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX6-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294965248, implicit $exec ; GFX6-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX6-NEXT: %14:vgpr_32, dead %16:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %14, %subreg.sub1 + ; GFX6-NEXT: 
[[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX6-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX6-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX6-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE2]], %subreg.sub2_sub3 ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1) - ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] ; GFX7-LABEL: name: load_global_s32_from_1_gep_m2048 ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294965248, implicit $exec ; GFX7-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX7-NEXT: %14:vgpr_32, dead %16:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %14, %subreg.sub1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, 
[[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE2]], %subreg.sub2_sub3 ; GFX7-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1) - ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] ; GFX7-FLAT-LABEL: name: load_global_s32_from_1_gep_m2048 ; GFX7-FLAT: liveins: $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: {{ $}} - ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7-FLAT-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294965248, implicit $exec ; GFX7-FLAT-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX7-FLAT-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-FLAT-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX7-FLAT-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-FLAT-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7-FLAT-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX7-FLAT-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX7-FLAT-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX7-FLAT-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX7-FLAT-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX7-FLAT-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX7-FLAT-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX7-FLAT-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX7-FLAT-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7-FLAT-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX7-FLAT-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1) - ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX7-FLAT-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] ; GFX8-LABEL: name: load_global_s32_from_1_gep_m2048 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX8-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294965248, implicit $exec ; GFX8-NEXT: 
[[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX8-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX8-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1) - ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX8-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] ; GFX9-LABEL: name: load_global_s32_from_1_gep_m2048 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -2048, 0, implicit $exec :: (load (s8), addrspace 1) - ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[PRED_COPY]], -2048, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_UBYTE]] ; GFX10-LABEL: name: load_global_s32_from_1_gep_m2048 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -2048, 0, implicit $exec :: (load (s8), addrspace 1) - ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[PRED_COPY]], -2048, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_UBYTE]] ; GFX11-LABEL: name: load_global_s32_from_1_gep_m2048 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -2048, 0, implicit $exec :: (load (s8), addrspace 1) - ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] + ; 
GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[PRED_COPY]], -2048, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_CONSTANT i64 -2048 %2:vgpr(p1) = G_PTR_ADD %0, %1 @@ -1435,85 +1435,85 @@ ; GFX6-LABEL: name: load_global_s32_from_1_gep_4095 ; GFX6: liveins: $vgpr0_vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 4095, 0, 0, implicit $exec :: (load (s8), addrspace 1) - ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] + ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[PRED_COPY]], [[REG_SEQUENCE1]], 0, 4095, 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] ; GFX7-LABEL: name: load_global_s32_from_1_gep_4095 ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX7-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 4095, 0, 0, implicit $exec :: (load (s8), addrspace 1) - ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] + ; GFX7-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[PRED_COPY]], [[REG_SEQUENCE1]], 0, 4095, 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] ; GFX7-FLAT-LABEL: name: load_global_s32_from_1_gep_4095 ; GFX7-FLAT: liveins: $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: {{ $}} - ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7-FLAT-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec ; GFX7-FLAT-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX7-FLAT-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-FLAT-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX7-FLAT-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-FLAT-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7-FLAT-NEXT: 
[[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX7-FLAT-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX7-FLAT-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX7-FLAT-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX7-FLAT-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX7-FLAT-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX7-FLAT-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX7-FLAT-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX7-FLAT-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7-FLAT-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX7-FLAT-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1) - ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX7-FLAT-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] ; GFX8-LABEL: name: load_global_s32_from_1_gep_4095 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX8-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec ; GFX8-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX8-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX8-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 
; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1) - ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX8-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] ; GFX9-LABEL: name: load_global_s32_from_1_gep_4095 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 4095, 0, implicit $exec :: (load (s8), addrspace 1) - ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[PRED_COPY]], 4095, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_UBYTE]] ; GFX10-LABEL: name: load_global_s32_from_1_gep_4095 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec ; GFX10-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX10-NEXT: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load (s8), addrspace 1) - ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_UBYTE]] ; GFX11-LABEL: name: load_global_s32_from_1_gep_4095 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 4095, 0, implicit $exec :: (load (s8), addrspace 1) - ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] + ; GFX11-NEXT: 
[[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[PRED_COPY]], 4095, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_CONSTANT i64 4095 %2:vgpr(p1) = G_PTR_ADD %0, %1 @@ -1536,107 +1536,107 @@ ; GFX6-LABEL: name: load_global_s32_from_1_gep_4096 ; GFX6: liveins: $vgpr0_vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 ; GFX6-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 - ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 0, 0, implicit $exec :: (load (s8), addrspace 1) - ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] + ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[PRED_COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] ; GFX7-LABEL: name: load_global_s32_from_1_gep_4096 ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 ; GFX7-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 - ; GFX7-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 0, 0, implicit $exec :: (load (s8), addrspace 1) - ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] + ; GFX7-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[PRED_COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] ; GFX7-FLAT-LABEL: name: load_global_s32_from_1_gep_4096 ; GFX7-FLAT: liveins: $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: {{ $}} - ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7-FLAT-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec ; GFX7-FLAT-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX7-FLAT-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-FLAT-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; 
GFX7-FLAT-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-FLAT-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7-FLAT-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX7-FLAT-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX7-FLAT-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX7-FLAT-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX7-FLAT-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX7-FLAT-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX7-FLAT-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX7-FLAT-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX7-FLAT-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7-FLAT-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX7-FLAT-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1) - ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX7-FLAT-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] ; GFX8-LABEL: name: load_global_s32_from_1_gep_4096 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX8-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec ; GFX8-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX8-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX8-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed 
[[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1) - ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX8-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] ; GFX9-LABEL: name: load_global_s32_from_1_gep_4096 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec ; GFX9-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX9-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX9-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX9-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX9-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX9-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load (s8), addrspace 1) - ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_UBYTE]] ; GFX10-LABEL: name: load_global_s32_from_1_gep_4096 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec ; GFX10-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX10-NEXT: 
[[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX10-NEXT: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load (s8), addrspace 1) - ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_UBYTE]] ; GFX11-LABEL: name: load_global_s32_from_1_gep_4096 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec ; GFX11-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX11-NEXT: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX11-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX11-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX11-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; 
GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load (s8), addrspace 1) - ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_CONSTANT i64 4096 %2:vgpr(p1) = G_PTR_ADD %0, %1 @@ -1659,105 +1659,105 @@ ; GFX6-LABEL: name: load_global_s32_from_1_gep_m4095 ; GFX6: liveins: $vgpr0_vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX6-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294963201, implicit $exec ; GFX6-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX6-NEXT: %14:vgpr_32, dead %16:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %14, %subreg.sub1 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX6-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX6-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX6-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE2]], %subreg.sub2_sub3 ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1) - ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] ; GFX7-LABEL: name: load_global_s32_from_1_gep_m4095 ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294963201, implicit $exec ; GFX7-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; 
GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX7-NEXT: %14:vgpr_32, dead %16:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %14, %subreg.sub1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE2]], %subreg.sub2_sub3 ; GFX7-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1) - ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] ; GFX7-FLAT-LABEL: name: load_global_s32_from_1_gep_m4095 ; GFX7-FLAT: liveins: $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: {{ $}} - ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7-FLAT-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294963201, implicit $exec ; GFX7-FLAT-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX7-FLAT-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-FLAT-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX7-FLAT-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-FLAT-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7-FLAT-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX7-FLAT-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX7-FLAT-NEXT: 
[[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX7-FLAT-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX7-FLAT-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX7-FLAT-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX7-FLAT-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX7-FLAT-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX7-FLAT-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7-FLAT-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX7-FLAT-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1) - ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX7-FLAT-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] ; GFX8-LABEL: name: load_global_s32_from_1_gep_m4095 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX8-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294963201, implicit $exec ; GFX8-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX8-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX8-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1) - ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX8-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] ; GFX9-LABEL: name: 
load_global_s32_from_1_gep_m4095 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -4095, 0, implicit $exec :: (load (s8), addrspace 1) - ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[PRED_COPY]], -4095, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_UBYTE]] ; GFX10-LABEL: name: load_global_s32_from_1_gep_m4095 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294963201, implicit $exec ; GFX10-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX10-NEXT: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load (s8), addrspace 1) - ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_UBYTE]] ; GFX11-LABEL: name: load_global_s32_from_1_gep_m4095 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -4095, 0, implicit $exec :: (load (s8), addrspace 1) - ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[PRED_COPY]], -4095, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_UBYTE]] %0:vgpr(p1) = 
COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_CONSTANT i64 -4095 %2:vgpr(p1) = G_PTR_ADD %0, %1 @@ -1780,105 +1780,105 @@ ; GFX6-LABEL: name: load_global_s32_from_1_gep_m4096 ; GFX6: liveins: $vgpr0_vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX6-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294963200, implicit $exec ; GFX6-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX6-NEXT: %14:vgpr_32, dead %16:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %14, %subreg.sub1 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX6-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX6-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX6-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE2]], %subreg.sub2_sub3 ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1) - ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] ; GFX7-LABEL: name: load_global_s32_from_1_gep_m4096 ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294963200, implicit $exec ; GFX7-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; 
GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX7-NEXT: %14:vgpr_32, dead %16:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %14, %subreg.sub1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE2]], %subreg.sub2_sub3 ; GFX7-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1) - ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] ; GFX7-FLAT-LABEL: name: load_global_s32_from_1_gep_m4096 ; GFX7-FLAT: liveins: $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: {{ $}} - ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7-FLAT-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294963200, implicit $exec ; GFX7-FLAT-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX7-FLAT-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-FLAT-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX7-FLAT-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-FLAT-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7-FLAT-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX7-FLAT-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX7-FLAT-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX7-FLAT-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX7-FLAT-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 
+ ; GFX7-FLAT-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX7-FLAT-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX7-FLAT-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX7-FLAT-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7-FLAT-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX7-FLAT-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1) - ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX7-FLAT-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] ; GFX8-LABEL: name: load_global_s32_from_1_gep_m4096 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX8-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294963200, implicit $exec ; GFX8-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX8-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX8-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1) - ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX8-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] ; GFX9-LABEL: name: load_global_s32_from_1_gep_m4096 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -4096, 0, implicit $exec :: (load (s8), addrspace 1) - ; 
GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[PRED_COPY]], -4096, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_UBYTE]] ; GFX10-LABEL: name: load_global_s32_from_1_gep_m4096 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294963200, implicit $exec ; GFX10-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX10-NEXT: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load (s8), addrspace 1) - ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_UBYTE]] ; GFX11-LABEL: name: load_global_s32_from_1_gep_m4096 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -4096, 0, implicit $exec :: (load (s8), addrspace 1) - ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[PRED_COPY]], -4096, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_CONSTANT i64 -4096 %2:vgpr(p1) = G_PTR_ADD %0, %1 @@ -1901,107 +1901,107 @@ ; GFX6-LABEL: name: load_global_s32_from_1_gep_8191 ; GFX6: liveins: $vgpr0_vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; 
GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 ; GFX6-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 8191 - ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 0, 0, implicit $exec :: (load (s8), addrspace 1) - ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] + ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[PRED_COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] ; GFX7-LABEL: name: load_global_s32_from_1_gep_8191 ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 ; GFX7-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 8191 - ; GFX7-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 0, 0, implicit $exec :: (load (s8), addrspace 1) - ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] + ; GFX7-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[PRED_COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] ; GFX7-FLAT-LABEL: name: load_global_s32_from_1_gep_8191 ; GFX7-FLAT: liveins: $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: {{ $}} - ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7-FLAT-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8191, implicit $exec ; GFX7-FLAT-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX7-FLAT-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-FLAT-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX7-FLAT-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-FLAT-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7-FLAT-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX7-FLAT-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX7-FLAT-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, 
%9, %subreg.sub1 + ; GFX7-FLAT-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX7-FLAT-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX7-FLAT-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX7-FLAT-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX7-FLAT-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX7-FLAT-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7-FLAT-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX7-FLAT-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1) - ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX7-FLAT-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] ; GFX8-LABEL: name: load_global_s32_from_1_gep_8191 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX8-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8191, implicit $exec ; GFX8-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX8-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX8-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1) - ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX8-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] ; GFX9-LABEL: name: load_global_s32_from_1_gep_8191 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: 
[[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8191, implicit $exec ; GFX9-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX9-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX9-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX9-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX9-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX9-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load (s8), addrspace 1) - ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_UBYTE]] ; GFX10-LABEL: name: load_global_s32_from_1_gep_8191 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8191, implicit $exec ; GFX10-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX10-NEXT: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; 
GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load (s8), addrspace 1) - ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_UBYTE]] ; GFX11-LABEL: name: load_global_s32_from_1_gep_8191 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8191, implicit $exec ; GFX11-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX11-NEXT: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX11-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX11-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX11-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load (s8), addrspace 1) - ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_CONSTANT i64 8191 %2:vgpr(p1) = G_PTR_ADD %0, %1 @@ -2024,107 +2024,107 @@ ; GFX6-LABEL: name: load_global_s32_from_1_gep_8192 ; GFX6: liveins: $vgpr0_vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX6-NEXT: 
[[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 ; GFX6-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 8192 - ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 0, 0, implicit $exec :: (load (s8), addrspace 1) - ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] + ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[PRED_COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] ; GFX7-LABEL: name: load_global_s32_from_1_gep_8192 ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 ; GFX7-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 8192 - ; GFX7-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 0, 0, implicit $exec :: (load (s8), addrspace 1) - ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] + ; GFX7-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[PRED_COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] ; GFX7-FLAT-LABEL: name: load_global_s32_from_1_gep_8192 ; GFX7-FLAT: liveins: $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: {{ $}} - ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7-FLAT-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8192, implicit $exec ; GFX7-FLAT-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX7-FLAT-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-FLAT-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX7-FLAT-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-FLAT-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7-FLAT-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX7-FLAT-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX7-FLAT-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, 
%subreg.sub1 + ; GFX7-FLAT-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX7-FLAT-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX7-FLAT-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX7-FLAT-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX7-FLAT-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX7-FLAT-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7-FLAT-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX7-FLAT-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1) - ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX7-FLAT-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] ; GFX8-LABEL: name: load_global_s32_from_1_gep_8192 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX8-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8192, implicit $exec ; GFX8-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX8-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX8-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1) - ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX8-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] ; GFX9-LABEL: name: load_global_s32_from_1_gep_8192 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: 
[[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8192, implicit $exec ; GFX9-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX9-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX9-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX9-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX9-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX9-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load (s8), addrspace 1) - ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_UBYTE]] ; GFX10-LABEL: name: load_global_s32_from_1_gep_8192 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8192, implicit $exec ; GFX10-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX10-NEXT: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; 
GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load (s8), addrspace 1) - ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_UBYTE]] ; GFX11-LABEL: name: load_global_s32_from_1_gep_8192 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8192, implicit $exec ; GFX11-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX11-NEXT: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX11-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX11-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX11-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load (s8), addrspace 1) - ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_CONSTANT i64 8192 %2:vgpr(p1) = G_PTR_ADD %0, %1 @@ -2147,125 +2147,125 @@ ; GFX6-LABEL: name: load_global_s32_from_1_gep_m8191 ; GFX6: liveins: $vgpr0_vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX6-NEXT: 
[[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX6-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294959105, implicit $exec ; GFX6-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX6-NEXT: %14:vgpr_32, dead %16:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %14, %subreg.sub1 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX6-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX6-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX6-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE2]], %subreg.sub2_sub3 ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1) - ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] ; GFX7-LABEL: name: load_global_s32_from_1_gep_m8191 ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294959105, implicit $exec ; GFX7-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; 
GFX7-NEXT: %14:vgpr_32, dead %16:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %14, %subreg.sub1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE2]], %subreg.sub2_sub3 ; GFX7-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1) - ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] ; GFX7-FLAT-LABEL: name: load_global_s32_from_1_gep_m8191 ; GFX7-FLAT: liveins: $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: {{ $}} - ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7-FLAT-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294959105, implicit $exec ; GFX7-FLAT-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX7-FLAT-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-FLAT-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX7-FLAT-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-FLAT-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7-FLAT-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX7-FLAT-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX7-FLAT-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX7-FLAT-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX7-FLAT-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX7-FLAT-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX7-FLAT-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX7-FLAT-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 
[[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX7-FLAT-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7-FLAT-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX7-FLAT-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1) - ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX7-FLAT-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] ; GFX8-LABEL: name: load_global_s32_from_1_gep_m8191 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX8-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294959105, implicit $exec ; GFX8-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX8-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX8-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1) - ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX8-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] ; GFX9-LABEL: name: load_global_s32_from_1_gep_m8191 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294959105, implicit $exec ; GFX9-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - 
; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX9-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX9-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX9-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX9-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX9-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load (s8), addrspace 1) - ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_UBYTE]] ; GFX10-LABEL: name: load_global_s32_from_1_gep_m8191 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294959105, implicit $exec ; GFX10-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX10-NEXT: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], 
[[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load (s8), addrspace 1) - ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_UBYTE]] ; GFX11-LABEL: name: load_global_s32_from_1_gep_m8191 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294959105, implicit $exec ; GFX11-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX11-NEXT: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX11-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX11-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX11-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load (s8), addrspace 1) - ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_CONSTANT i64 -8191 %2:vgpr(p1) = G_PTR_ADD %0, %1 @@ -2288,125 +2288,125 @@ ; GFX6-LABEL: name: load_global_s32_from_1_gep_m8192 ; GFX6: liveins: $vgpr0_vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX6-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294959104, implicit $exec ; GFX6-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY 
[[REG_SEQUENCE]].sub0 - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX6-NEXT: %14:vgpr_32, dead %16:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %14, %subreg.sub1 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX6-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX6-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX6-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE2]], %subreg.sub2_sub3 ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1) - ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] ; GFX7-LABEL: name: load_global_s32_from_1_gep_m8192 ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294959104, implicit $exec ; GFX7-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX7-NEXT: %14:vgpr_32, dead %16:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %14, %subreg.sub1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY 
[[PRED_COPY]].sub1 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE2]], %subreg.sub2_sub3 ; GFX7-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1) - ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] ; GFX7-FLAT-LABEL: name: load_global_s32_from_1_gep_m8192 ; GFX7-FLAT: liveins: $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: {{ $}} - ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7-FLAT-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294959104, implicit $exec ; GFX7-FLAT-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX7-FLAT-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-FLAT-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX7-FLAT-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-FLAT-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7-FLAT-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX7-FLAT-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX7-FLAT-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX7-FLAT-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX7-FLAT-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX7-FLAT-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX7-FLAT-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX7-FLAT-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX7-FLAT-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7-FLAT-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX7-FLAT-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE 
[[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1) - ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX7-FLAT-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] ; GFX8-LABEL: name: load_global_s32_from_1_gep_m8192 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX8-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294959104, implicit $exec ; GFX8-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX8-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX8-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1) - ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX8-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] ; GFX9-LABEL: name: load_global_s32_from_1_gep_m8192 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294959104, implicit $exec ; GFX9-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX9-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed 
[[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX9-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX9-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX9-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX9-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load (s8), addrspace 1) - ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_UBYTE]] ; GFX10-LABEL: name: load_global_s32_from_1_gep_m8192 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294959104, implicit $exec ; GFX10-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX10-NEXT: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load (s8), addrspace 1) - ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_UBYTE]] ; GFX11-LABEL: 
name: load_global_s32_from_1_gep_m8192 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294959104, implicit $exec ; GFX11-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX11-NEXT: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX11-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX11-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX11-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load (s8), addrspace 1) - ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_CONSTANT i64 -8192 %2:vgpr(p1) = G_PTR_ADD %0, %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-global.s96.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-global.s96.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-global.s96.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-global.s96.mir @@ -20,38 +20,38 @@ ; GFX7-LABEL: name: load_global_v3s32 ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX3_ADDR64_:%[0-9]+]]:vreg_96 = BUFFER_LOAD_DWORDX3_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (<3 x s32>), align 4, 
addrspace 1) - ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUFFER_LOAD_DWORDX3_ADDR64_]] + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX3_ADDR64_:%[0-9]+]]:vreg_96 = BUFFER_LOAD_DWORDX3_ADDR64 [[PRED_COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (<3 x s32>), align 4, addrspace 1) + ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2 = PRED_COPY [[BUFFER_LOAD_DWORDX3_ADDR64_]] ; GFX7-FLAT-LABEL: name: load_global_v3s32 ; GFX7-FLAT: liveins: $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: {{ $}} - ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-FLAT-NEXT: [[FLAT_LOAD_DWORDX3_:%[0-9]+]]:vreg_96 = FLAT_LOAD_DWORDX3 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<3 x s32>), align 4, addrspace 1) - ; GFX7-FLAT-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[FLAT_LOAD_DWORDX3_]] + ; GFX7-FLAT-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-FLAT-NEXT: [[FLAT_LOAD_DWORDX3_:%[0-9]+]]:vreg_96 = FLAT_LOAD_DWORDX3 [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<3 x s32>), align 4, addrspace 1) + ; GFX7-FLAT-NEXT: $vgpr0_vgpr1_vgpr2 = PRED_COPY [[FLAT_LOAD_DWORDX3_]] ; GFX8-LABEL: name: load_global_v3s32 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[FLAT_LOAD_DWORDX3_:%[0-9]+]]:vreg_96 = FLAT_LOAD_DWORDX3 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<3 x s32>), align 4, addrspace 1) - ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[FLAT_LOAD_DWORDX3_]] + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[FLAT_LOAD_DWORDX3_:%[0-9]+]]:vreg_96 = FLAT_LOAD_DWORDX3 [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<3 x s32>), align 4, addrspace 1) + ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2 = PRED_COPY [[FLAT_LOAD_DWORDX3_]] ; GFX9-LABEL: name: load_global_v3s32 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[GLOBAL_LOAD_DWORDX3_:%[0-9]+]]:vreg_96 = GLOBAL_LOAD_DWORDX3 [[COPY]], 0, 0, implicit $exec :: (load (<3 x s32>), align 4, addrspace 1) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[GLOBAL_LOAD_DWORDX3_]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[GLOBAL_LOAD_DWORDX3_:%[0-9]+]]:vreg_96 = GLOBAL_LOAD_DWORDX3 [[PRED_COPY]], 0, 0, implicit $exec :: (load (<3 x s32>), align 4, addrspace 1) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = PRED_COPY [[GLOBAL_LOAD_DWORDX3_]] ; GFX10-LABEL: name: load_global_v3s32 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[GLOBAL_LOAD_DWORDX3_:%[0-9]+]]:vreg_96 = GLOBAL_LOAD_DWORDX3 [[COPY]], 0, 0, implicit $exec :: (load (<3 x s32>), align 4, addrspace 1) - ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[GLOBAL_LOAD_DWORDX3_]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[GLOBAL_LOAD_DWORDX3_:%[0-9]+]]:vreg_96 = GLOBAL_LOAD_DWORDX3 [[PRED_COPY]], 0, 0, implicit $exec :: (load (<3 x s32>), align 4, addrspace 1) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = PRED_COPY [[GLOBAL_LOAD_DWORDX3_]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(<3 x s32>) = G_LOAD %0 :: (load (<3 x s32>), align 4, addrspace 1) $vgpr0_vgpr1_vgpr2 = COPY %1 @@ -74,31 +74,31 @@ ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vreg_96(s96) = G_LOAD [[COPY]](p1) :: (load (s96), align 4, addrspace 1) - ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](s96) + ; GFX7-NEXT: 
$vgpr0_vgpr1_vgpr2 = PRED_COPY [[LOAD]](s96) ; GFX7-FLAT-LABEL: name: load_global_s96 ; GFX7-FLAT: liveins: $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: {{ $}} ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: [[LOAD:%[0-9]+]]:vreg_96(s96) = G_LOAD [[COPY]](p1) :: (load (s96), align 4, addrspace 1) - ; GFX7-FLAT-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](s96) + ; GFX7-FLAT-NEXT: $vgpr0_vgpr1_vgpr2 = PRED_COPY [[LOAD]](s96) ; GFX8-LABEL: name: load_global_s96 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX8-NEXT: [[LOAD:%[0-9]+]]:vreg_96(s96) = G_LOAD [[COPY]](p1) :: (load (s96), align 4, addrspace 1) - ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](s96) + ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2 = PRED_COPY [[LOAD]](s96) ; GFX9-LABEL: name: load_global_s96 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vreg_96(s96) = G_LOAD [[COPY]](p1) :: (load (s96), align 4, addrspace 1) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](s96) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = PRED_COPY [[LOAD]](s96) ; GFX10-LABEL: name: load_global_s96 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vreg_96(s96) = G_LOAD [[COPY]](p1) :: (load (s96), align 4, addrspace 1) - ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](s96) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = PRED_COPY [[LOAD]](s96) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s96) = G_LOAD %0 :: (load (s96), align 4, addrspace 1) $vgpr0_vgpr1_vgpr2 = COPY %1 @@ -121,31 +121,31 @@ ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vreg_96(<6 x s16>) = G_LOAD [[COPY]](p1) :: (load (<6 x s16>), align 4, addrspace 1) - ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<6 x s16>) + ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2 = PRED_COPY [[LOAD]](<6 x s16>) ; GFX7-FLAT-LABEL: name: load_global_v6s16 ; GFX7-FLAT: liveins: $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: {{ $}} ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: [[LOAD:%[0-9]+]]:vreg_96(<6 x s16>) = G_LOAD [[COPY]](p1) :: (load (<6 x s16>), align 4, addrspace 1) - ; GFX7-FLAT-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<6 x s16>) + ; GFX7-FLAT-NEXT: $vgpr0_vgpr1_vgpr2 = PRED_COPY [[LOAD]](<6 x s16>) ; GFX8-LABEL: name: load_global_v6s16 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX8-NEXT: [[LOAD:%[0-9]+]]:vreg_96(<6 x s16>) = G_LOAD [[COPY]](p1) :: (load (<6 x s16>), align 4, addrspace 1) - ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<6 x s16>) + ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2 = PRED_COPY [[LOAD]](<6 x s16>) ; GFX9-LABEL: name: load_global_v6s16 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vreg_96(<6 x s16>) = G_LOAD [[COPY]](p1) :: (load (<6 x s16>), align 4, addrspace 1) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<6 x s16>) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = PRED_COPY [[LOAD]](<6 x s16>) ; GFX10-LABEL: name: load_global_v6s16 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vreg_96(<6 x s16>) = G_LOAD [[COPY]](p1) :: (load (<6 x s16>), align 4, addrspace 1) - ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<6 x s16>) + ; GFX10-NEXT: 
$vgpr0_vgpr1_vgpr2 = PRED_COPY [[LOAD]](<6 x s16>) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(<6 x s16>) = G_LOAD %0 :: (load (<6 x s16>), align 4, addrspace 1) $vgpr0_vgpr1_vgpr2 = COPY %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-local-128.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-local-128.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-local-128.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-local-128.mir @@ -18,28 +18,28 @@ ; GFX7-LABEL: name: load_local_v4s32_align16 ; GFX7: liveins: $vgpr0 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 - ; GFX7-NEXT: [[DS_READ_B128_:%[0-9]+]]:vreg_128 = DS_READ_B128 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (<4 x s32>), addrspace 3) - ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DS_READ_B128_]] + ; GFX7-NEXT: [[DS_READ_B128_:%[0-9]+]]:vreg_128 = DS_READ_B128 [[PRED_COPY]], 0, 0, implicit $m0, implicit $exec :: (load (<4 x s32>), addrspace 3) + ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[DS_READ_B128_]] ; GFX9-LABEL: name: load_local_v4s32_align16 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128 = DS_READ_B128_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (<4 x s32>), addrspace 3) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DS_READ_B128_gfx9_]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128 = DS_READ_B128_gfx9 [[PRED_COPY]], 0, 0, implicit $exec :: (load (<4 x s32>), addrspace 3) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[DS_READ_B128_gfx9_]] ; GFX10-LABEL: name: load_local_v4s32_align16 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128 = DS_READ_B128_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (<4 x s32>), addrspace 3) - ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DS_READ_B128_gfx9_]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128 = DS_READ_B128_gfx9 [[PRED_COPY]], 0, 0, implicit $exec :: (load (<4 x s32>), addrspace 3) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[DS_READ_B128_gfx9_]] ; GFX11-LABEL: name: load_local_v4s32_align16 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128 = DS_READ_B128_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (<4 x s32>), addrspace 3) - ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DS_READ_B128_gfx9_]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128 = DS_READ_B128_gfx9 [[PRED_COPY]], 0, 0, implicit $exec :: (load (<4 x s32>), addrspace 3) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[DS_READ_B128_gfx9_]] %0:vgpr(p3) = COPY $vgpr0 %1:vgpr(<4 x s32>) = G_LOAD %0 :: (load (<4 x s32>), align 16, addrspace 3) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 @@ -60,28 +60,28 @@ ; GFX7-LABEL: name: load_local_v4s32_align_8 ; GFX7: liveins: $vgpr0 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 - ; GFX7-NEXT: [[DS_READ2_B64_:%[0-9]+]]:vreg_128 = 
DS_READ2_B64 [[COPY]], 0, 1, 0, implicit $m0, implicit $exec :: (load (<4 x s32>), align 8, addrspace 3) - ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DS_READ2_B64_]] + ; GFX7-NEXT: [[DS_READ2_B64_:%[0-9]+]]:vreg_128 = DS_READ2_B64 [[PRED_COPY]], 0, 1, 0, implicit $m0, implicit $exec :: (load (<4 x s32>), align 8, addrspace 3) + ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[DS_READ2_B64_]] ; GFX9-LABEL: name: load_local_v4s32_align_8 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[DS_READ2_B64_gfx9_:%[0-9]+]]:vreg_128 = DS_READ2_B64_gfx9 [[COPY]], 0, 1, 0, implicit $exec :: (load (<4 x s32>), align 8, addrspace 3) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DS_READ2_B64_gfx9_]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[DS_READ2_B64_gfx9_:%[0-9]+]]:vreg_128 = DS_READ2_B64_gfx9 [[PRED_COPY]], 0, 1, 0, implicit $exec :: (load (<4 x s32>), align 8, addrspace 3) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[DS_READ2_B64_gfx9_]] ; GFX10-LABEL: name: load_local_v4s32_align_8 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[DS_READ2_B64_gfx9_:%[0-9]+]]:vreg_128 = DS_READ2_B64_gfx9 [[COPY]], 0, 1, 0, implicit $exec :: (load (<4 x s32>), align 8, addrspace 3) - ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DS_READ2_B64_gfx9_]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[DS_READ2_B64_gfx9_:%[0-9]+]]:vreg_128 = DS_READ2_B64_gfx9 [[PRED_COPY]], 0, 1, 0, implicit $exec :: (load (<4 x s32>), align 8, addrspace 3) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[DS_READ2_B64_gfx9_]] ; GFX11-LABEL: name: load_local_v4s32_align_8 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[DS_READ2_B64_gfx9_:%[0-9]+]]:vreg_128 = DS_READ2_B64_gfx9 [[COPY]], 0, 1, 0, implicit $exec :: (load (<4 x s32>), align 8, addrspace 3) - ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DS_READ2_B64_gfx9_]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[DS_READ2_B64_gfx9_:%[0-9]+]]:vreg_128 = DS_READ2_B64_gfx9 [[PRED_COPY]], 0, 1, 0, implicit $exec :: (load (<4 x s32>), align 8, addrspace 3) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[DS_READ2_B64_gfx9_]] %0:vgpr(p3) = COPY $vgpr0 %1:vgpr(<4 x s32>) = G_LOAD %0 :: (load (<4 x s32>), align 8, addrspace 3) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 @@ -102,28 +102,28 @@ ; GFX7-LABEL: name: load_local_v4s32_align_8_offset_160 ; GFX7: liveins: $vgpr0 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 - ; GFX7-NEXT: [[DS_READ2_B64_:%[0-9]+]]:vreg_128 = DS_READ2_B64 [[COPY]], 50, 51, 0, implicit $m0, implicit $exec :: (load (<4 x s32>), align 8, addrspace 3) - ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DS_READ2_B64_]] + ; GFX7-NEXT: [[DS_READ2_B64_:%[0-9]+]]:vreg_128 = DS_READ2_B64 [[PRED_COPY]], 50, 51, 0, implicit $m0, implicit $exec :: (load (<4 x s32>), align 8, addrspace 3) + ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[DS_READ2_B64_]] ; GFX9-LABEL: name: load_local_v4s32_align_8_offset_160 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[DS_READ2_B64_gfx9_:%[0-9]+]]:vreg_128 = DS_READ2_B64_gfx9 [[COPY]], 50, 51, 0, implicit $exec :: (load (<4 x 
s32>), align 8, addrspace 3) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DS_READ2_B64_gfx9_]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[DS_READ2_B64_gfx9_:%[0-9]+]]:vreg_128 = DS_READ2_B64_gfx9 [[PRED_COPY]], 50, 51, 0, implicit $exec :: (load (<4 x s32>), align 8, addrspace 3) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[DS_READ2_B64_gfx9_]] ; GFX10-LABEL: name: load_local_v4s32_align_8_offset_160 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[DS_READ2_B64_gfx9_:%[0-9]+]]:vreg_128 = DS_READ2_B64_gfx9 [[COPY]], 50, 51, 0, implicit $exec :: (load (<4 x s32>), align 8, addrspace 3) - ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DS_READ2_B64_gfx9_]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[DS_READ2_B64_gfx9_:%[0-9]+]]:vreg_128 = DS_READ2_B64_gfx9 [[PRED_COPY]], 50, 51, 0, implicit $exec :: (load (<4 x s32>), align 8, addrspace 3) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[DS_READ2_B64_gfx9_]] ; GFX11-LABEL: name: load_local_v4s32_align_8_offset_160 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[DS_READ2_B64_gfx9_:%[0-9]+]]:vreg_128 = DS_READ2_B64_gfx9 [[COPY]], 50, 51, 0, implicit $exec :: (load (<4 x s32>), align 8, addrspace 3) - ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DS_READ2_B64_gfx9_]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[DS_READ2_B64_gfx9_:%[0-9]+]]:vreg_128 = DS_READ2_B64_gfx9 [[PRED_COPY]], 50, 51, 0, implicit $exec :: (load (<4 x s32>), align 8, addrspace 3) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[DS_READ2_B64_gfx9_]] %0:vgpr(p3) = COPY $vgpr0 %1:vgpr(s32) = G_CONSTANT i32 400 %2:vgpr(p3) = G_PTR_ADD %0, %1 @@ -146,36 +146,36 @@ ; GFX7-LABEL: name: load_local_v4s32_align_8_offset_320 ; GFX7: liveins: $vgpr0 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX7-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4000, implicit $exec - ; GFX7-NEXT: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX7-NEXT: $m0 = S_MOV_B32 -1 - ; GFX7-NEXT: [[DS_READ2_B64_:%[0-9]+]]:vreg_128 = DS_READ2_B64 %2, 0, 1, 0, implicit $m0, implicit $exec :: (load (<4 x s32>), align 8, addrspace 3) - ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DS_READ2_B64_]] + ; GFX7-NEXT: [[DS_READ2_B64_:%[0-9]+]]:vreg_128 = DS_READ2_B64 [[V_ADD_CO_U32_e64_]], 0, 1, 0, implicit $m0, implicit $exec :: (load (<4 x s32>), align 8, addrspace 3) + ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[DS_READ2_B64_]] ; GFX9-LABEL: name: load_local_v4s32_align_8_offset_320 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4000, implicit $exec - ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[PRED_COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX9-NEXT: 
[[DS_READ2_B64_gfx9_:%[0-9]+]]:vreg_128 = DS_READ2_B64_gfx9 [[V_ADD_U32_e64_]], 0, 1, 0, implicit $exec :: (load (<4 x s32>), align 8, addrspace 3) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DS_READ2_B64_gfx9_]] + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[DS_READ2_B64_gfx9_]] ; GFX10-LABEL: name: load_local_v4s32_align_8_offset_320 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4000, implicit $exec - ; GFX10-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX10-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[PRED_COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX10-NEXT: [[DS_READ2_B64_gfx9_:%[0-9]+]]:vreg_128 = DS_READ2_B64_gfx9 [[V_ADD_U32_e64_]], 0, 1, 0, implicit $exec :: (load (<4 x s32>), align 8, addrspace 3) - ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DS_READ2_B64_gfx9_]] + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[DS_READ2_B64_gfx9_]] ; GFX11-LABEL: name: load_local_v4s32_align_8_offset_320 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4000, implicit $exec - ; GFX11-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX11-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[PRED_COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX11-NEXT: [[DS_READ2_B64_gfx9_:%[0-9]+]]:vreg_128 = DS_READ2_B64_gfx9 [[V_ADD_U32_e64_]], 0, 1, 0, implicit $exec :: (load (<4 x s32>), align 8, addrspace 3) - ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DS_READ2_B64_gfx9_]] + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[DS_READ2_B64_gfx9_]] %0:vgpr(p3) = COPY $vgpr0 %1:vgpr(s32) = G_CONSTANT i32 4000 %2:vgpr(p3) = G_PTR_ADD %0, %1 @@ -198,28 +198,28 @@ ; GFX7-LABEL: name: load_local_v2s64 ; GFX7: liveins: $vgpr0 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 - ; GFX7-NEXT: [[DS_READ2_B64_:%[0-9]+]]:vreg_128 = DS_READ2_B64 [[COPY]], 0, 1, 0, implicit $m0, implicit $exec :: (load (<2 x s64>), align 8, addrspace 3) - ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DS_READ2_B64_]] + ; GFX7-NEXT: [[DS_READ2_B64_:%[0-9]+]]:vreg_128 = DS_READ2_B64 [[PRED_COPY]], 0, 1, 0, implicit $m0, implicit $exec :: (load (<2 x s64>), align 8, addrspace 3) + ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[DS_READ2_B64_]] ; GFX9-LABEL: name: load_local_v2s64 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[DS_READ2_B64_gfx9_:%[0-9]+]]:vreg_128 = DS_READ2_B64_gfx9 [[COPY]], 0, 1, 0, implicit $exec :: (load (<2 x s64>), align 8, addrspace 3) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DS_READ2_B64_gfx9_]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[DS_READ2_B64_gfx9_:%[0-9]+]]:vreg_128 = DS_READ2_B64_gfx9 [[PRED_COPY]], 0, 1, 0, implicit $exec :: (load (<2 x s64>), align 8, addrspace 3) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[DS_READ2_B64_gfx9_]] ; GFX10-LABEL: name: load_local_v2s64 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: 
[[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[DS_READ2_B64_gfx9_:%[0-9]+]]:vreg_128 = DS_READ2_B64_gfx9 [[COPY]], 0, 1, 0, implicit $exec :: (load (<2 x s64>), align 8, addrspace 3) - ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DS_READ2_B64_gfx9_]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[DS_READ2_B64_gfx9_:%[0-9]+]]:vreg_128 = DS_READ2_B64_gfx9 [[PRED_COPY]], 0, 1, 0, implicit $exec :: (load (<2 x s64>), align 8, addrspace 3) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[DS_READ2_B64_gfx9_]] ; GFX11-LABEL: name: load_local_v2s64 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[DS_READ2_B64_gfx9_:%[0-9]+]]:vreg_128 = DS_READ2_B64_gfx9 [[COPY]], 0, 1, 0, implicit $exec :: (load (<2 x s64>), align 8, addrspace 3) - ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DS_READ2_B64_gfx9_]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[DS_READ2_B64_gfx9_:%[0-9]+]]:vreg_128 = DS_READ2_B64_gfx9 [[PRED_COPY]], 0, 1, 0, implicit $exec :: (load (<2 x s64>), align 8, addrspace 3) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[DS_READ2_B64_gfx9_]] %0:vgpr(p3) = COPY $vgpr0 %1:vgpr(<2 x s64>) = G_LOAD %0 :: (load (<2 x s64>), align 8, addrspace 3) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 @@ -243,25 +243,25 @@ ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vreg_128(<2 x p1>) = G_LOAD [[COPY]](p3) :: (load (<2 x p1>), align 8, addrspace 3) - ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x p1>) + ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[LOAD]](<2 x p1>) ; GFX9-LABEL: name: load_local_v2p1 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vreg_128(<2 x p1>) = G_LOAD [[COPY]](p3) :: (load (<2 x p1>), align 8, addrspace 3) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x p1>) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[LOAD]](<2 x p1>) ; GFX10-LABEL: name: load_local_v2p1 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vreg_128(<2 x p1>) = G_LOAD [[COPY]](p3) :: (load (<2 x p1>), align 8, addrspace 3) - ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x p1>) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[LOAD]](<2 x p1>) ; GFX11-LABEL: name: load_local_v2p1 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 ; GFX11-NEXT: [[LOAD:%[0-9]+]]:vreg_128(<2 x p1>) = G_LOAD [[COPY]](p3) :: (load (<2 x p1>), align 8, addrspace 3) - ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x p1>) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[LOAD]](<2 x p1>) %0:vgpr(p3) = COPY $vgpr0 %1:vgpr(<2 x p1>) = G_LOAD %0 :: (load (<2 x p1>), align 8, addrspace 3) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 @@ -285,25 +285,25 @@ ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vreg_128(s128) = G_LOAD [[COPY]](p3) :: (load (s128), align 8, addrspace 3) - ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128) + ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[LOAD]](s128) ; GFX9-LABEL: name: load_local_s128 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vreg_128(s128) = G_LOAD 
[[COPY]](p3) :: (load (s128), align 8, addrspace 3) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[LOAD]](s128) ; GFX10-LABEL: name: load_local_s128 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vreg_128(s128) = G_LOAD [[COPY]](p3) :: (load (s128), align 8, addrspace 3) - ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[LOAD]](s128) ; GFX11-LABEL: name: load_local_s128 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 ; GFX11-NEXT: [[LOAD:%[0-9]+]]:vreg_128(s128) = G_LOAD [[COPY]](p3) :: (load (s128), align 8, addrspace 3) - ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[LOAD]](s128) %0:vgpr(p3) = COPY $vgpr0 %1:vgpr(s128) = G_LOAD %0 :: (load (s128), align 8, addrspace 3) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 @@ -324,28 +324,28 @@ ; GFX7-LABEL: name: load_local_v8s16 ; GFX7: liveins: $vgpr0 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 - ; GFX7-NEXT: [[DS_READ2_B64_:%[0-9]+]]:vreg_128 = DS_READ2_B64 [[COPY]], 0, 1, 0, implicit $m0, implicit $exec :: (load (<8 x s16>), align 8, addrspace 3) - ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DS_READ2_B64_]] + ; GFX7-NEXT: [[DS_READ2_B64_:%[0-9]+]]:vreg_128 = DS_READ2_B64 [[PRED_COPY]], 0, 1, 0, implicit $m0, implicit $exec :: (load (<8 x s16>), align 8, addrspace 3) + ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[DS_READ2_B64_]] ; GFX9-LABEL: name: load_local_v8s16 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[DS_READ2_B64_gfx9_:%[0-9]+]]:vreg_128 = DS_READ2_B64_gfx9 [[COPY]], 0, 1, 0, implicit $exec :: (load (<8 x s16>), align 8, addrspace 3) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DS_READ2_B64_gfx9_]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[DS_READ2_B64_gfx9_:%[0-9]+]]:vreg_128 = DS_READ2_B64_gfx9 [[PRED_COPY]], 0, 1, 0, implicit $exec :: (load (<8 x s16>), align 8, addrspace 3) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[DS_READ2_B64_gfx9_]] ; GFX10-LABEL: name: load_local_v8s16 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[DS_READ2_B64_gfx9_:%[0-9]+]]:vreg_128 = DS_READ2_B64_gfx9 [[COPY]], 0, 1, 0, implicit $exec :: (load (<8 x s16>), align 8, addrspace 3) - ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DS_READ2_B64_gfx9_]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[DS_READ2_B64_gfx9_:%[0-9]+]]:vreg_128 = DS_READ2_B64_gfx9 [[PRED_COPY]], 0, 1, 0, implicit $exec :: (load (<8 x s16>), align 8, addrspace 3) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[DS_READ2_B64_gfx9_]] ; GFX11-LABEL: name: load_local_v8s16 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[DS_READ2_B64_gfx9_:%[0-9]+]]:vreg_128 = DS_READ2_B64_gfx9 [[COPY]], 0, 1, 0, implicit $exec :: (load (<8 x s16>), align 8, addrspace 3) - ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DS_READ2_B64_gfx9_]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: 
[[DS_READ2_B64_gfx9_:%[0-9]+]]:vreg_128 = DS_READ2_B64_gfx9 [[PRED_COPY]], 0, 1, 0, implicit $exec :: (load (<8 x s16>), align 8, addrspace 3) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[DS_READ2_B64_gfx9_]] %0:vgpr(p3) = COPY $vgpr0 %1:vgpr(<8 x s16>) = G_LOAD %0 :: (load (<8 x s16>), align 8, addrspace 3) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-local.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-local.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-local.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-local.mir @@ -20,29 +20,29 @@ ; GFX6-LABEL: name: load_local_s32_from_4 ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX6-NEXT: $m0 = S_MOV_B32 -1 - ; GFX6-NEXT: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (s32), addrspace 3) - ; GFX6-NEXT: $vgpr0 = COPY [[DS_READ_B32_]] + ; GFX6-NEXT: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[PRED_COPY]], 0, 0, implicit $m0, implicit $exec :: (load (s32), addrspace 3) + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[DS_READ_B32_]] ; GFX7-LABEL: name: load_local_s32_from_4 ; GFX7: liveins: $vgpr0 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 - ; GFX7-NEXT: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (s32), addrspace 3) - ; GFX7-NEXT: $vgpr0 = COPY [[DS_READ_B32_]] + ; GFX7-NEXT: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[PRED_COPY]], 0, 0, implicit $m0, implicit $exec :: (load (s32), addrspace 3) + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[DS_READ_B32_]] ; GFX9-LABEL: name: load_local_s32_from_4 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[DS_READ_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (s32), addrspace 3) - ; GFX9-NEXT: $vgpr0 = COPY [[DS_READ_B32_gfx9_]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[DS_READ_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[PRED_COPY]], 0, 0, implicit $exec :: (load (s32), addrspace 3) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[DS_READ_B32_gfx9_]] ; GFX10-LABEL: name: load_local_s32_from_4 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[DS_READ_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (s32), addrspace 3) - ; GFX10-NEXT: $vgpr0 = COPY [[DS_READ_B32_gfx9_]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[DS_READ_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[PRED_COPY]], 0, 0, implicit $exec :: (load (s32), addrspace 3) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[DS_READ_B32_gfx9_]] %0:vgpr(p3) = COPY $vgpr0 %1:vgpr(s32) = G_LOAD %0 :: (load (s32), align 4, addrspace 3) $vgpr0 = COPY %1 @@ -63,29 +63,29 @@ ; GFX6-LABEL: name: load_local_s32_from_2 ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX6-NEXT: $m0 = S_MOV_B32 -1 - ; GFX6-NEXT: [[DS_READ_U16_:%[0-9]+]]:vgpr_32 = DS_READ_U16 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (s16), addrspace 3) - ; 
GFX6-NEXT: $vgpr0 = COPY [[DS_READ_U16_]] + ; GFX6-NEXT: [[DS_READ_U16_:%[0-9]+]]:vgpr_32 = DS_READ_U16 [[PRED_COPY]], 0, 0, implicit $m0, implicit $exec :: (load (s16), addrspace 3) + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[DS_READ_U16_]] ; GFX7-LABEL: name: load_local_s32_from_2 ; GFX7: liveins: $vgpr0 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 - ; GFX7-NEXT: [[DS_READ_U16_:%[0-9]+]]:vgpr_32 = DS_READ_U16 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (s16), addrspace 3) - ; GFX7-NEXT: $vgpr0 = COPY [[DS_READ_U16_]] + ; GFX7-NEXT: [[DS_READ_U16_:%[0-9]+]]:vgpr_32 = DS_READ_U16 [[PRED_COPY]], 0, 0, implicit $m0, implicit $exec :: (load (s16), addrspace 3) + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[DS_READ_U16_]] ; GFX9-LABEL: name: load_local_s32_from_2 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[DS_READ_U16_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_U16_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (s16), addrspace 3) - ; GFX9-NEXT: $vgpr0 = COPY [[DS_READ_U16_gfx9_]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[DS_READ_U16_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_U16_gfx9 [[PRED_COPY]], 0, 0, implicit $exec :: (load (s16), addrspace 3) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[DS_READ_U16_gfx9_]] ; GFX10-LABEL: name: load_local_s32_from_2 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[DS_READ_U16_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_U16_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (s16), addrspace 3) - ; GFX10-NEXT: $vgpr0 = COPY [[DS_READ_U16_gfx9_]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[DS_READ_U16_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_U16_gfx9 [[PRED_COPY]], 0, 0, implicit $exec :: (load (s16), addrspace 3) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[DS_READ_U16_gfx9_]] %0:vgpr(p3) = COPY $vgpr0 %1:vgpr(s32) = G_LOAD %0 :: (load (s16), align 2, addrspace 3) $vgpr0 = COPY %1 @@ -109,29 +109,29 @@ ; GFX6-LABEL: name: load_local_s32_from_1 ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX6-NEXT: $m0 = S_MOV_B32 -1 - ; GFX6-NEXT: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (s8), addrspace 3) - ; GFX6-NEXT: $vgpr0 = COPY [[DS_READ_U8_]] + ; GFX6-NEXT: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 [[PRED_COPY]], 0, 0, implicit $m0, implicit $exec :: (load (s8), addrspace 3) + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[DS_READ_U8_]] ; GFX7-LABEL: name: load_local_s32_from_1 ; GFX7: liveins: $vgpr0 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 - ; GFX7-NEXT: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (s8), addrspace 3) - ; GFX7-NEXT: $vgpr0 = COPY [[DS_READ_U8_]] + ; GFX7-NEXT: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 [[PRED_COPY]], 0, 0, implicit $m0, implicit $exec :: (load (s8), addrspace 3) + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[DS_READ_U8_]] ; GFX9-LABEL: name: load_local_s32_from_1 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[DS_READ_U8_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_U8_gfx9 
[[COPY]], 0, 0, implicit $exec :: (load (s8), addrspace 3) - ; GFX9-NEXT: $vgpr0 = COPY [[DS_READ_U8_gfx9_]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[DS_READ_U8_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_U8_gfx9 [[PRED_COPY]], 0, 0, implicit $exec :: (load (s8), addrspace 3) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[DS_READ_U8_gfx9_]] ; GFX10-LABEL: name: load_local_s32_from_1 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[DS_READ_U8_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_U8_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (s8), addrspace 3) - ; GFX10-NEXT: $vgpr0 = COPY [[DS_READ_U8_gfx9_]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[DS_READ_U8_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_U8_gfx9 [[PRED_COPY]], 0, 0, implicit $exec :: (load (s8), addrspace 3) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[DS_READ_U8_gfx9_]] %0:vgpr(p3) = COPY $vgpr0 %1:vgpr(s32) = G_LOAD %0 :: (load (s8), align 1, addrspace 3) $vgpr0 = COPY %1 @@ -152,29 +152,29 @@ ; GFX6-LABEL: name: load_local_v2s32 ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX6-NEXT: $m0 = S_MOV_B32 -1 - ; GFX6-NEXT: [[DS_READ_B64_:%[0-9]+]]:vreg_64 = DS_READ_B64 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (<2 x s32>), addrspace 3) - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ_B64_]] + ; GFX6-NEXT: [[DS_READ_B64_:%[0-9]+]]:vreg_64 = DS_READ_B64 [[PRED_COPY]], 0, 0, implicit $m0, implicit $exec :: (load (<2 x s32>), addrspace 3) + ; GFX6-NEXT: $vgpr0_vgpr1 = PRED_COPY [[DS_READ_B64_]] ; GFX7-LABEL: name: load_local_v2s32 ; GFX7: liveins: $vgpr0 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 - ; GFX7-NEXT: [[DS_READ_B64_:%[0-9]+]]:vreg_64 = DS_READ_B64 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (<2 x s32>), addrspace 3) - ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ_B64_]] + ; GFX7-NEXT: [[DS_READ_B64_:%[0-9]+]]:vreg_64 = DS_READ_B64 [[PRED_COPY]], 0, 0, implicit $m0, implicit $exec :: (load (<2 x s32>), addrspace 3) + ; GFX7-NEXT: $vgpr0_vgpr1 = PRED_COPY [[DS_READ_B64_]] ; GFX9-LABEL: name: load_local_v2s32 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[DS_READ_B64_gfx9_:%[0-9]+]]:vreg_64 = DS_READ_B64_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (<2 x s32>), addrspace 3) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ_B64_gfx9_]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[DS_READ_B64_gfx9_:%[0-9]+]]:vreg_64 = DS_READ_B64_gfx9 [[PRED_COPY]], 0, 0, implicit $exec :: (load (<2 x s32>), addrspace 3) + ; GFX9-NEXT: $vgpr0_vgpr1 = PRED_COPY [[DS_READ_B64_gfx9_]] ; GFX10-LABEL: name: load_local_v2s32 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[DS_READ_B64_gfx9_:%[0-9]+]]:vreg_64 = DS_READ_B64_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (<2 x s32>), addrspace 3) - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ_B64_gfx9_]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[DS_READ_B64_gfx9_:%[0-9]+]]:vreg_64 = DS_READ_B64_gfx9 [[PRED_COPY]], 0, 0, implicit $exec :: (load (<2 x s32>), addrspace 3) + ; GFX10-NEXT: $vgpr0_vgpr1 = PRED_COPY [[DS_READ_B64_gfx9_]] %0:vgpr(p3) = COPY 
$vgpr0 %1:vgpr(<2 x s32>) = G_LOAD %0 :: (load (<2 x s32>), align 8, addrspace 3) $vgpr0_vgpr1 = COPY %1 @@ -198,26 +198,26 @@ ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 ; GFX6-NEXT: $m0 = S_MOV_B32 -1 ; GFX6-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), align 4, addrspace 3) - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX6-NEXT: $vgpr0_vgpr1 = PRED_COPY [[LOAD]](<2 x s32>) ; GFX7-LABEL: name: load_local_v2s32_align4 ; GFX7: liveins: $vgpr0 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 - ; GFX7-NEXT: [[DS_READ2_B32_:%[0-9]+]]:vreg_64 = DS_READ2_B32 [[COPY]], 0, 1, 0, implicit $m0, implicit $exec :: (load (<2 x s32>), align 4, addrspace 3) - ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ2_B32_]] + ; GFX7-NEXT: [[DS_READ2_B32_:%[0-9]+]]:vreg_64 = DS_READ2_B32 [[PRED_COPY]], 0, 1, 0, implicit $m0, implicit $exec :: (load (<2 x s32>), align 4, addrspace 3) + ; GFX7-NEXT: $vgpr0_vgpr1 = PRED_COPY [[DS_READ2_B32_]] ; GFX9-LABEL: name: load_local_v2s32_align4 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[DS_READ2_B32_gfx9_:%[0-9]+]]:vreg_64 = DS_READ2_B32_gfx9 [[COPY]], 0, 1, 0, implicit $exec :: (load (<2 x s32>), align 4, addrspace 3) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ2_B32_gfx9_]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[DS_READ2_B32_gfx9_:%[0-9]+]]:vreg_64 = DS_READ2_B32_gfx9 [[PRED_COPY]], 0, 1, 0, implicit $exec :: (load (<2 x s32>), align 4, addrspace 3) + ; GFX9-NEXT: $vgpr0_vgpr1 = PRED_COPY [[DS_READ2_B32_gfx9_]] ; GFX10-LABEL: name: load_local_v2s32_align4 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[DS_READ2_B32_gfx9_:%[0-9]+]]:vreg_64 = DS_READ2_B32_gfx9 [[COPY]], 0, 1, 0, implicit $exec :: (load (<2 x s32>), align 4, addrspace 3) - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ2_B32_gfx9_]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[DS_READ2_B32_gfx9_:%[0-9]+]]:vreg_64 = DS_READ2_B32_gfx9 [[PRED_COPY]], 0, 1, 0, implicit $exec :: (load (<2 x s32>), align 4, addrspace 3) + ; GFX10-NEXT: $vgpr0_vgpr1 = PRED_COPY [[DS_READ2_B32_gfx9_]] %0:vgpr(p3) = COPY $vgpr0 %1:vgpr(<2 x s32>) = G_LOAD %0 :: (load (<2 x s32>), align 4, addrspace 3) $vgpr0_vgpr1 = COPY %1 @@ -238,29 +238,29 @@ ; GFX6-LABEL: name: load_local_s64 ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX6-NEXT: $m0 = S_MOV_B32 -1 - ; GFX6-NEXT: [[DS_READ_B64_:%[0-9]+]]:vreg_64 = DS_READ_B64 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (s64), addrspace 3) - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ_B64_]] + ; GFX6-NEXT: [[DS_READ_B64_:%[0-9]+]]:vreg_64 = DS_READ_B64 [[PRED_COPY]], 0, 0, implicit $m0, implicit $exec :: (load (s64), addrspace 3) + ; GFX6-NEXT: $vgpr0_vgpr1 = PRED_COPY [[DS_READ_B64_]] ; GFX7-LABEL: name: load_local_s64 ; GFX7: liveins: $vgpr0 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 - ; GFX7-NEXT: [[DS_READ_B64_:%[0-9]+]]:vreg_64 = DS_READ_B64 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (s64), addrspace 3) - ; GFX7-NEXT: $vgpr0_vgpr1 = COPY 
[[DS_READ_B64_]] + ; GFX7-NEXT: [[DS_READ_B64_:%[0-9]+]]:vreg_64 = DS_READ_B64 [[PRED_COPY]], 0, 0, implicit $m0, implicit $exec :: (load (s64), addrspace 3) + ; GFX7-NEXT: $vgpr0_vgpr1 = PRED_COPY [[DS_READ_B64_]] ; GFX9-LABEL: name: load_local_s64 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[DS_READ_B64_gfx9_:%[0-9]+]]:vreg_64 = DS_READ_B64_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (s64), addrspace 3) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ_B64_gfx9_]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[DS_READ_B64_gfx9_:%[0-9]+]]:vreg_64 = DS_READ_B64_gfx9 [[PRED_COPY]], 0, 0, implicit $exec :: (load (s64), addrspace 3) + ; GFX9-NEXT: $vgpr0_vgpr1 = PRED_COPY [[DS_READ_B64_gfx9_]] ; GFX10-LABEL: name: load_local_s64 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[DS_READ_B64_gfx9_:%[0-9]+]]:vreg_64 = DS_READ_B64_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (s64), addrspace 3) - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ_B64_gfx9_]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[DS_READ_B64_gfx9_:%[0-9]+]]:vreg_64 = DS_READ_B64_gfx9 [[PRED_COPY]], 0, 0, implicit $exec :: (load (s64), addrspace 3) + ; GFX10-NEXT: $vgpr0_vgpr1 = PRED_COPY [[DS_READ_B64_gfx9_]] %0:vgpr(p3) = COPY $vgpr0 %1:vgpr(s64) = G_LOAD %0 :: (load (s64), align 8, addrspace 3) $vgpr0_vgpr1 = COPY %1 @@ -284,26 +284,26 @@ ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 ; GFX6-NEXT: $m0 = S_MOV_B32 -1 ; GFX6-NEXT: [[LOAD:%[0-9]+]]:vreg_64(s64) = G_LOAD [[COPY]](p3) :: (load (s64), align 4, addrspace 3) - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; GFX6-NEXT: $vgpr0_vgpr1 = PRED_COPY [[LOAD]](s64) ; GFX7-LABEL: name: load_local_s64_align4 ; GFX7: liveins: $vgpr0 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 - ; GFX7-NEXT: [[DS_READ2_B32_:%[0-9]+]]:vreg_64 = DS_READ2_B32 [[COPY]], 0, 1, 0, implicit $m0, implicit $exec :: (load (s64), align 4, addrspace 3) - ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ2_B32_]] + ; GFX7-NEXT: [[DS_READ2_B32_:%[0-9]+]]:vreg_64 = DS_READ2_B32 [[PRED_COPY]], 0, 1, 0, implicit $m0, implicit $exec :: (load (s64), align 4, addrspace 3) + ; GFX7-NEXT: $vgpr0_vgpr1 = PRED_COPY [[DS_READ2_B32_]] ; GFX9-LABEL: name: load_local_s64_align4 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[DS_READ2_B32_gfx9_:%[0-9]+]]:vreg_64 = DS_READ2_B32_gfx9 [[COPY]], 0, 1, 0, implicit $exec :: (load (s64), align 4, addrspace 3) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ2_B32_gfx9_]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[DS_READ2_B32_gfx9_:%[0-9]+]]:vreg_64 = DS_READ2_B32_gfx9 [[PRED_COPY]], 0, 1, 0, implicit $exec :: (load (s64), align 4, addrspace 3) + ; GFX9-NEXT: $vgpr0_vgpr1 = PRED_COPY [[DS_READ2_B32_gfx9_]] ; GFX10-LABEL: name: load_local_s64_align4 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[DS_READ2_B32_gfx9_:%[0-9]+]]:vreg_64 = DS_READ2_B32_gfx9 [[COPY]], 0, 1, 0, implicit $exec :: (load (s64), align 4, addrspace 3) - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ2_B32_gfx9_]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: 
[[DS_READ2_B32_gfx9_:%[0-9]+]]:vreg_64 = DS_READ2_B32_gfx9 [[PRED_COPY]], 0, 1, 0, implicit $exec :: (load (s64), align 4, addrspace 3) + ; GFX10-NEXT: $vgpr0_vgpr1 = PRED_COPY [[DS_READ2_B32_gfx9_]] %0:vgpr(p3) = COPY $vgpr0 %1:vgpr(s64) = G_LOAD %0 :: (load (s64), align 4, addrspace 3) $vgpr0_vgpr1 = COPY %1 @@ -324,29 +324,29 @@ ; GFX6-LABEL: name: load_local_p3_from_4 ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX6-NEXT: $m0 = S_MOV_B32 -1 - ; GFX6-NEXT: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (p3), addrspace 3) - ; GFX6-NEXT: $vgpr0 = COPY [[DS_READ_B32_]] + ; GFX6-NEXT: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[PRED_COPY]], 0, 0, implicit $m0, implicit $exec :: (load (p3), addrspace 3) + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[DS_READ_B32_]] ; GFX7-LABEL: name: load_local_p3_from_4 ; GFX7: liveins: $vgpr0 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 - ; GFX7-NEXT: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (p3), addrspace 3) - ; GFX7-NEXT: $vgpr0 = COPY [[DS_READ_B32_]] + ; GFX7-NEXT: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[PRED_COPY]], 0, 0, implicit $m0, implicit $exec :: (load (p3), addrspace 3) + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[DS_READ_B32_]] ; GFX9-LABEL: name: load_local_p3_from_4 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[DS_READ_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (p3), addrspace 3) - ; GFX9-NEXT: $vgpr0 = COPY [[DS_READ_B32_gfx9_]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[DS_READ_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[PRED_COPY]], 0, 0, implicit $exec :: (load (p3), addrspace 3) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[DS_READ_B32_gfx9_]] ; GFX10-LABEL: name: load_local_p3_from_4 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[DS_READ_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (p3), addrspace 3) - ; GFX10-NEXT: $vgpr0 = COPY [[DS_READ_B32_gfx9_]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[DS_READ_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[PRED_COPY]], 0, 0, implicit $exec :: (load (p3), addrspace 3) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[DS_READ_B32_gfx9_]] %0:vgpr(p3) = COPY $vgpr0 %1:vgpr(p3) = G_LOAD %0 :: (load (p3), align 4, addrspace 3) $vgpr0 = COPY %1 @@ -367,29 +367,29 @@ ; GFX6-LABEL: name: load_local_p5_from_4 ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX6-NEXT: $m0 = S_MOV_B32 -1 - ; GFX6-NEXT: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (p5), addrspace 3) - ; GFX6-NEXT: $vgpr0 = COPY [[DS_READ_B32_]] + ; GFX6-NEXT: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[PRED_COPY]], 0, 0, implicit $m0, implicit $exec :: (load (p5), addrspace 3) + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[DS_READ_B32_]] ; GFX7-LABEL: name: load_local_p5_from_4 ; GFX7: liveins: $vgpr0 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: 
[[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 - ; GFX7-NEXT: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (p5), addrspace 3) - ; GFX7-NEXT: $vgpr0 = COPY [[DS_READ_B32_]] + ; GFX7-NEXT: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[PRED_COPY]], 0, 0, implicit $m0, implicit $exec :: (load (p5), addrspace 3) + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[DS_READ_B32_]] ; GFX9-LABEL: name: load_local_p5_from_4 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[DS_READ_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (p5), addrspace 3) - ; GFX9-NEXT: $vgpr0 = COPY [[DS_READ_B32_gfx9_]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[DS_READ_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[PRED_COPY]], 0, 0, implicit $exec :: (load (p5), addrspace 3) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[DS_READ_B32_gfx9_]] ; GFX10-LABEL: name: load_local_p5_from_4 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[DS_READ_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (p5), addrspace 3) - ; GFX10-NEXT: $vgpr0 = COPY [[DS_READ_B32_gfx9_]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[DS_READ_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[PRED_COPY]], 0, 0, implicit $exec :: (load (p5), addrspace 3) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[DS_READ_B32_gfx9_]] %0:vgpr(p3) = COPY $vgpr0 %1:vgpr(p5) = G_LOAD %0 :: (load (p5), align 4, addrspace 3) $vgpr0 = COPY %1 @@ -410,29 +410,29 @@ ; GFX6-LABEL: name: load_local_p1_align8 ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX6-NEXT: $m0 = S_MOV_B32 -1 - ; GFX6-NEXT: [[DS_READ_B64_:%[0-9]+]]:vreg_64 = DS_READ_B64 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (p1), addrspace 3) - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ_B64_]] + ; GFX6-NEXT: [[DS_READ_B64_:%[0-9]+]]:vreg_64 = DS_READ_B64 [[PRED_COPY]], 0, 0, implicit $m0, implicit $exec :: (load (p1), addrspace 3) + ; GFX6-NEXT: $vgpr0_vgpr1 = PRED_COPY [[DS_READ_B64_]] ; GFX7-LABEL: name: load_local_p1_align8 ; GFX7: liveins: $vgpr0 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 - ; GFX7-NEXT: [[DS_READ_B64_:%[0-9]+]]:vreg_64 = DS_READ_B64 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (p1), addrspace 3) - ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ_B64_]] + ; GFX7-NEXT: [[DS_READ_B64_:%[0-9]+]]:vreg_64 = DS_READ_B64 [[PRED_COPY]], 0, 0, implicit $m0, implicit $exec :: (load (p1), addrspace 3) + ; GFX7-NEXT: $vgpr0_vgpr1 = PRED_COPY [[DS_READ_B64_]] ; GFX9-LABEL: name: load_local_p1_align8 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[DS_READ_B64_gfx9_:%[0-9]+]]:vreg_64 = DS_READ_B64_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (p1), addrspace 3) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ_B64_gfx9_]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[DS_READ_B64_gfx9_:%[0-9]+]]:vreg_64 = DS_READ_B64_gfx9 [[PRED_COPY]], 0, 0, implicit $exec :: (load (p1), 
addrspace 3) + ; GFX9-NEXT: $vgpr0_vgpr1 = PRED_COPY [[DS_READ_B64_gfx9_]] ; GFX10-LABEL: name: load_local_p1_align8 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[DS_READ_B64_gfx9_:%[0-9]+]]:vreg_64 = DS_READ_B64_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (p1), addrspace 3) - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ_B64_gfx9_]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[DS_READ_B64_gfx9_:%[0-9]+]]:vreg_64 = DS_READ_B64_gfx9 [[PRED_COPY]], 0, 0, implicit $exec :: (load (p1), addrspace 3) + ; GFX10-NEXT: $vgpr0_vgpr1 = PRED_COPY [[DS_READ_B64_gfx9_]] %0:vgpr(p3) = COPY $vgpr0 %1:vgpr(p1) = G_LOAD %0 :: (load (p1), align 8, addrspace 3) $vgpr0_vgpr1 = COPY %1 @@ -456,26 +456,26 @@ ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 ; GFX6-NEXT: $m0 = S_MOV_B32 -1 ; GFX6-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p1) = G_LOAD [[COPY]](p3) :: (load (p1), align 4, addrspace 3) - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1) + ; GFX6-NEXT: $vgpr0_vgpr1 = PRED_COPY [[LOAD]](p1) ; GFX7-LABEL: name: load_local_p1_align4 ; GFX7: liveins: $vgpr0 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 - ; GFX7-NEXT: [[DS_READ2_B32_:%[0-9]+]]:vreg_64 = DS_READ2_B32 [[COPY]], 0, 1, 0, implicit $m0, implicit $exec :: (load (p1), align 4, addrspace 3) - ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ2_B32_]] + ; GFX7-NEXT: [[DS_READ2_B32_:%[0-9]+]]:vreg_64 = DS_READ2_B32 [[PRED_COPY]], 0, 1, 0, implicit $m0, implicit $exec :: (load (p1), align 4, addrspace 3) + ; GFX7-NEXT: $vgpr0_vgpr1 = PRED_COPY [[DS_READ2_B32_]] ; GFX9-LABEL: name: load_local_p1_align4 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[DS_READ2_B32_gfx9_:%[0-9]+]]:vreg_64 = DS_READ2_B32_gfx9 [[COPY]], 0, 1, 0, implicit $exec :: (load (p1), align 4, addrspace 3) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ2_B32_gfx9_]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[DS_READ2_B32_gfx9_:%[0-9]+]]:vreg_64 = DS_READ2_B32_gfx9 [[PRED_COPY]], 0, 1, 0, implicit $exec :: (load (p1), align 4, addrspace 3) + ; GFX9-NEXT: $vgpr0_vgpr1 = PRED_COPY [[DS_READ2_B32_gfx9_]] ; GFX10-LABEL: name: load_local_p1_align4 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[DS_READ2_B32_gfx9_:%[0-9]+]]:vreg_64 = DS_READ2_B32_gfx9 [[COPY]], 0, 1, 0, implicit $exec :: (load (p1), align 4, addrspace 3) - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ2_B32_gfx9_]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[DS_READ2_B32_gfx9_:%[0-9]+]]:vreg_64 = DS_READ2_B32_gfx9 [[PRED_COPY]], 0, 1, 0, implicit $exec :: (load (p1), align 4, addrspace 3) + ; GFX10-NEXT: $vgpr0_vgpr1 = PRED_COPY [[DS_READ2_B32_gfx9_]] %0:vgpr(p3) = COPY $vgpr0 %1:vgpr(p1) = G_LOAD %0 :: (load (p1), align 4, addrspace 3) $vgpr0_vgpr1 = COPY %1 @@ -499,26 +499,26 @@ ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 ; GFX6-NEXT: $m0 = S_MOV_B32 -1 ; GFX6-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p999) = G_LOAD [[COPY]](p3) :: (load (p999), addrspace 3) - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p999) + ; GFX6-NEXT: $vgpr0_vgpr1 = PRED_COPY [[LOAD]](p999) ; GFX7-LABEL: name: load_local_p999_from_8 ; GFX7: liveins: $vgpr0 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 ; 
GFX7-NEXT: $m0 = S_MOV_B32 -1 ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p999) = G_LOAD [[COPY]](p3) :: (load (p999), addrspace 3) - ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p999) + ; GFX7-NEXT: $vgpr0_vgpr1 = PRED_COPY [[LOAD]](p999) ; GFX9-LABEL: name: load_local_p999_from_8 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p999) = G_LOAD [[COPY]](p3) :: (load (p999), addrspace 3) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p999) + ; GFX9-NEXT: $vgpr0_vgpr1 = PRED_COPY [[LOAD]](p999) ; GFX10-LABEL: name: load_local_p999_from_8 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p999) = G_LOAD [[COPY]](p3) :: (load (p999), addrspace 3) - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p999) + ; GFX10-NEXT: $vgpr0_vgpr1 = PRED_COPY [[LOAD]](p999) %0:vgpr(p3) = COPY $vgpr0 %1:vgpr(p999) = G_LOAD %0 :: (load (p999), align 8, addrspace 3) $vgpr0_vgpr1 = COPY %1 @@ -542,26 +542,26 @@ ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 ; GFX6-NEXT: $m0 = S_MOV_B32 -1 ; GFX6-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x p3>) = G_LOAD [[COPY]](p3) :: (load (<2 x p3>), addrspace 3) - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) + ; GFX6-NEXT: $vgpr0_vgpr1 = PRED_COPY [[LOAD]](<2 x p3>) ; GFX7-LABEL: name: load_local_v2p3 ; GFX7: liveins: $vgpr0 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x p3>) = G_LOAD [[COPY]](p3) :: (load (<2 x p3>), addrspace 3) - ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) + ; GFX7-NEXT: $vgpr0_vgpr1 = PRED_COPY [[LOAD]](<2 x p3>) ; GFX9-LABEL: name: load_local_v2p3 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x p3>) = G_LOAD [[COPY]](p3) :: (load (<2 x p3>), addrspace 3) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) + ; GFX9-NEXT: $vgpr0_vgpr1 = PRED_COPY [[LOAD]](<2 x p3>) ; GFX10-LABEL: name: load_local_v2p3 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x p3>) = G_LOAD [[COPY]](p3) :: (load (<2 x p3>), addrspace 3) - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) + ; GFX10-NEXT: $vgpr0_vgpr1 = PRED_COPY [[LOAD]](<2 x p3>) %0:vgpr(p3) = COPY $vgpr0 %1:vgpr(<2 x p3>) = G_LOAD %0 :: (load (<2 x p3>), align 8, addrspace 3) $vgpr0_vgpr1 = COPY %1 @@ -582,29 +582,29 @@ ; GFX6-LABEL: name: load_local_v2s16 ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX6-NEXT: $m0 = S_MOV_B32 -1 - ; GFX6-NEXT: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (<2 x s16>), addrspace 3) - ; GFX6-NEXT: $vgpr0 = COPY [[DS_READ_B32_]] + ; GFX6-NEXT: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[PRED_COPY]], 0, 0, implicit $m0, implicit $exec :: (load (<2 x s16>), addrspace 3) + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[DS_READ_B32_]] ; GFX7-LABEL: name: load_local_v2s16 ; GFX7: liveins: $vgpr0 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 - ; GFX7-NEXT: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 
(<2 x s16>), addrspace 3) - ; GFX7-NEXT: $vgpr0 = COPY [[DS_READ_B32_]] + ; GFX7-NEXT: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[PRED_COPY]], 0, 0, implicit $m0, implicit $exec :: (load (<2 x s16>), addrspace 3) + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[DS_READ_B32_]] ; GFX9-LABEL: name: load_local_v2s16 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[DS_READ_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (<2 x s16>), addrspace 3) - ; GFX9-NEXT: $vgpr0 = COPY [[DS_READ_B32_gfx9_]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[DS_READ_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[PRED_COPY]], 0, 0, implicit $exec :: (load (<2 x s16>), addrspace 3) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[DS_READ_B32_gfx9_]] ; GFX10-LABEL: name: load_local_v2s16 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[DS_READ_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (<2 x s16>), addrspace 3) - ; GFX10-NEXT: $vgpr0 = COPY [[DS_READ_B32_gfx9_]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[DS_READ_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[PRED_COPY]], 0, 0, implicit $exec :: (load (<2 x s16>), addrspace 3) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[DS_READ_B32_gfx9_]] %0:vgpr(p3) = COPY $vgpr0 %1:vgpr(<2 x s16>) = G_LOAD %0 :: (load (<2 x s16>), align 4, addrspace 3) $vgpr0 = COPY %1 @@ -625,29 +625,29 @@ ; GFX6-LABEL: name: load_local_v4s16 ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX6-NEXT: $m0 = S_MOV_B32 -1 - ; GFX6-NEXT: [[DS_READ_B64_:%[0-9]+]]:vreg_64 = DS_READ_B64 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (<4 x s16>), addrspace 3) - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ_B64_]] + ; GFX6-NEXT: [[DS_READ_B64_:%[0-9]+]]:vreg_64 = DS_READ_B64 [[PRED_COPY]], 0, 0, implicit $m0, implicit $exec :: (load (<4 x s16>), addrspace 3) + ; GFX6-NEXT: $vgpr0_vgpr1 = PRED_COPY [[DS_READ_B64_]] ; GFX7-LABEL: name: load_local_v4s16 ; GFX7: liveins: $vgpr0 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 - ; GFX7-NEXT: [[DS_READ_B64_:%[0-9]+]]:vreg_64 = DS_READ_B64 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (<4 x s16>), addrspace 3) - ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ_B64_]] + ; GFX7-NEXT: [[DS_READ_B64_:%[0-9]+]]:vreg_64 = DS_READ_B64 [[PRED_COPY]], 0, 0, implicit $m0, implicit $exec :: (load (<4 x s16>), addrspace 3) + ; GFX7-NEXT: $vgpr0_vgpr1 = PRED_COPY [[DS_READ_B64_]] ; GFX9-LABEL: name: load_local_v4s16 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[DS_READ_B64_gfx9_:%[0-9]+]]:vreg_64 = DS_READ_B64_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (<4 x s16>), addrspace 3) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ_B64_gfx9_]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[DS_READ_B64_gfx9_:%[0-9]+]]:vreg_64 = DS_READ_B64_gfx9 [[PRED_COPY]], 0, 0, implicit $exec :: (load (<4 x s16>), addrspace 3) + ; GFX9-NEXT: $vgpr0_vgpr1 = PRED_COPY [[DS_READ_B64_gfx9_]] ; GFX10-LABEL: name: load_local_v4s16 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: 
[[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[DS_READ_B64_gfx9_:%[0-9]+]]:vreg_64 = DS_READ_B64_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (<4 x s16>), addrspace 3) - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ_B64_gfx9_]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[DS_READ_B64_gfx9_:%[0-9]+]]:vreg_64 = DS_READ_B64_gfx9 [[PRED_COPY]], 0, 0, implicit $exec :: (load (<4 x s16>), addrspace 3) + ; GFX10-NEXT: $vgpr0_vgpr1 = PRED_COPY [[DS_READ_B64_gfx9_]] %0:vgpr(p3) = COPY $vgpr0 %1:vgpr(<4 x s16>) = G_LOAD %0 :: (load (<4 x s16>), align 8, addrspace 3) $vgpr0_vgpr1 = COPY %1 @@ -692,31 +692,31 @@ ; GFX6-LABEL: name: load_local_s32_from_1_gep_65535 ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX6-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 65535, implicit $exec - ; GFX6-NEXT: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX6-NEXT: $m0 = S_MOV_B32 -1 - ; GFX6-NEXT: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 %2, 0, 0, implicit $m0, implicit $exec :: (load (s8), addrspace 3) - ; GFX6-NEXT: $vgpr0 = COPY [[DS_READ_U8_]] + ; GFX6-NEXT: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 [[V_ADD_CO_U32_e64_]], 0, 0, implicit $m0, implicit $exec :: (load (s8), addrspace 3) + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[DS_READ_U8_]] ; GFX7-LABEL: name: load_local_s32_from_1_gep_65535 ; GFX7: liveins: $vgpr0 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 - ; GFX7-NEXT: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 [[COPY]], 65535, 0, implicit $m0, implicit $exec :: (load (s8), addrspace 3) - ; GFX7-NEXT: $vgpr0 = COPY [[DS_READ_U8_]] + ; GFX7-NEXT: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 [[PRED_COPY]], 65535, 0, implicit $m0, implicit $exec :: (load (s8), addrspace 3) + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[DS_READ_U8_]] ; GFX9-LABEL: name: load_local_s32_from_1_gep_65535 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[DS_READ_U8_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_U8_gfx9 [[COPY]], 65535, 0, implicit $exec :: (load (s8), addrspace 3) - ; GFX9-NEXT: $vgpr0 = COPY [[DS_READ_U8_gfx9_]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[DS_READ_U8_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_U8_gfx9 [[PRED_COPY]], 65535, 0, implicit $exec :: (load (s8), addrspace 3) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[DS_READ_U8_gfx9_]] ; GFX10-LABEL: name: load_local_s32_from_1_gep_65535 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[DS_READ_U8_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_U8_gfx9 [[COPY]], 65535, 0, implicit $exec :: (load (s8), addrspace 3) - ; GFX10-NEXT: $vgpr0 = COPY [[DS_READ_U8_gfx9_]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[DS_READ_U8_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_U8_gfx9 [[PRED_COPY]], 65535, 0, implicit $exec :: (load (s8), addrspace 3) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[DS_READ_U8_gfx9_]] %0:vgpr(p3) = COPY $vgpr0 %1:vgpr(s32) = G_CONSTANT i32 65535 %2:vgpr(p3) = G_PTR_ADD %0, %1 @@ 
-739,37 +739,37 @@ ; GFX6-LABEL: name: load_local_s32_from_1_gep_65535_known_bits_base_address ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX6-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2147483647, implicit $exec - ; GFX6-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY]], [[V_MOV_B32_e32_]], implicit $exec + ; GFX6-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PRED_COPY]], [[V_MOV_B32_e32_]], implicit $exec ; GFX6-NEXT: $m0 = S_MOV_B32 -1 ; GFX6-NEXT: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 [[V_AND_B32_e64_]], 65535, 0, implicit $m0, implicit $exec :: (load (s8), addrspace 3) - ; GFX6-NEXT: $vgpr0 = COPY [[DS_READ_U8_]] + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[DS_READ_U8_]] ; GFX7-LABEL: name: load_local_s32_from_1_gep_65535_known_bits_base_address ; GFX7: liveins: $vgpr0 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX7-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2147483647, implicit $exec - ; GFX7-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY]], [[V_MOV_B32_e32_]], implicit $exec + ; GFX7-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PRED_COPY]], [[V_MOV_B32_e32_]], implicit $exec ; GFX7-NEXT: $m0 = S_MOV_B32 -1 ; GFX7-NEXT: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 [[V_AND_B32_e64_]], 65535, 0, implicit $m0, implicit $exec :: (load (s8), addrspace 3) - ; GFX7-NEXT: $vgpr0 = COPY [[DS_READ_U8_]] + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[DS_READ_U8_]] ; GFX9-LABEL: name: load_local_s32_from_1_gep_65535_known_bits_base_address ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2147483647, implicit $exec - ; GFX9-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY]], [[V_MOV_B32_e32_]], implicit $exec + ; GFX9-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PRED_COPY]], [[V_MOV_B32_e32_]], implicit $exec ; GFX9-NEXT: [[DS_READ_U8_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_U8_gfx9 [[V_AND_B32_e64_]], 65535, 0, implicit $exec :: (load (s8), addrspace 3) - ; GFX9-NEXT: $vgpr0 = COPY [[DS_READ_U8_gfx9_]] + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[DS_READ_U8_gfx9_]] ; GFX10-LABEL: name: load_local_s32_from_1_gep_65535_known_bits_base_address ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2147483647, implicit $exec - ; GFX10-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY]], [[V_MOV_B32_e32_]], implicit $exec + ; GFX10-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PRED_COPY]], [[V_MOV_B32_e32_]], implicit $exec ; GFX10-NEXT: [[DS_READ_U8_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_U8_gfx9 [[V_AND_B32_e64_]], 65535, 0, implicit $exec :: (load (s8), addrspace 3) - ; GFX10-NEXT: $vgpr0 = COPY [[DS_READ_U8_gfx9_]] + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[DS_READ_U8_gfx9_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = G_CONSTANT i32 2147483647 %2:vgpr(s32) = G_AND %0, %1 @@ -795,37 +795,37 @@ ; GFX6-LABEL: name: load_local_s32_from_1_gep_65536 ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY 
$vgpr0 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX6-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 65536, implicit $exec - ; GFX6-NEXT: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX6-NEXT: $m0 = S_MOV_B32 -1 - ; GFX6-NEXT: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 %2, 0, 0, implicit $m0, implicit $exec :: (load (s8), addrspace 3) - ; GFX6-NEXT: $vgpr0 = COPY [[DS_READ_U8_]] + ; GFX6-NEXT: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 [[V_ADD_CO_U32_e64_]], 0, 0, implicit $m0, implicit $exec :: (load (s8), addrspace 3) + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[DS_READ_U8_]] ; GFX7-LABEL: name: load_local_s32_from_1_gep_65536 ; GFX7: liveins: $vgpr0 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX7-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 65536, implicit $exec - ; GFX7-NEXT: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX7-NEXT: $m0 = S_MOV_B32 -1 - ; GFX7-NEXT: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 %2, 0, 0, implicit $m0, implicit $exec :: (load (s8), addrspace 3) - ; GFX7-NEXT: $vgpr0 = COPY [[DS_READ_U8_]] + ; GFX7-NEXT: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 [[V_ADD_CO_U32_e64_]], 0, 0, implicit $m0, implicit $exec :: (load (s8), addrspace 3) + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[DS_READ_U8_]] ; GFX9-LABEL: name: load_local_s32_from_1_gep_65536 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 65536, implicit $exec - ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[PRED_COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX9-NEXT: [[DS_READ_U8_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_U8_gfx9 [[V_ADD_U32_e64_]], 0, 0, implicit $exec :: (load (s8), addrspace 3) - ; GFX9-NEXT: $vgpr0 = COPY [[DS_READ_U8_gfx9_]] + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[DS_READ_U8_gfx9_]] ; GFX10-LABEL: name: load_local_s32_from_1_gep_65536 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 65536, implicit $exec - ; GFX10-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX10-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[PRED_COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX10-NEXT: [[DS_READ_U8_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_U8_gfx9 [[V_ADD_U32_e64_]], 0, 0, implicit $exec :: (load (s8), addrspace 3) - ; GFX10-NEXT: $vgpr0 = COPY [[DS_READ_U8_gfx9_]] + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[DS_READ_U8_gfx9_]] %0:vgpr(p3) = COPY $vgpr0 %1:vgpr(s32) = G_CONSTANT i32 65536 %2:vgpr(p3) = G_PTR_ADD %0, %1 @@ -848,37 +848,37 @@ ; GFX6-LABEL: name: 
load_local_s32_from_1_gep_m1 ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX6-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec - ; GFX6-NEXT: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX6-NEXT: $m0 = S_MOV_B32 -1 - ; GFX6-NEXT: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 %2, 0, 0, implicit $m0, implicit $exec :: (load (s8), addrspace 3) - ; GFX6-NEXT: $vgpr0 = COPY [[DS_READ_U8_]] + ; GFX6-NEXT: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 [[V_ADD_CO_U32_e64_]], 0, 0, implicit $m0, implicit $exec :: (load (s8), addrspace 3) + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[DS_READ_U8_]] ; GFX7-LABEL: name: load_local_s32_from_1_gep_m1 ; GFX7: liveins: $vgpr0 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX7-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec - ; GFX7-NEXT: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX7-NEXT: $m0 = S_MOV_B32 -1 - ; GFX7-NEXT: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 %2, 0, 0, implicit $m0, implicit $exec :: (load (s8), addrspace 3) - ; GFX7-NEXT: $vgpr0 = COPY [[DS_READ_U8_]] + ; GFX7-NEXT: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 [[V_ADD_CO_U32_e64_]], 0, 0, implicit $m0, implicit $exec :: (load (s8), addrspace 3) + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[DS_READ_U8_]] ; GFX9-LABEL: name: load_local_s32_from_1_gep_m1 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec - ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[PRED_COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX9-NEXT: [[DS_READ_U8_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_U8_gfx9 [[V_ADD_U32_e64_]], 0, 0, implicit $exec :: (load (s8), addrspace 3) - ; GFX9-NEXT: $vgpr0 = COPY [[DS_READ_U8_gfx9_]] + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[DS_READ_U8_gfx9_]] ; GFX10-LABEL: name: load_local_s32_from_1_gep_m1 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec - ; GFX10-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX10-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[PRED_COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX10-NEXT: [[DS_READ_U8_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_U8_gfx9 [[V_ADD_U32_e64_]], 0, 0, implicit $exec :: (load (s8), addrspace 3) - ; GFX10-NEXT: $vgpr0 = COPY [[DS_READ_U8_gfx9_]] + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[DS_READ_U8_gfx9_]] %0:vgpr(p3) = COPY $vgpr0 %1:vgpr(s32) = G_CONSTANT 
i32 -1 %2:vgpr(p3) = G_PTR_ADD %0, %1 @@ -906,26 +906,26 @@ ; GFX6-NEXT: [[PTR_ADD:%[0-9]+]]:vgpr(p3) = G_PTR_ADD [[COPY]], [[C]](s32) ; GFX6-NEXT: $m0 = S_MOV_B32 -1 ; GFX6-NEXT: [[LOAD:%[0-9]+]]:vreg_64(s64) = G_LOAD [[PTR_ADD]](p3) :: (load (s64), align 4, addrspace 3) - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; GFX6-NEXT: $vgpr0_vgpr1 = PRED_COPY [[LOAD]](s64) ; GFX7-LABEL: name: load_local_s64_align4_from_1_gep_1016 ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 - ; GFX7-NEXT: [[DS_READ2_B32_:%[0-9]+]]:vreg_64 = DS_READ2_B32 [[COPY]], 254, 255, 0, implicit $m0, implicit $exec :: (load (s64), align 4, addrspace 3) - ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ2_B32_]] + ; GFX7-NEXT: [[DS_READ2_B32_:%[0-9]+]]:vreg_64 = DS_READ2_B32 [[PRED_COPY]], 254, 255, 0, implicit $m0, implicit $exec :: (load (s64), align 4, addrspace 3) + ; GFX7-NEXT: $vgpr0_vgpr1 = PRED_COPY [[DS_READ2_B32_]] ; GFX9-LABEL: name: load_local_s64_align4_from_1_gep_1016 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[DS_READ2_B32_gfx9_:%[0-9]+]]:vreg_64 = DS_READ2_B32_gfx9 [[COPY]], 254, 255, 0, implicit $exec :: (load (s64), align 4, addrspace 3) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ2_B32_gfx9_]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[DS_READ2_B32_gfx9_:%[0-9]+]]:vreg_64 = DS_READ2_B32_gfx9 [[PRED_COPY]], 254, 255, 0, implicit $exec :: (load (s64), align 4, addrspace 3) + ; GFX9-NEXT: $vgpr0_vgpr1 = PRED_COPY [[DS_READ2_B32_gfx9_]] ; GFX10-LABEL: name: load_local_s64_align4_from_1_gep_1016 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[DS_READ2_B32_gfx9_:%[0-9]+]]:vreg_64 = DS_READ2_B32_gfx9 [[COPY]], 254, 255, 0, implicit $exec :: (load (s64), align 4, addrspace 3) - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ2_B32_gfx9_]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[DS_READ2_B32_gfx9_:%[0-9]+]]:vreg_64 = DS_READ2_B32_gfx9 [[PRED_COPY]], 254, 255, 0, implicit $exec :: (load (s64), align 4, addrspace 3) + ; GFX10-NEXT: $vgpr0_vgpr1 = PRED_COPY [[DS_READ2_B32_gfx9_]] %0:vgpr(p3) = COPY $vgpr0 %1:vgpr(s32) = G_CONSTANT i32 1016 %2:vgpr(p3) = G_PTR_ADD %0, %1 @@ -953,32 +953,32 @@ ; GFX6-NEXT: [[PTR_ADD:%[0-9]+]]:vgpr(p3) = G_PTR_ADD [[COPY]], [[C]](s32) ; GFX6-NEXT: $m0 = S_MOV_B32 -1 ; GFX6-NEXT: [[LOAD:%[0-9]+]]:vreg_64(s64) = G_LOAD [[PTR_ADD]](p3) :: (load (s64), align 4, addrspace 3) - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; GFX6-NEXT: $vgpr0_vgpr1 = PRED_COPY [[LOAD]](s64) ; GFX7-LABEL: name: load_local_s64_align4_from_1_gep_1020 ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX7-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1020, implicit $exec - ; GFX7-NEXT: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX7-NEXT: $m0 = S_MOV_B32 -1 - ; GFX7-NEXT: [[DS_READ2_B32_:%[0-9]+]]:vreg_64 = DS_READ2_B32 %2, 0, 1, 0, implicit $m0, implicit $exec :: (load 
(s64), align 4, addrspace 3) - ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ2_B32_]] + ; GFX7-NEXT: [[DS_READ2_B32_:%[0-9]+]]:vreg_64 = DS_READ2_B32 [[V_ADD_CO_U32_e64_]], 0, 1, 0, implicit $m0, implicit $exec :: (load (s64), align 4, addrspace 3) + ; GFX7-NEXT: $vgpr0_vgpr1 = PRED_COPY [[DS_READ2_B32_]] ; GFX9-LABEL: name: load_local_s64_align4_from_1_gep_1020 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1020, implicit $exec - ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[PRED_COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX9-NEXT: [[DS_READ2_B32_gfx9_:%[0-9]+]]:vreg_64 = DS_READ2_B32_gfx9 [[V_ADD_U32_e64_]], 0, 1, 0, implicit $exec :: (load (s64), align 4, addrspace 3) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ2_B32_gfx9_]] + ; GFX9-NEXT: $vgpr0_vgpr1 = PRED_COPY [[DS_READ2_B32_gfx9_]] ; GFX10-LABEL: name: load_local_s64_align4_from_1_gep_1020 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1020, implicit $exec - ; GFX10-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX10-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[PRED_COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX10-NEXT: [[DS_READ2_B32_gfx9_:%[0-9]+]]:vreg_64 = DS_READ2_B32_gfx9 [[V_ADD_U32_e64_]], 0, 1, 0, implicit $exec :: (load (s64), align 4, addrspace 3) - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ2_B32_gfx9_]] + ; GFX10-NEXT: $vgpr0_vgpr1 = PRED_COPY [[DS_READ2_B32_gfx9_]] %0:vgpr(p3) = COPY $vgpr0 %1:vgpr(s32) = G_CONSTANT i32 1020 %2:vgpr(p3) = G_PTR_ADD %0, %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-private.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-private.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-private.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-private.mir @@ -22,21 +22,21 @@ ; GFX6-LABEL: name: load_private_s32_from_4 ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s32), addrspace 5) - ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[PRED_COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s32), addrspace 5) + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX9-LABEL: name: load_private_s32_from_4 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s32), addrspace 5) - ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = 
BUFFER_LOAD_DWORD_OFFEN [[PRED_COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s32), addrspace 5) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX11-LABEL: name: load_private_s32_from_4 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[SCRATCH_LOAD_DWORD:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s32), addrspace 5) - ; GFX11-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_DWORD]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[SCRATCH_LOAD_DWORD:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_DWORD [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s32), addrspace 5) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[SCRATCH_LOAD_DWORD]] %0:vgpr(p5) = COPY $vgpr0 %1:vgpr(s32) = G_LOAD %0 :: (load (s32), align 4, addrspace 5) $vgpr0 = COPY %1 @@ -60,21 +60,21 @@ ; GFX6-LABEL: name: load_private_s32_from_2 ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[BUFFER_LOAD_USHORT_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s16), addrspace 5) - ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_USHORT_OFFEN]] + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[BUFFER_LOAD_USHORT_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_OFFEN [[PRED_COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s16), addrspace 5) + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_USHORT_OFFEN]] ; GFX9-LABEL: name: load_private_s32_from_2 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[BUFFER_LOAD_USHORT_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s16), addrspace 5) - ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_USHORT_OFFEN]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[BUFFER_LOAD_USHORT_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_OFFEN [[PRED_COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s16), addrspace 5) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_USHORT_OFFEN]] ; GFX11-LABEL: name: load_private_s32_from_2 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[SCRATCH_LOAD_USHORT:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_USHORT [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s16), addrspace 5) - ; GFX11-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_USHORT]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[SCRATCH_LOAD_USHORT:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_USHORT [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s16), addrspace 5) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[SCRATCH_LOAD_USHORT]] %0:vgpr(p5) = COPY $vgpr0 %1:vgpr(s32) = G_LOAD %0 :: (load (s16), align 2, addrspace 5) $vgpr0 = COPY %1 @@ -98,21 +98,21 @@ ; GFX6-LABEL: name: load_private_s32_from_1 ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) - ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: 
[[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[PRED_COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_UBYTE_OFFEN]] ; GFX9-LABEL: name: load_private_s32_from_1 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) - ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[PRED_COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_UBYTE_OFFEN]] ; GFX11-LABEL: name: load_private_s32_from_1 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[SCRATCH_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 5) - ; GFX11-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_UBYTE]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[SCRATCH_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 5) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[SCRATCH_LOAD_UBYTE]] %0:vgpr(p5) = COPY $vgpr0 %1:vgpr(s32) = G_LOAD %0 :: (load (s8), align 1, addrspace 5) $vgpr0 = COPY %1 @@ -136,21 +136,21 @@ ; GFX6-LABEL: name: load_private_p3_from_4 ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (p3), addrspace 5) - ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[PRED_COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (p3), addrspace 5) + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX9-LABEL: name: load_private_p3_from_4 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (p3), addrspace 5) - ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[PRED_COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (p3), addrspace 5) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX11-LABEL: name: load_private_p3_from_4 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[SCRATCH_LOAD_DWORD:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (p3), addrspace 5) - ; GFX11-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_DWORD]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[SCRATCH_LOAD_DWORD:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_DWORD [[PRED_COPY]], 0, 0, 
implicit $exec, implicit $flat_scr :: (load (p3), addrspace 5) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[SCRATCH_LOAD_DWORD]] %0:vgpr(p5) = COPY $vgpr0 %1:vgpr(p3) = G_LOAD %0 :: (load (p3), align 4, addrspace 5) $vgpr0 = COPY %1 @@ -174,21 +174,21 @@ ; GFX6-LABEL: name: load_private_p5_from_4 ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (p5), addrspace 5) - ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[PRED_COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (p5), addrspace 5) + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX9-LABEL: name: load_private_p5_from_4 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (p5), addrspace 5) - ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[PRED_COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (p5), addrspace 5) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX11-LABEL: name: load_private_p5_from_4 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[SCRATCH_LOAD_DWORD:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (p5), addrspace 5) - ; GFX11-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_DWORD]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[SCRATCH_LOAD_DWORD:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_DWORD [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (p5), addrspace 5) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[SCRATCH_LOAD_DWORD]] %0:vgpr(p5) = COPY $vgpr0 %1:vgpr(p5) = G_LOAD %0 :: (load (p5), align 4, addrspace 5) $vgpr0 = COPY %1 @@ -213,21 +213,21 @@ ; GFX6-LABEL: name: load_private_v2s16 ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (<2 x s16>), addrspace 5) - ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[PRED_COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (<2 x s16>), addrspace 5) + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX9-LABEL: name: load_private_v2s16 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (<2 x s16>), addrspace 5) - ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: 
[[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[PRED_COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (<2 x s16>), addrspace 5) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX11-LABEL: name: load_private_v2s16 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[SCRATCH_LOAD_DWORD:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s16>), addrspace 5) - ; GFX11-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_DWORD]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[SCRATCH_LOAD_DWORD:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_DWORD [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s16>), addrspace 5) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[SCRATCH_LOAD_DWORD]] %0:vgpr(p5) = COPY $vgpr0 %1:vgpr(<2 x s16>) = G_LOAD %0 :: (load (<2 x s16>), align 4, addrspace 5) $vgpr0 = COPY %1 @@ -255,23 +255,23 @@ ; GFX6-LABEL: name: load_private_s32_from_1_gep_2047 ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX6-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2047, implicit $exec - ; GFX6-NEXT: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec - ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) - ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_CO_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_UBYTE_OFFEN]] ; GFX9-LABEL: name: load_private_s32_from_1_gep_2047 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 2047, 0, 0, implicit $exec :: (load (s8), addrspace 5) - ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[PRED_COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 2047, 0, 0, implicit $exec :: (load (s8), addrspace 5) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_UBYTE_OFFEN]] ; GFX11-LABEL: name: load_private_s32_from_1_gep_2047 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[SCRATCH_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE [[COPY]], 2047, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 5) - ; GFX11-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_UBYTE]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[SCRATCH_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE [[PRED_COPY]], 2047, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 5) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[SCRATCH_LOAD_UBYTE]] %0:vgpr(p5) = COPY $vgpr0 %1:vgpr(s32) = G_CONSTANT i32 2047 %2:vgpr(p5) = G_PTR_ADD %0, %1 @@ -297,27 +297,27 @@ 
; GFX6-LABEL: name: load_private_s32_from_1_gep_2047_known_bits ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX6-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2147483647, implicit $exec - ; GFX6-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY]], [[V_MOV_B32_e32_]], implicit $exec + ; GFX6-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PRED_COPY]], [[V_MOV_B32_e32_]], implicit $exec ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_AND_B32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 2047, 0, 0, implicit $exec :: (load (s8), addrspace 5) - ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_UBYTE_OFFEN]] ; GFX9-LABEL: name: load_private_s32_from_1_gep_2047_known_bits ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2147483647, implicit $exec - ; GFX9-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY]], [[V_MOV_B32_e32_]], implicit $exec + ; GFX9-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PRED_COPY]], [[V_MOV_B32_e32_]], implicit $exec ; GFX9-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_AND_B32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 2047, 0, 0, implicit $exec :: (load (s8), addrspace 5) - ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_UBYTE_OFFEN]] ; GFX11-LABEL: name: load_private_s32_from_1_gep_2047_known_bits ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2147483647, implicit $exec - ; GFX11-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY]], [[V_MOV_B32_e32_]], implicit $exec + ; GFX11-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PRED_COPY]], [[V_MOV_B32_e32_]], implicit $exec ; GFX11-NEXT: [[SCRATCH_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE [[V_AND_B32_e64_]], 2047, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 5) - ; GFX11-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_UBYTE]] + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[SCRATCH_LOAD_UBYTE]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = G_CONSTANT i32 2147483647 %2:vgpr(s32) = G_AND %0, %1 @@ -346,23 +346,23 @@ ; GFX6-LABEL: name: load_private_s32_from_1_gep_2048 ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX6-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2048, implicit $exec - ; GFX6-NEXT: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec - ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) - ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN 
[[V_ADD_CO_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_UBYTE_OFFEN]] ; GFX9-LABEL: name: load_private_s32_from_1_gep_2048 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 2048, 0, 0, implicit $exec :: (load (s8), addrspace 5) - ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[PRED_COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 2048, 0, 0, implicit $exec :: (load (s8), addrspace 5) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_UBYTE_OFFEN]] ; GFX11-LABEL: name: load_private_s32_from_1_gep_2048 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[SCRATCH_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE [[COPY]], 2048, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 5) - ; GFX11-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_UBYTE]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[SCRATCH_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE [[PRED_COPY]], 2048, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 5) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[SCRATCH_LOAD_UBYTE]] %0:vgpr(p5) = COPY $vgpr0 %1:vgpr(s32) = G_CONSTANT i32 2048 %2:vgpr(p5) = G_PTR_ADD %0, %1 @@ -388,25 +388,25 @@ ; GFX6-LABEL: name: load_private_s32_from_1_gep_m2047 ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX6-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -2047, implicit $exec - ; GFX6-NEXT: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec - ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) - ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_CO_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_UBYTE_OFFEN]] ; GFX9-LABEL: name: load_private_s32_from_1_gep_m2047 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -2047, implicit $exec - ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[PRED_COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX9-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) - ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_UBYTE_OFFEN]] ; GFX11-LABEL: 
name: load_private_s32_from_1_gep_m2047 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[SCRATCH_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE [[COPY]], -2047, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 5) - ; GFX11-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_UBYTE]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[SCRATCH_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE [[PRED_COPY]], -2047, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 5) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[SCRATCH_LOAD_UBYTE]] %0:vgpr(p5) = COPY $vgpr0 %1:vgpr(s32) = G_CONSTANT i32 -2047 %2:vgpr(p5) = G_PTR_ADD %0, %1 @@ -432,25 +432,25 @@ ; GFX6-LABEL: name: load_private_s32_from_1_gep_m2048 ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX6-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -2048, implicit $exec - ; GFX6-NEXT: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec - ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) - ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_CO_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_UBYTE_OFFEN]] ; GFX9-LABEL: name: load_private_s32_from_1_gep_m2048 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -2048, implicit $exec - ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[PRED_COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX9-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) - ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_UBYTE_OFFEN]] ; GFX11-LABEL: name: load_private_s32_from_1_gep_m2048 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[SCRATCH_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE [[COPY]], -2048, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 5) - ; GFX11-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_UBYTE]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[SCRATCH_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE [[PRED_COPY]], -2048, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 5) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[SCRATCH_LOAD_UBYTE]] %0:vgpr(p5) = COPY $vgpr0 %1:vgpr(s32) = G_CONSTANT i32 -2048 %2:vgpr(p5) = G_PTR_ADD %0, %1 @@ -476,23 +476,23 @@ ; GFX6-LABEL: name: load_private_s32_from_1_gep_4095 ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: 
[[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX6-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec - ; GFX6-NEXT: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec - ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) - ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_CO_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_UBYTE_OFFEN]] ; GFX9-LABEL: name: load_private_s32_from_1_gep_4095 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, implicit $exec :: (load (s8), addrspace 5) - ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[PRED_COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, implicit $exec :: (load (s8), addrspace 5) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_UBYTE_OFFEN]] ; GFX11-LABEL: name: load_private_s32_from_1_gep_4095 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[SCRATCH_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE [[COPY]], 4095, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 5) - ; GFX11-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_UBYTE]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[SCRATCH_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE [[PRED_COPY]], 4095, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 5) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[SCRATCH_LOAD_UBYTE]] %0:vgpr(p5) = COPY $vgpr0 %1:vgpr(s32) = G_CONSTANT i32 4095 %2:vgpr(p5) = G_PTR_ADD %0, %1 @@ -518,27 +518,27 @@ ; GFX6-LABEL: name: load_private_s32_from_1_gep_4096 ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX6-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec - ; GFX6-NEXT: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec - ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) - ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_CO_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_UBYTE_OFFEN]] ; GFX9-LABEL: name: load_private_s32_from_1_gep_4096 ; GFX9: 
liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec - ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[PRED_COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX9-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) - ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_UBYTE_OFFEN]] ; GFX11-LABEL: name: load_private_s32_from_1_gep_4096 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec - ; GFX11-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX11-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[PRED_COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX11-NEXT: [[SCRATCH_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE [[V_ADD_U32_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 5) - ; GFX11-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_UBYTE]] + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[SCRATCH_LOAD_UBYTE]] %0:vgpr(p5) = COPY $vgpr0 %1:vgpr(s32) = G_CONSTANT i32 4096 %2:vgpr(p5) = G_PTR_ADD %0, %1 @@ -564,25 +564,25 @@ ; GFX6-LABEL: name: load_private_s32_from_1_gep_m4095 ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX6-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -4095, implicit $exec - ; GFX6-NEXT: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec - ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) - ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_CO_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_UBYTE_OFFEN]] ; GFX9-LABEL: name: load_private_s32_from_1_gep_m4095 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -4095, implicit $exec - ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[PRED_COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX9-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) - ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + 
; GFX9-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_UBYTE_OFFEN]] ; GFX11-LABEL: name: load_private_s32_from_1_gep_m4095 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[SCRATCH_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE [[COPY]], -4095, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 5) - ; GFX11-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_UBYTE]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[SCRATCH_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE [[PRED_COPY]], -4095, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 5) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[SCRATCH_LOAD_UBYTE]] %0:vgpr(p5) = COPY $vgpr0 %1:vgpr(s32) = G_CONSTANT i32 -4095 %2:vgpr(p5) = G_PTR_ADD %0, %1 @@ -608,25 +608,25 @@ ; GFX6-LABEL: name: load_private_s32_from_1_gep_m4096 ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX6-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -4096, implicit $exec - ; GFX6-NEXT: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec - ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) - ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_CO_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_UBYTE_OFFEN]] ; GFX9-LABEL: name: load_private_s32_from_1_gep_m4096 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -4096, implicit $exec - ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[PRED_COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX9-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) - ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_UBYTE_OFFEN]] ; GFX11-LABEL: name: load_private_s32_from_1_gep_m4096 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[SCRATCH_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE [[COPY]], -4096, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 5) - ; GFX11-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_UBYTE]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[SCRATCH_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE [[PRED_COPY]], -4096, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 5) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[SCRATCH_LOAD_UBYTE]] %0:vgpr(p5) = COPY $vgpr0 %1:vgpr(s32) = G_CONSTANT i32 -4096 %2:vgpr(p5) = G_PTR_ADD %0, %1 @@ -652,27 +652,27 @@ ; GFX6-LABEL: name: 
load_private_s32_from_1_gep_8191 ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX6-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8191, implicit $exec - ; GFX6-NEXT: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec - ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) - ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_CO_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_UBYTE_OFFEN]] ; GFX9-LABEL: name: load_private_s32_from_1_gep_8191 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8191, implicit $exec - ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[PRED_COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX9-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) - ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_UBYTE_OFFEN]] ; GFX11-LABEL: name: load_private_s32_from_1_gep_8191 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8191, implicit $exec - ; GFX11-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX11-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[PRED_COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX11-NEXT: [[SCRATCH_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE [[V_ADD_U32_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 5) - ; GFX11-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_UBYTE]] + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[SCRATCH_LOAD_UBYTE]] %0:vgpr(p5) = COPY $vgpr0 %1:vgpr(s32) = G_CONSTANT i32 8191 %2:vgpr(p5) = G_PTR_ADD %0, %1 @@ -698,27 +698,27 @@ ; GFX6-LABEL: name: load_private_s32_from_1_gep_8192 ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX6-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8192, implicit $exec - ; GFX6-NEXT: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec - ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) - ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead 
[[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_CO_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_UBYTE_OFFEN]] ; GFX9-LABEL: name: load_private_s32_from_1_gep_8192 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8192, implicit $exec - ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[PRED_COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX9-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) - ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_UBYTE_OFFEN]] ; GFX11-LABEL: name: load_private_s32_from_1_gep_8192 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8192, implicit $exec - ; GFX11-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX11-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[PRED_COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX11-NEXT: [[SCRATCH_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE [[V_ADD_U32_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 5) - ; GFX11-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_UBYTE]] + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[SCRATCH_LOAD_UBYTE]] %0:vgpr(p5) = COPY $vgpr0 %1:vgpr(s32) = G_CONSTANT i32 8192 %2:vgpr(p5) = G_PTR_ADD %0, %1 @@ -744,27 +744,27 @@ ; GFX6-LABEL: name: load_private_s32_from_1_gep_m8191 ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX6-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -8191, implicit $exec - ; GFX6-NEXT: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec - ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) - ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_CO_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_UBYTE_OFFEN]] ; GFX9-LABEL: name: load_private_s32_from_1_gep_m8191 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -8191, implicit $exec - ; GFX9-NEXT: 
[[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[PRED_COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX9-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) - ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_UBYTE_OFFEN]] ; GFX11-LABEL: name: load_private_s32_from_1_gep_m8191 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -8191, implicit $exec - ; GFX11-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX11-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[PRED_COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX11-NEXT: [[SCRATCH_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE [[V_ADD_U32_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 5) - ; GFX11-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_UBYTE]] + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[SCRATCH_LOAD_UBYTE]] %0:vgpr(p5) = COPY $vgpr0 %1:vgpr(s32) = G_CONSTANT i32 -8191 %2:vgpr(p5) = G_PTR_ADD %0, %1 @@ -790,27 +790,27 @@ ; GFX6-LABEL: name: load_private_s32_from_1_gep_m8192 ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX6-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -8192, implicit $exec - ; GFX6-NEXT: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec - ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) - ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_CO_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_UBYTE_OFFEN]] ; GFX9-LABEL: name: load_private_s32_from_1_gep_m8192 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -8192, implicit $exec - ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[PRED_COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX9-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) - ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_UBYTE_OFFEN]] ; GFX11-LABEL: name: load_private_s32_from_1_gep_m8192 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX11-NEXT: 
[[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -8192, implicit $exec - ; GFX11-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX11-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[PRED_COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX11-NEXT: [[SCRATCH_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE [[V_ADD_U32_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 5) - ; GFX11-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_UBYTE]] + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[SCRATCH_LOAD_UBYTE]] %0:vgpr(p5) = COPY $vgpr0 %1:vgpr(s32) = G_CONSTANT i32 -8192 %2:vgpr(p5) = G_PTR_ADD %0, %1 @@ -834,14 +834,14 @@ ; GFX6-LABEL: name: load_private_s32_from_4_constant_0 ; GFX6: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s32), addrspace 5) - ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]] + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_OFFSET]] ; GFX9-LABEL: name: load_private_s32_from_4_constant_0 ; GFX9: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s32), addrspace 5) - ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]] + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_OFFSET]] ; GFX11-LABEL: name: load_private_s32_from_4_constant_0 ; GFX11: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX11-NEXT: [[SCRATCH_LOAD_DWORD:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_DWORD [[V_MOV_B32_e32_]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s32), addrspace 5) - ; GFX11-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_DWORD]] + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[SCRATCH_LOAD_DWORD]] %0:vgpr(p5) = G_CONSTANT i32 0 %1:vgpr(s32) = G_LOAD %0 :: (load (s32), align 4, addrspace 5) $vgpr0 = COPY %1 @@ -863,14 +863,14 @@ ; GFX6-LABEL: name: load_private_s32_from_4_constant_sgpr_16 ; GFX6: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 16, 0, 0, implicit $exec :: (load (s32), addrspace 5) - ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]] + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_OFFSET]] ; GFX9-LABEL: name: load_private_s32_from_4_constant_sgpr_16 ; GFX9: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 16, 0, 0, implicit $exec :: (load (s32), addrspace 5) - ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]] + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_OFFSET]] ; GFX11-LABEL: name: load_private_s32_from_4_constant_sgpr_16 ; GFX11: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xexec_hi = S_MOV_B32 16 ; GFX11-NEXT: [[SCRATCH_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_DWORD_SADDR [[S_MOV_B32_]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s32), addrspace 5) - ; GFX11-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_DWORD_SADDR]] + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[SCRATCH_LOAD_DWORD_SADDR]] %0:sgpr(p5) = G_CONSTANT i32 16 %1:vgpr(s32) = G_LOAD %0 :: (load (s32), align 4, addrspace 5) $vgpr0 = COPY %1 @@ -892,14 +892,14 @@ ; GFX6-LABEL: name: load_private_s32_from_1_constant_4095 ; GFX6: [[BUFFER_LOAD_UBYTE_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, implicit $exec :: (load (s8), addrspace 5) - ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFSET]] + ; GFX6-NEXT: $vgpr0 = PRED_COPY 
[[BUFFER_LOAD_UBYTE_OFFSET]] ; GFX9-LABEL: name: load_private_s32_from_1_constant_4095 ; GFX9: [[BUFFER_LOAD_UBYTE_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, implicit $exec :: (load (s8), addrspace 5) - ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFSET]] + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_UBYTE_OFFSET]] ; GFX11-LABEL: name: load_private_s32_from_1_constant_4095 ; GFX11: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec ; GFX11-NEXT: [[SCRATCH_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE [[V_MOV_B32_e32_]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 5) - ; GFX11-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_UBYTE]] + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[SCRATCH_LOAD_UBYTE]] %0:vgpr(p5) = G_CONSTANT i32 4095 %1:vgpr(s32) = G_LOAD %0 :: (load (s8), align 1, addrspace 5) $vgpr0 = COPY %1 @@ -922,15 +922,15 @@ ; GFX6-LABEL: name: load_private_s32_from_1_constant_4096 ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_MOV_B32_e32_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) - ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_UBYTE_OFFEN]] ; GFX9-LABEL: name: load_private_s32_from_1_constant_4096 ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec ; GFX9-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_MOV_B32_e32_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) - ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_UBYTE_OFFEN]] ; GFX11-LABEL: name: load_private_s32_from_1_constant_4096 ; GFX11: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec ; GFX11-NEXT: [[SCRATCH_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE [[V_MOV_B32_e32_]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 5) - ; GFX11-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_UBYTE]] + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[SCRATCH_LOAD_UBYTE]] %0:vgpr(p5) = G_CONSTANT i32 4096 %1:vgpr(s32) = G_LOAD %0 :: (load (s8), align 1, addrspace 5) $vgpr0 = COPY %1 @@ -954,13 +954,13 @@ ; GFX6-LABEL: name: load_private_s32_from_fi ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s32), addrspace 5) - ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX9-LABEL: name: load_private_s32_from_fi ; GFX9: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s32), addrspace 5) - ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX11-LABEL: name: load_private_s32_from_fi ; GFX11: [[SCRATCH_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_DWORD_SADDR %stack.0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32), addrspace 5) - ; GFX11-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_DWORD_SADDR]] + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[SCRATCH_LOAD_DWORD_SADDR]] %0:vgpr(p5) = G_FRAME_INDEX %stack.0 %1:vgpr(s32) = G_LOAD %0 :: (load (s32), align 4, addrspace 5) $vgpr0 = COPY %1 @@ -983,13 +983,13 @@ ; GFX6-LABEL: name: load_private_s32_from_1_fi_offset_4095 ; 
GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, implicit $exec :: (load (s8), addrspace 5) - ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_UBYTE_OFFEN]] ; GFX9-LABEL: name: load_private_s32_from_1_fi_offset_4095 ; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, implicit $exec :: (load (s8), addrspace 5) - ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_UBYTE_OFFEN]] ; GFX11-LABEL: name: load_private_s32_from_1_fi_offset_4095 ; GFX11: [[SCRATCH_LOAD_UBYTE_SADDR:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE_SADDR %stack.0, 4095, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 5) - ; GFX11-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_UBYTE_SADDR]] + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[SCRATCH_LOAD_UBYTE_SADDR]] %0:vgpr(p5) = G_FRAME_INDEX %stack.0 %1:vgpr(s32) = G_CONSTANT i32 4095 %2:vgpr(p5) = G_PTR_ADD %0, %1 @@ -1015,13 +1015,13 @@ ; GFX6-LABEL: name: load_private_s32_from_1_fi_offset_sgpr_4095 ; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, implicit $exec :: (load (s8), addrspace 5) - ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_UBYTE_OFFEN]] ; GFX9-LABEL: name: load_private_s32_from_1_fi_offset_sgpr_4095 ; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, implicit $exec :: (load (s8), addrspace 5) - ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_UBYTE_OFFEN]] ; GFX11-LABEL: name: load_private_s32_from_1_fi_offset_sgpr_4095 ; GFX11: [[SCRATCH_LOAD_UBYTE_SADDR:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE_SADDR %stack.0, 4095, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 5) - ; GFX11-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_UBYTE_SADDR]] + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[SCRATCH_LOAD_UBYTE_SADDR]] %0:vgpr(p5) = G_FRAME_INDEX %stack.0 %1:sgpr(s32) = G_CONSTANT i32 4095 %2:vgpr(s32) = COPY %1 @@ -1049,19 +1049,19 @@ ; GFX6-LABEL: name: load_private_s32_from_1_fi_offset_4096 ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec ; GFX6-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec - ; GFX6-NEXT: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_CO_U32_e64 [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], 0, implicit $exec - ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) - ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], 0, implicit $exec + ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_CO_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_UBYTE_OFFEN]] ; GFX9-LABEL: name: load_private_s32_from_1_fi_offset_4096 ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec ; GFX9-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec ; GFX9-NEXT: 
[[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], 0, implicit $exec ; GFX9-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) - ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_UBYTE_OFFEN]] ; GFX11-LABEL: name: load_private_s32_from_1_fi_offset_4096 ; GFX11: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec ; GFX11-NEXT: [[SCRATCH_LOAD_UBYTE_SVS:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE_SVS [[V_MOV_B32_e32_]], %stack.0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 5) - ; GFX11-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_UBYTE_SVS]] + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[SCRATCH_LOAD_UBYTE_SVS]] %0:vgpr(p5) = G_FRAME_INDEX %stack.0 %1:vgpr(s32) = G_CONSTANT i32 4096 %2:vgpr(p5) = G_PTR_ADD %0, %1 @@ -1087,15 +1087,15 @@ ; GFX6-LABEL: name: load_private_s32_from_neg1 ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_MOV_B32_e32_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, -1, 0, 0, implicit $exec :: (load (s32), addrspace 5) - ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX9-LABEL: name: load_private_s32_from_neg1 ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX9-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_MOV_B32_e32_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, -1, 0, 0, implicit $exec :: (load (s32), addrspace 5) - ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX11-LABEL: name: load_private_s32_from_neg1 ; GFX11: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX11-NEXT: [[SCRATCH_LOAD_DWORD:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_DWORD [[V_MOV_B32_e32_]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s32), addrspace 5) - ; GFX11-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_DWORD]] + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[SCRATCH_LOAD_DWORD]] %0:vgpr(p5) = G_CONSTANT i32 -1 %1:vgpr(s32) = G_LOAD %0 :: (load (s32), align 4, addrspace 5) $vgpr0 = COPY %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-smrd.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-smrd.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-smrd.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-smrd.mir @@ -17,7 +17,7 @@ regBankSelected: true # GCN: body: -# GCN: [[PTR:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 +# GCN: [[PTR:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 # Immediate offset: # SICI: S_LOAD_DWORD_IMM [[PTR]], 1, 0 @@ -48,11 +48,11 @@ # SIVI: [[K_LO:%[0-9]+]]:sreg_32 = S_MOV_B32 4294967292 # SIVI: [[K_HI:%[0-9]+]]:sreg_32 = S_MOV_B32 3 # SIVI: [[K:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[K_LO]], %subreg.sub0, [[K_HI]], %subreg.sub1 -# SIVI-DAG: [[K_SUB0:%[0-9]+]]:sreg_32 = COPY [[K]].sub0 -# SIVI-DAG: [[PTR_LO:%[0-9]+]]:sreg_32 = COPY [[PTR]].sub0 +# SIVI-DAG: [[K_SUB0:%[0-9]+]]:sreg_32 = PRED_COPY [[K]].sub0 +# SIVI-DAG: [[PTR_LO:%[0-9]+]]:sreg_32 = PRED_COPY [[PTR]].sub0 # SIVI-DAG: [[ADD_PTR_LO:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PTR_LO]], [[K_SUB0]] -# SIVI-DAG: [[K_SUB1:%[0-9]+]]:sreg_32 = COPY [[K]].sub1 -# SIVI-DAG: [[PTR_HI:%[0-9]+]]:sreg_32 = COPY [[PTR]].sub1 +# SIVI-DAG: [[K_SUB1:%[0-9]+]]:sreg_32 = 
PRED_COPY [[K]].sub1 +# SIVI-DAG: [[PTR_HI:%[0-9]+]]:sreg_32 = PRED_COPY [[PTR]].sub1 # SIVI: [[ADD_PTR_HI:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[PTR_HI]], [[K_SUB1]] # SIVI: [[ADD_PTR:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[ADD_PTR_LO]], %subreg.sub0, [[ADD_PTR_HI]], %subreg.sub1 # SIVI: S_LOAD_DWORD_IMM [[ADD_PTR]], 0, 0 @@ -62,11 +62,11 @@ # GCN: [[K_LO:%[0-9]+]]:sreg_32 = S_MOV_B32 0 # GCN: [[K_HI:%[0-9]+]]:sreg_32 = S_MOV_B32 4 # GCN: [[K:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[K_LO]], %subreg.sub0, [[K_HI]], %subreg.sub1 -# GCN-DAG: [[K_SUB0:%[0-9]+]]:sreg_32 = COPY [[K]].sub0 -# GCN-DAG: [[PTR_LO:%[0-9]+]]:sreg_32 = COPY [[PTR]].sub0 +# GCN-DAG: [[K_SUB0:%[0-9]+]]:sreg_32 = PRED_COPY [[K]].sub0 +# GCN-DAG: [[PTR_LO:%[0-9]+]]:sreg_32 = PRED_COPY [[PTR]].sub0 # GCN-DAG: [[ADD_PTR_LO:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PTR_LO]], [[K_SUB0]] -# GCN-DAG: [[K_SUB1:%[0-9]+]]:sreg_32 = COPY [[K]].sub1 -# GCN-DAG: [[PTR_HI:%[0-9]+]]:sreg_32 = COPY [[PTR]].sub1 +# GCN-DAG: [[K_SUB1:%[0-9]+]]:sreg_32 = PRED_COPY [[K]].sub1 +# GCN-DAG: [[PTR_HI:%[0-9]+]]:sreg_32 = PRED_COPY [[PTR]].sub1 # GCN: [[ADD_PTR_HI:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[PTR_HI]], [[K_SUB1]] # GCN: [[ADD_PTR:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[ADD_PTR_LO]], %subreg.sub0, [[ADD_PTR_HI]], %subreg.sub1 # GCN: S_LOAD_DWORD_IMM [[ADD_PTR]], 0, 0 @@ -80,11 +80,11 @@ # SIVI: [[K_LO:%[0-9]+]]:sreg_32 = S_MOV_B32 0 # SIVI: [[K_HI:%[0-9]+]]:sreg_32 = S_MOV_B32 1 # SIVI: [[K:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[K_LO]], %subreg.sub0, [[K_HI]], %subreg.sub1 -# SIVI-DAG: [[K_SUB0:%[0-9]+]]:sreg_32 = COPY [[K]].sub0 -# SIVI-DAG: [[PTR_LO:%[0-9]+]]:sreg_32 = COPY [[PTR]].sub0 +# SIVI-DAG: [[K_SUB0:%[0-9]+]]:sreg_32 = PRED_COPY [[K]].sub0 +# SIVI-DAG: [[PTR_LO:%[0-9]+]]:sreg_32 = PRED_COPY [[PTR]].sub0 # SIVI-DAG: [[ADD_PTR_LO:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PTR_LO]], [[K_SUB0]] -# SIVI-DAG: [[K_SUB1:%[0-9]+]]:sreg_32 = COPY [[K]].sub1 -# SIVI-DAG: [[PTR_HI:%[0-9]+]]:sreg_32 = COPY [[PTR]].sub1 +# SIVI-DAG: [[K_SUB1:%[0-9]+]]:sreg_32 = PRED_COPY [[K]].sub1 +# SIVI-DAG: [[PTR_HI:%[0-9]+]]:sreg_32 = PRED_COPY [[PTR]].sub1 # SIVI: [[ADD_PTR_HI:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[PTR_HI]], [[K_SUB1]] # SIVI: [[ADD_PTR:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[ADD_PTR_LO]], %subreg.sub0, [[ADD_PTR_HI]], %subreg.sub1 # SIVI: S_LOAD_DWORD_IMM [[ADD_PTR]], 0, 0 @@ -92,11 +92,11 @@ # Pointer loads # GCN: [[AS0:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0 -# GCN: $sgpr0_sgpr1 = COPY [[AS0]] +# GCN: $sgpr0_sgpr1 = PRED_COPY [[AS0]] # GCN: [[AS1:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0 -# GCN: $sgpr0_sgpr1 = COPY [[AS1]] +# GCN: $sgpr0_sgpr1 = PRED_COPY [[AS1]] # GCN: [[AS4:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0 -# GCN: $sgpr0_sgpr1 = COPY [[AS4]] +# GCN: $sgpr0_sgpr1 = PRED_COPY [[AS4]] body: | bb.0: @@ -171,8 +171,8 @@ %0:sgpr(p4) = COPY $sgpr0_sgpr1 %1:sgpr(p1) = COPY $sgpr2_sgpr3 - ; CHECK: [[CONSTANT_PTR:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; CHECK: [[GLOBAL_PTR:%[0-9]+]]:sgpr(p1) = COPY $sgpr2_sgpr3 + ; CHECK: [[CONSTANT_PTR:%[0-9]+]]:sgpr(p4) = PRED_COPY $sgpr0_sgpr1 + ; CHECK: [[GLOBAL_PTR:%[0-9]+]]:sgpr(p1) = PRED_COPY $sgpr2_sgpr3 ; CHECK: s_load_dwordx8 [[CONSTANT_PTR]] %2:sgpr(<8 x s32>) = G_LOAD %0 :: (load (<8 x s32>), addrspace 4) $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 = COPY %2 @@ -239,8 +239,8 @@ # Test a load with a (register + immediate) offset. 
# GCN-LABEL: name: smrd_sgpr_imm{{$}} -# GFX9-DAG: %[[BASE:.*]]:sreg_64 = COPY $sgpr0_sgpr1 -# GFX9-DAG: %[[OFFSET:.*]]:sreg_32 = COPY $sgpr2 +# GFX9-DAG: %[[BASE:.*]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 +# GFX9-DAG: %[[OFFSET:.*]]:sreg_32 = PRED_COPY $sgpr2 # GFX9: S_LOAD_DWORD_SGPR_IMM %[[BASE]], %[[OFFSET]], 16, name: smrd_sgpr_imm diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-lshr.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-lshr.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-lshr.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-lshr.mir @@ -17,37 +17,37 @@ ; GFX6-LABEL: name: lshr_s32_ss ; GFX6: liveins: $sgpr0, $sgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX6-NEXT: [[S_LSHR_B32_:%[0-9]+]]:sreg_32 = S_LSHR_B32 [[COPY]], [[COPY1]], implicit-def $scc + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX6-NEXT: [[S_LSHR_B32_:%[0-9]+]]:sreg_32 = S_LSHR_B32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc ; GFX6-NEXT: S_ENDPGM 0, implicit [[S_LSHR_B32_]] ; GFX7-LABEL: name: lshr_s32_ss ; GFX7: liveins: $sgpr0, $sgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX7-NEXT: [[S_LSHR_B32_:%[0-9]+]]:sreg_32 = S_LSHR_B32 [[COPY]], [[COPY1]], implicit-def $scc + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX7-NEXT: [[S_LSHR_B32_:%[0-9]+]]:sreg_32 = S_LSHR_B32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc ; GFX7-NEXT: S_ENDPGM 0, implicit [[S_LSHR_B32_]] ; GFX8-LABEL: name: lshr_s32_ss ; GFX8: liveins: $sgpr0, $sgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX8-NEXT: [[S_LSHR_B32_:%[0-9]+]]:sreg_32 = S_LSHR_B32 [[COPY]], [[COPY1]], implicit-def $scc + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX8-NEXT: [[S_LSHR_B32_:%[0-9]+]]:sreg_32 = S_LSHR_B32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc ; GFX8-NEXT: S_ENDPGM 0, implicit [[S_LSHR_B32_]] ; GFX9-LABEL: name: lshr_s32_ss ; GFX9: liveins: $sgpr0, $sgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX9-NEXT: [[S_LSHR_B32_:%[0-9]+]]:sreg_32 = S_LSHR_B32 [[COPY]], [[COPY1]], implicit-def $scc + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX9-NEXT: [[S_LSHR_B32_:%[0-9]+]]:sreg_32 = S_LSHR_B32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc ; GFX9-NEXT: S_ENDPGM 0, implicit [[S_LSHR_B32_]] ; GFX10-LABEL: name: lshr_s32_ss ; GFX10: liveins: $sgpr0, $sgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX10-NEXT: [[S_LSHR_B32_:%[0-9]+]]:sreg_32 = S_LSHR_B32 [[COPY]], [[COPY1]], implicit-def $scc + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX10-NEXT: [[S_LSHR_B32_:%[0-9]+]]:sreg_32 = S_LSHR_B32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc ; GFX10-NEXT: S_ENDPGM 0, implicit [[S_LSHR_B32_]] %0:sgpr(s32) = COPY $sgpr0 
%1:sgpr(s32) = COPY $sgpr1 @@ -66,37 +66,37 @@ ; GFX6-LABEL: name: lshr_s32_sv ; GFX6: liveins: $sgpr0, $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_LSHRREV_B32_e64_]] ; GFX7-LABEL: name: lshr_s32_sv ; GFX7: liveins: $sgpr0, $vgpr0 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX7-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX7-NEXT: S_ENDPGM 0, implicit [[V_LSHRREV_B32_e64_]] ; GFX8-LABEL: name: lshr_s32_sv ; GFX8: liveins: $sgpr0, $vgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_LSHRREV_B32_e64_]] ; GFX9-LABEL: name: lshr_s32_sv ; GFX9: liveins: $sgpr0, $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_LSHRREV_B32_e64_]] ; GFX10-LABEL: name: lshr_s32_sv ; GFX10: liveins: $sgpr0, $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_LSHRREV_B32_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = COPY $vgpr0 @@ -115,37 +115,37 @@ ; GFX6-LABEL: name: lshr_s32_vs ; GFX6: liveins: $sgpr0, $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX6-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; 
GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX6-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_LSHRREV_B32_e64_]] ; GFX7-LABEL: name: lshr_s32_vs ; GFX7: liveins: $sgpr0, $vgpr0 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX7-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX7-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX7-NEXT: S_ENDPGM 0, implicit [[V_LSHRREV_B32_e64_]] ; GFX8-LABEL: name: lshr_s32_vs ; GFX8: liveins: $sgpr0, $vgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX8-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX8-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_LSHRREV_B32_e64_]] ; GFX9-LABEL: name: lshr_s32_vs ; GFX9: liveins: $sgpr0, $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX9-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX9-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_LSHRREV_B32_e64_]] ; GFX10-LABEL: name: lshr_s32_vs ; GFX10: liveins: $sgpr0, $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX10-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX10-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_LSHRREV_B32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:sgpr(s32) = COPY $sgpr0 @@ -164,37 +164,37 @@ ; GFX6-LABEL: name: lshr_s32_vv ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX6-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_LSHRREV_B32_e64_]] ; GFX7-LABEL: name: lshr_s32_vv ; GFX7: liveins: $vgpr0, $vgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY 
$vgpr1 - ; GFX7-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX7-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX7-NEXT: S_ENDPGM 0, implicit [[V_LSHRREV_B32_e64_]] ; GFX8-LABEL: name: lshr_s32_vv ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX8-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX8-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_LSHRREV_B32_e64_]] ; GFX9-LABEL: name: lshr_s32_vv ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_LSHRREV_B32_e64_]] ; GFX10-LABEL: name: lshr_s32_vv ; GFX10: liveins: $vgpr0, $vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX10-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_LSHRREV_B32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -213,37 +213,37 @@ ; GFX6-LABEL: name: lshr_s64_ss ; GFX6: liveins: $sgpr0_sgpr1, $sgpr2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6-NEXT: [[S_LSHR_B64_:%[0-9]+]]:sreg_64 = S_LSHR_B64 [[COPY]], [[COPY1]], implicit-def $scc + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX6-NEXT: [[S_LSHR_B64_:%[0-9]+]]:sreg_64 = S_LSHR_B64 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc ; GFX6-NEXT: S_ENDPGM 0, implicit [[S_LSHR_B64_]] ; GFX7-LABEL: name: lshr_s64_ss ; GFX7: liveins: $sgpr0_sgpr1, $sgpr2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7-NEXT: [[S_LSHR_B64_:%[0-9]+]]:sreg_64 = S_LSHR_B64 [[COPY]], [[COPY1]], implicit-def $scc + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX7-NEXT: [[S_LSHR_B64_:%[0-9]+]]:sreg_64 = S_LSHR_B64 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc ; GFX7-NEXT: S_ENDPGM 0, implicit [[S_LSHR_B64_]] ; GFX8-LABEL: name: 
lshr_s64_ss ; GFX8: liveins: $sgpr0_sgpr1, $sgpr2 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8-NEXT: [[S_LSHR_B64_:%[0-9]+]]:sreg_64 = S_LSHR_B64 [[COPY]], [[COPY1]], implicit-def $scc + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX8-NEXT: [[S_LSHR_B64_:%[0-9]+]]:sreg_64 = S_LSHR_B64 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc ; GFX8-NEXT: S_ENDPGM 0, implicit [[S_LSHR_B64_]] ; GFX9-LABEL: name: lshr_s64_ss ; GFX9: liveins: $sgpr0_sgpr1, $sgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX9-NEXT: [[S_LSHR_B64_:%[0-9]+]]:sreg_64 = S_LSHR_B64 [[COPY]], [[COPY1]], implicit-def $scc + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[S_LSHR_B64_:%[0-9]+]]:sreg_64 = S_LSHR_B64 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc ; GFX9-NEXT: S_ENDPGM 0, implicit [[S_LSHR_B64_]] ; GFX10-LABEL: name: lshr_s64_ss ; GFX10: liveins: $sgpr0_sgpr1, $sgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX10-NEXT: [[S_LSHR_B64_:%[0-9]+]]:sreg_64 = S_LSHR_B64 [[COPY]], [[COPY1]], implicit-def $scc + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX10-NEXT: [[S_LSHR_B64_:%[0-9]+]]:sreg_64 = S_LSHR_B64 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc ; GFX10-NEXT: S_ENDPGM 0, implicit [[S_LSHR_B64_]] %0:sgpr(s64) = COPY $sgpr0_sgpr1 %1:sgpr(s32) = COPY $sgpr2 @@ -262,37 +262,37 @@ ; GFX6-LABEL: name: lshr_s64_sv ; GFX6: liveins: $sgpr0_sgpr1, $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[V_LSHR_B64_e64_:%[0-9]+]]:vreg_64 = V_LSHR_B64_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[V_LSHR_B64_e64_:%[0-9]+]]:vreg_64 = V_LSHR_B64_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_LSHR_B64_e64_]] ; GFX7-LABEL: name: lshr_s64_sv ; GFX7: liveins: $sgpr0_sgpr1, $vgpr0 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7-NEXT: [[V_LSHR_B64_e64_:%[0-9]+]]:vreg_64 = V_LSHR_B64_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX7-NEXT: [[V_LSHR_B64_e64_:%[0-9]+]]:vreg_64 = V_LSHR_B64_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; GFX7-NEXT: S_ENDPGM 0, implicit [[V_LSHR_B64_e64_]] ; GFX8-LABEL: name: lshr_s64_sv ; GFX8: liveins: $sgpr0_sgpr1, $vgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: [[V_LSHRREV_B64_e64_:%[0-9]+]]:vreg_64 = V_LSHRREV_B64_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: 
[[V_LSHRREV_B64_e64_:%[0-9]+]]:vreg_64 = V_LSHRREV_B64_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_LSHRREV_B64_e64_]] ; GFX9-LABEL: name: lshr_s64_sv ; GFX9: liveins: $sgpr0_sgpr1, $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[V_LSHRREV_B64_e64_:%[0-9]+]]:vreg_64 = V_LSHRREV_B64_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[V_LSHRREV_B64_e64_:%[0-9]+]]:vreg_64 = V_LSHRREV_B64_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_LSHRREV_B64_e64_]] ; GFX10-LABEL: name: lshr_s64_sv ; GFX10: liveins: $sgpr0_sgpr1, $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[V_LSHRREV_B64_e64_:%[0-9]+]]:vreg_64 = V_LSHRREV_B64_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[V_LSHRREV_B64_e64_:%[0-9]+]]:vreg_64 = V_LSHRREV_B64_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_LSHRREV_B64_e64_]] %0:sgpr(s64) = COPY $sgpr0_sgpr1 %1:vgpr(s32) = COPY $vgpr0 @@ -311,37 +311,37 @@ ; GFX6-LABEL: name: lshr_s64_vs ; GFX6: liveins: $sgpr0, $vgpr0_vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX6-NEXT: [[V_LSHR_B64_e64_:%[0-9]+]]:vreg_64 = V_LSHR_B64_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX6-NEXT: [[V_LSHR_B64_e64_:%[0-9]+]]:vreg_64 = V_LSHR_B64_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_LSHR_B64_e64_]] ; GFX7-LABEL: name: lshr_s64_vs ; GFX7: liveins: $sgpr0, $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX7-NEXT: [[V_LSHR_B64_e64_:%[0-9]+]]:vreg_64 = V_LSHR_B64_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX7-NEXT: [[V_LSHR_B64_e64_:%[0-9]+]]:vreg_64 = V_LSHR_B64_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; GFX7-NEXT: S_ENDPGM 0, implicit [[V_LSHR_B64_e64_]] ; GFX8-LABEL: name: lshr_s64_vs ; GFX8: liveins: $sgpr0, $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX8-NEXT: [[V_LSHRREV_B64_e64_:%[0-9]+]]:vreg_64 = V_LSHRREV_B64_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX8-NEXT: [[V_LSHRREV_B64_e64_:%[0-9]+]]:vreg_64 = V_LSHRREV_B64_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_LSHRREV_B64_e64_]] ; GFX9-LABEL: name: lshr_s64_vs ; GFX9: liveins: $sgpr0, $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = 
COPY $sgpr0 - ; GFX9-NEXT: [[V_LSHRREV_B64_e64_:%[0-9]+]]:vreg_64 = V_LSHRREV_B64_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX9-NEXT: [[V_LSHRREV_B64_e64_:%[0-9]+]]:vreg_64 = V_LSHRREV_B64_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_LSHRREV_B64_e64_]] ; GFX10-LABEL: name: lshr_s64_vs ; GFX10: liveins: $sgpr0, $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX10-NEXT: [[V_LSHRREV_B64_e64_:%[0-9]+]]:vreg_64 = V_LSHRREV_B64_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX10-NEXT: [[V_LSHRREV_B64_e64_:%[0-9]+]]:vreg_64 = V_LSHRREV_B64_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_LSHRREV_B64_e64_]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:sgpr(s32) = COPY $sgpr0 @@ -360,37 +360,37 @@ ; GFX6-LABEL: name: lshr_s64_vv ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX6-NEXT: [[V_LSHR_B64_e64_:%[0-9]+]]:vreg_64 = V_LSHR_B64_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX6-NEXT: [[V_LSHR_B64_e64_:%[0-9]+]]:vreg_64 = V_LSHR_B64_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_LSHR_B64_e64_]] ; GFX7-LABEL: name: lshr_s64_vv ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7-NEXT: [[V_LSHR_B64_e64_:%[0-9]+]]:vreg_64 = V_LSHR_B64_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX7-NEXT: [[V_LSHR_B64_e64_:%[0-9]+]]:vreg_64 = V_LSHR_B64_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; GFX7-NEXT: S_ENDPGM 0, implicit [[V_LSHR_B64_e64_]] ; GFX8-LABEL: name: lshr_s64_vv ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8-NEXT: [[V_LSHRREV_B64_e64_:%[0-9]+]]:vreg_64 = V_LSHRREV_B64_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX8-NEXT: [[V_LSHRREV_B64_e64_:%[0-9]+]]:vreg_64 = V_LSHRREV_B64_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_LSHRREV_B64_e64_]] ; GFX9-LABEL: name: lshr_s64_vv ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: [[V_LSHRREV_B64_e64_:%[0-9]+]]:vreg_64 = V_LSHRREV_B64_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[V_LSHRREV_B64_e64_:%[0-9]+]]:vreg_64 = V_LSHRREV_B64_e64 [[PRED_COPY1]], 
[[PRED_COPY]], implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_LSHRREV_B64_e64_]] ; GFX10-LABEL: name: lshr_s64_vv ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10-NEXT: [[V_LSHRREV_B64_e64_:%[0-9]+]]:vreg_64 = V_LSHRREV_B64_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[V_LSHRREV_B64_e64_:%[0-9]+]]:vreg_64 = V_LSHRREV_B64_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_LSHRREV_B64_e64_]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-lshr.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-lshr.s16.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-lshr.s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-lshr.s16.mir @@ -83,30 +83,30 @@ ; GFX8-LABEL: name: lshr_s16_s16_vs ; GFX8: liveins: $sgpr0, $vgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX8-NEXT: [[V_LSHRREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX8-NEXT: [[V_LSHRREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_LSHRREV_B16_e64_]] ; GFX9-LABEL: name: lshr_s16_s16_vs ; GFX9: liveins: $sgpr0, $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX9-NEXT: [[V_LSHRREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX9-NEXT: [[V_LSHRREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_LSHRREV_B16_e64_]] ; GFX10-LABEL: name: lshr_s16_s16_vs ; GFX10: liveins: $sgpr0, $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX10-NEXT: [[V_LSHRREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX10-NEXT: [[V_LSHRREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_LSHRREV_B16_e64_]] ; GFX11-LABEL: name: lshr_s16_s16_vs ; GFX11: liveins: $sgpr0, $vgpr0 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX11-NEXT: [[V_LSHRREV_B16_t16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_t16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX11-NEXT: [[V_LSHRREV_B16_t16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_t16_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit 
[[V_LSHRREV_B16_t16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:sgpr(s32) = COPY $sgpr0 @@ -176,30 +176,30 @@ ; GFX8-LABEL: name: lshr_s16_s16_vv ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX8-NEXT: [[V_LSHRREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX8-NEXT: [[V_LSHRREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_LSHRREV_B16_e64_]] ; GFX9-LABEL: name: lshr_s16_s16_vv ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: [[V_LSHRREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[V_LSHRREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_LSHRREV_B16_e64_]] ; GFX10-LABEL: name: lshr_s16_s16_vv ; GFX10: liveins: $vgpr0, $vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX10-NEXT: [[V_LSHRREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[V_LSHRREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_LSHRREV_B16_e64_]] ; GFX11-LABEL: name: lshr_s16_s16_vv ; GFX11: liveins: $vgpr0, $vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: [[V_LSHRREV_B16_t16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_t16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[V_LSHRREV_B16_t16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_t16_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_LSHRREV_B16_t16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -221,31 +221,31 @@ ; GFX8-LABEL: name: lshr_s16_s16_vv_zext_to_s32 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX8-NEXT: [[V_LSHRREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX8-NEXT: [[V_LSHRREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_LSHRREV_B16_e64_]] ; GFX9-LABEL: name: lshr_s16_s16_vv_zext_to_s32 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: 
[[V_LSHRREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[V_LSHRREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_LSHRREV_B16_e64_]] ; GFX10-LABEL: name: lshr_s16_s16_vv_zext_to_s32 ; GFX10: liveins: $vgpr0, $vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX10-NEXT: [[V_LSHRREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[V_LSHRREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX10-NEXT: [[V_BFE_U32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_U32_e64 [[V_LSHRREV_B16_e64_]], 0, 16, implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_BFE_U32_e64_]] ; GFX11-LABEL: name: lshr_s16_s16_vv_zext_to_s32 ; GFX11: liveins: $vgpr0, $vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: [[V_LSHRREV_B16_t16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_t16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[V_LSHRREV_B16_t16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_t16_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX11-NEXT: [[V_BFE_U32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_U32_e64 [[V_LSHRREV_B16_t16_e64_]], 0, 16, implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_BFE_U32_e64_]] %0:vgpr(s32) = COPY $vgpr0 @@ -421,30 +421,30 @@ ; GFX8-LABEL: name: lshr_s16_s16_sv ; GFX8: liveins: $sgpr0, $vgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: [[V_LSHRREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[V_LSHRREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_LSHRREV_B16_e64_]] ; GFX9-LABEL: name: lshr_s16_s16_sv ; GFX9: liveins: $sgpr0, $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[V_LSHRREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[V_LSHRREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_LSHRREV_B16_e64_]] ; GFX10-LABEL: name: lshr_s16_s16_sv ; GFX10: liveins: $sgpr0, $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[V_LSHRREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY 
$sgpr0 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[V_LSHRREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_LSHRREV_B16_e64_]] ; GFX11-LABEL: name: lshr_s16_s16_sv ; GFX11: liveins: $sgpr0, $vgpr0 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[V_LSHRREV_B16_t16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_t16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[V_LSHRREV_B16_t16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_t16_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_LSHRREV_B16_t16_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = COPY $vgpr0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-lshr.v2s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-lshr.v2s16.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-lshr.v2s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-lshr.v2s16.mir @@ -79,16 +79,16 @@ ; GFX9-LABEL: name: lshr_v2s16_sv ; GFX9: liveins: $sgpr0, $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[V_PK_LSHRREV_B16_:%[0-9]+]]:vgpr_32 = V_PK_LSHRREV_B16 8, [[COPY1]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $exec + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[V_PK_LSHRREV_B16_:%[0-9]+]]:vgpr_32 = V_PK_LSHRREV_B16 8, [[PRED_COPY1]], 8, [[PRED_COPY]], 0, 0, 0, 0, 0, implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_PK_LSHRREV_B16_]] ; GFX10-LABEL: name: lshr_v2s16_sv ; GFX10: liveins: $sgpr0, $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[V_PK_LSHRREV_B16_:%[0-9]+]]:vgpr_32 = V_PK_LSHRREV_B16 8, [[COPY1]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $exec + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[V_PK_LSHRREV_B16_:%[0-9]+]]:vgpr_32 = V_PK_LSHRREV_B16 8, [[PRED_COPY1]], 8, [[PRED_COPY]], 0, 0, 0, 0, 0, implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_PK_LSHRREV_B16_]] %0:sgpr(<2 x s16>) = COPY $sgpr0 %1:vgpr(<2 x s16>) = COPY $vgpr0 @@ -122,16 +122,16 @@ ; GFX9-LABEL: name: lshr_v2s16_vs ; GFX9: liveins: $sgpr0, $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX9-NEXT: [[V_PK_LSHRREV_B16_:%[0-9]+]]:vgpr_32 = V_PK_LSHRREV_B16 8, [[COPY1]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $exec + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX9-NEXT: [[V_PK_LSHRREV_B16_:%[0-9]+]]:vgpr_32 = V_PK_LSHRREV_B16 8, [[PRED_COPY1]], 8, [[PRED_COPY]], 0, 0, 0, 0, 0, implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_PK_LSHRREV_B16_]] ; GFX10-LABEL: name: lshr_v2s16_vs ; GFX10: liveins: $sgpr0, $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX10-NEXT: [[V_PK_LSHRREV_B16_:%[0-9]+]]:vgpr_32 = 
V_PK_LSHRREV_B16 8, [[COPY1]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $exec + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX10-NEXT: [[V_PK_LSHRREV_B16_:%[0-9]+]]:vgpr_32 = V_PK_LSHRREV_B16 8, [[PRED_COPY1]], 8, [[PRED_COPY]], 0, 0, 0, 0, 0, implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_PK_LSHRREV_B16_]] %0:vgpr(<2 x s16>) = COPY $vgpr0 %1:sgpr(<2 x s16>) = COPY $sgpr0 @@ -165,16 +165,16 @@ ; GFX9-LABEL: name: lshr_v2s16_vv ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: [[V_PK_LSHRREV_B16_:%[0-9]+]]:vgpr_32 = V_PK_LSHRREV_B16 8, [[COPY1]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $exec + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[V_PK_LSHRREV_B16_:%[0-9]+]]:vgpr_32 = V_PK_LSHRREV_B16 8, [[PRED_COPY1]], 8, [[PRED_COPY]], 0, 0, 0, 0, 0, implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_PK_LSHRREV_B16_]] ; GFX10-LABEL: name: lshr_v2s16_vv ; GFX10: liveins: $vgpr0, $vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX10-NEXT: [[V_PK_LSHRREV_B16_:%[0-9]+]]:vgpr_32 = V_PK_LSHRREV_B16 8, [[COPY1]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $exec + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[V_PK_LSHRREV_B16_:%[0-9]+]]:vgpr_32 = V_PK_LSHRREV_B16 8, [[PRED_COPY1]], 8, [[PRED_COPY]], 0, 0, 0, 0, 0, implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_PK_LSHRREV_B16_]] %0:vgpr(<2 x s16>) = COPY $vgpr0 %1:vgpr(<2 x s16>) = COPY $vgpr1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-mad_64_32.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-mad_64_32.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-mad_64_32.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-mad_64_32.mir @@ -13,18 +13,18 @@ ; GFX10-LABEL: name: mad_u64_u32_vvv ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr3 - ; GFX10-NEXT: [[V_MAD_U64_U32_e64_:%[0-9]+]]:vreg_64, [[V_MAD_U64_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_MAD_U64_U32_e64 [[COPY]], [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr3 + ; GFX10-NEXT: [[V_MAD_U64_U32_e64_:%[0-9]+]]:vreg_64, [[V_MAD_U64_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_MAD_U64_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_MAD_U64_U32_e64_]], implicit [[V_MAD_U64_U32_e64_1]] ; GFX11-LABEL: name: mad_u64_u32_vvv ; GFX11: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr3 - ; GFX11-NEXT: [[V_MAD_U64_U32_gfx11_e64_:%[0-9]+]]:vreg_64, [[V_MAD_U64_U32_gfx11_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_MAD_U64_U32_gfx11_e64 [[COPY]], [[COPY1]], [[COPY2]], 0, implicit $exec 
+ ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr3 + ; GFX11-NEXT: [[V_MAD_U64_U32_gfx11_e64_:%[0-9]+]]:vreg_64, [[V_MAD_U64_U32_gfx11_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_MAD_U64_U32_gfx11_e64 [[PRED_COPY]], [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MAD_U64_U32_gfx11_e64_]], implicit [[V_MAD_U64_U32_gfx11_e64_1]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -46,18 +46,18 @@ ; GFX10-LABEL: name: mad_i64_i32_vvv ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr3 - ; GFX10-NEXT: [[V_MAD_I64_I32_e64_:%[0-9]+]]:vreg_64, [[V_MAD_I64_I32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_MAD_I64_I32_e64 [[COPY]], [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr3 + ; GFX10-NEXT: [[V_MAD_I64_I32_e64_:%[0-9]+]]:vreg_64, [[V_MAD_I64_I32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_MAD_I64_I32_e64 [[PRED_COPY]], [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_MAD_I64_I32_e64_]], implicit [[V_MAD_I64_I32_e64_1]] ; GFX11-LABEL: name: mad_i64_i32_vvv ; GFX11: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr3 - ; GFX11-NEXT: [[V_MAD_I64_I32_gfx11_e64_:%[0-9]+]]:vreg_64, [[V_MAD_I64_I32_gfx11_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_MAD_I64_I32_gfx11_e64 [[COPY]], [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr3 + ; GFX11-NEXT: [[V_MAD_I64_I32_gfx11_e64_:%[0-9]+]]:vreg_64, [[V_MAD_I64_I32_gfx11_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_MAD_I64_I32_gfx11_e64 [[PRED_COPY]], [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MAD_I64_I32_gfx11_e64_]], implicit [[V_MAD_I64_I32_gfx11_e64_1]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-merge-values.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-merge-values.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-merge-values.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-merge-values.mir @@ -14,9 +14,9 @@ ; GCN-LABEL: name: test_merge_values_v_s64_v_s32_v_s32 ; GCN: liveins: $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 ; GCN-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -37,9 +37,9 @@ ; GCN-LABEL: 
name: test_merge_values_v_s64_s_s32_v_s32 ; GCN: liveins: $sgpr0, $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 ; GCN-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = COPY $vgpr0 @@ -60,9 +60,9 @@ ; GCN-LABEL: name: test_merge_values_v_s64_v_s32_s_s32 ; GCN: liveins: $sgpr0, $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 ; GCN-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] %0:vgpr(s32) = COPY $vgpr0 %1:sgpr(s32) = COPY $sgpr0 @@ -83,9 +83,9 @@ ; GCN-LABEL: name: test_merge_values_s_s64_s_s32_s_s32 ; GCN: liveins: $sgpr0, $sgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 ; GCN-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 @@ -105,11 +105,11 @@ ; GCN-LABEL: name: test_merge_values_s_s96_s_s32_s_s32_s_s32 ; GCN: liveins: $sgpr0, $sgpr1, $sgpr2 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_96 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2 = COPY [[REG_SEQUENCE]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_96 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2 = PRED_COPY [[REG_SEQUENCE]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 %2:sgpr(s32) = COPY $sgpr2 @@ -130,11 +130,11 @@ ; GCN-LABEL: name: test_merge_values_v_s96_v_s32_v_s32_v_s32 ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2 - ; GCN-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[REG_SEQUENCE]] + ; GCN-NEXT: 
[[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2 + ; GCN-NEXT: $vgpr0_vgpr1_vgpr2 = PRED_COPY [[REG_SEQUENCE]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s32) = COPY $vgpr2 @@ -155,12 +155,12 @@ ; GCN-LABEL: name: test_merge_values_s_s128_s_s32_s_s32_s_s32_s_s32 ; GCN: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[REG_SEQUENCE]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[REG_SEQUENCE]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 %2:sgpr(s32) = COPY $sgpr2 @@ -182,12 +182,12 @@ ; GCN-LABEL: name: test_merge_values_v_s128_v_s32_v_s32_v_s32 ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[REG_SEQUENCE]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[REG_SEQUENCE]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s32) = COPY $vgpr2 @@ -209,10 +209,10 @@ ; GCN-LABEL: name: test_merge_values_s_s128_s_s64_s_s64 ; GCN: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3 - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1, [[COPY1]], %subreg.sub2_sub3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[REG_SEQUENCE]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr2_sgpr3 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0_sub1, [[PRED_COPY1]], %subreg.sub2_sub3 + ; GCN-NEXT: 
$sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[REG_SEQUENCE]] %0:sgpr(s64) = COPY $sgpr0_sgpr1 %1:sgpr(s64) = COPY $sgpr2_sgpr3 %4:sgpr(s128) = G_MERGE_VALUES %0, %1 @@ -232,10 +232,10 @@ ; GCN-LABEL: name: test_merge_values_v_s128_v_s64_v_s64 ; GCN: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1, [[COPY1]], %subreg.sub2_sub3 - ; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[REG_SEQUENCE]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0_sub1, [[PRED_COPY1]], %subreg.sub2_sub3 + ; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[REG_SEQUENCE]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = COPY $vgpr2_vgpr3 %2:vgpr(s128) = G_MERGE_VALUES %0, %1 @@ -255,13 +255,13 @@ ; GCN-LABEL: name: test_merge_values_s_s160_s_s32_s_s32_s_s32_s_s32_s_s32 ; GCN: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_160 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3, [[COPY4]], %subreg.sub4 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4 = COPY [[REG_SEQUENCE]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_160 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3, [[PRED_COPY4]], %subreg.sub4 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4 = PRED_COPY [[REG_SEQUENCE]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 %2:sgpr(s32) = COPY $sgpr2 @@ -284,13 +284,13 @@ ; GCN-LABEL: name: test_merge_values_v_s160_v_s32_v_s32_v_s32_v_s32_v_s32 ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_160 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3, [[COPY4]], %subreg.sub4 - ; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = COPY [[REG_SEQUENCE]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_160 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], 
%subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3, [[PRED_COPY4]], %subreg.sub4 + ; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = PRED_COPY [[REG_SEQUENCE]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s32) = COPY $vgpr2 @@ -313,10 +313,10 @@ ; GCN-LABEL: name: test_merge_values_s_s192_s_s64_s_s64_s_s64 ; GCN: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3, $sgpr4_sgpr5 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:sreg_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_192 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1, [[COPY1]], %subreg.sub2_sub3, [[COPY2]], %subreg.sub4_sub5 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr2_sgpr3 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_192 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0_sub1, [[PRED_COPY1]], %subreg.sub2_sub3, [[PRED_COPY2]], %subreg.sub4_sub5 ; GCN-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] %0:sgpr(s64) = COPY $sgpr0_sgpr1 %1:sgpr(s64) = COPY $sgpr2_sgpr3 @@ -338,10 +338,10 @@ ; GCN-LABEL: name: test_merge_values_v_s192_v_s64_v_s64_v_s64 ; GCN: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5 - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_192 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1, [[COPY1]], %subreg.sub2_sub3, [[COPY2]], %subreg.sub4_sub5 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr4_vgpr5 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_192 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0_sub1, [[PRED_COPY1]], %subreg.sub2_sub3, [[PRED_COPY2]], %subreg.sub4_sub5 ; GCN-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = COPY $vgpr2_vgpr3 @@ -363,12 +363,12 @@ ; GCN-LABEL: name: test_merge_values_s_s256_s_s64_s_s64_s_s64_s_s64 ; GCN: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3, $sgpr4_sgpr5, $sgpr6_sgpr7 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:sreg_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sreg_64 = COPY $sgpr6_sgpr7 - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_256 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1, [[COPY1]], %subreg.sub2_sub3, [[COPY2]], %subreg.sub4_sub5, [[COPY3]], %subreg.sub6_sub7 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 = COPY [[REG_SEQUENCE]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr2_sgpr3 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr6_sgpr7 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_256 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0_sub1, [[PRED_COPY1]], %subreg.sub2_sub3, [[PRED_COPY2]], %subreg.sub4_sub5, [[PRED_COPY3]], %subreg.sub6_sub7 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 = PRED_COPY [[REG_SEQUENCE]] %0:sgpr(s64) = COPY $sgpr0_sgpr1 %1:sgpr(s64) = COPY $sgpr2_sgpr3 
%2:sgpr(s64) = COPY $sgpr4_sgpr5 @@ -390,10 +390,10 @@ ; GCN-LABEL: name: test_merge_values_s_s256_s_s128_s_s128 ; GCN: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4_sgpr5_sgpr6_sgpr7 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr_128 = COPY $sgpr4_sgpr5_sgpr6_sgpr7 - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_256 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1_sub2_sub3, [[COPY1]], %subreg.sub4_sub5_sub6_sub7 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 = COPY [[REG_SEQUENCE]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_128 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_128 = PRED_COPY $sgpr4_sgpr5_sgpr6_sgpr7 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_256 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0_sub1_sub2_sub3, [[PRED_COPY1]], %subreg.sub4_sub5_sub6_sub7 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 = PRED_COPY [[REG_SEQUENCE]] %0:sgpr(s128) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 %1:sgpr(s128) = COPY $sgpr4_sgpr5_sgpr6_sgpr7 %2:sgpr(s256) = G_MERGE_VALUES %0, %1 @@ -413,10 +413,10 @@ ; GCN-LABEL: name: test_merge_values_s_s512_s_s256_s_s256 ; GCN: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr_256 = COPY $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_512 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7, [[COPY1]], %subreg.sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY [[REG_SEQUENCE]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_256 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_256 = PRED_COPY $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_512 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7, [[PRED_COPY1]], %subreg.sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = PRED_COPY [[REG_SEQUENCE]] %0:sgpr(s256) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, %1:sgpr(s256) = COPY $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 %4:sgpr(s512) = G_MERGE_VALUES %0, %1 @@ -436,16 +436,16 @@ ; GCN-LABEL: name: test_merge_values_s_s512_s_s64_s_s64_s_s64_s_s64_s_s64_s_s64_s_s64_s_s64 ; GCN: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr14_sgpr15 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:sreg_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sreg_64 = COPY $sgpr6_sgpr7 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sreg_64 = COPY $sgpr8_sgpr9 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sreg_64 = COPY $sgpr10_sgpr11 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sreg_64 = COPY $sgpr12_sgpr13 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sreg_64 = COPY $sgpr14_sgpr15 - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_512 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1, [[COPY1]], %subreg.sub2_sub3, [[COPY2]], %subreg.sub4_sub5, [[COPY3]], %subreg.sub6_sub7, [[COPY4]], 
%subreg.sub8_sub9, [[COPY5]], %subreg.sub10_sub11, [[COPY6]], %subreg.sub12_sub13, [[COPY7]], %subreg.sub14_sub15 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY [[REG_SEQUENCE]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr2_sgpr3 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr6_sgpr7 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr10_sgpr11 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr12_sgpr13 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr14_sgpr15 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_512 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0_sub1, [[PRED_COPY1]], %subreg.sub2_sub3, [[PRED_COPY2]], %subreg.sub4_sub5, [[PRED_COPY3]], %subreg.sub6_sub7, [[PRED_COPY4]], %subreg.sub8_sub9, [[PRED_COPY5]], %subreg.sub10_sub11, [[PRED_COPY6]], %subreg.sub12_sub13, [[PRED_COPY7]], %subreg.sub14_sub15 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = PRED_COPY [[REG_SEQUENCE]] %0:sgpr(s64) = COPY $sgpr0_sgpr1 %1:sgpr(s64) = COPY $sgpr2_sgpr3 %2:sgpr(s64) = COPY $sgpr4_sgpr5 @@ -471,16 +471,16 @@ ; GCN-LABEL: name: test_merge_values_v_v512_v_s64_v_s64_v_s64_v_s64_v_s64_v_s64_v_s64_v_s64 ; GCN: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5, $vgpr6_vgpr7, $vgpr8_vgpr9, $vgpr10_vgpr11, $vgpr12_vgpr13, $vgpr14_vgpr15 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY $vgpr6_vgpr7 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:vreg_64 = COPY $vgpr8_vgpr9 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY $vgpr10_vgpr11 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY $vgpr12_vgpr13 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY $vgpr14_vgpr15 - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1, [[COPY1]], %subreg.sub2_sub3, [[COPY2]], %subreg.sub4_sub5, [[COPY3]], %subreg.sub6_sub7, [[COPY4]], %subreg.sub8_sub9, [[COPY5]], %subreg.sub10_sub11, [[COPY6]], %subreg.sub12_sub13, [[COPY7]], %subreg.sub14_sub15 - ; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[REG_SEQUENCE]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr4_vgpr5 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr6_vgpr7 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr8_vgpr9 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr10_vgpr11 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr12_vgpr13 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr14_vgpr15 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0_sub1, [[PRED_COPY1]], %subreg.sub2_sub3, [[PRED_COPY2]], %subreg.sub4_sub5, [[PRED_COPY3]], %subreg.sub6_sub7, [[PRED_COPY4]], %subreg.sub8_sub9, [[PRED_COPY5]], %subreg.sub10_sub11, [[PRED_COPY6]], %subreg.sub12_sub13, [[PRED_COPY7]], %subreg.sub14_sub15 + ; GCN-NEXT: 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = PRED_COPY [[REG_SEQUENCE]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = COPY $vgpr2_vgpr3 %2:vgpr(s64) = COPY $vgpr4_vgpr5 @@ -506,9 +506,9 @@ ; GCN-LABEL: name: test_merge_values_rc_already_set_src_v_s64_v_s32_v_s32 ; GCN: liveins: $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 ; GCN-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] %0:vgpr_32(s32) = COPY $vgpr0 %1:vgpr_32(s32) = COPY $vgpr1 @@ -529,9 +529,9 @@ ; GCN-LABEL: name: test_merge_values_rc_already_set_dst_v_s64_v_s32_v_s32 ; GCN: liveins: $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 ; GCN-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -552,11 +552,11 @@ ; GCN-LABEL: name: test_merge_values_s_s1024_s_s256_s_s256_s_s256_s_s256 ; GCN: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_256 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 ; GCN-NEXT: [[DEF:%[0-9]+]]:sgpr_256 = IMPLICIT_DEF - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr_256 = COPY $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_256 = PRED_COPY $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 ; GCN-NEXT: [[DEF1:%[0-9]+]]:sgpr_256 = IMPLICIT_DEF - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_1024 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7, [[DEF]], %subreg.sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15, [[COPY1]], %subreg.sub16_sub17_sub18_sub19_sub20_sub21_sub22_sub23, [[DEF1]], %subreg.sub24_sub25_sub26_sub27_sub28_sub29_sub30_sub31 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_1024 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7, [[DEF]], %subreg.sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15, [[PRED_COPY1]], %subreg.sub16_sub17_sub18_sub19_sub20_sub21_sub22_sub23, [[DEF1]], %subreg.sub24_sub25_sub26_sub27_sub28_sub29_sub30_sub31 ; GCN-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] %0:sgpr(s256) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, %1:sgpr(s256) = G_IMPLICIT_DEF @@ -580,10 +580,10 @@ ; GCN-LABEL: name: test_merge_values_s_s1024_s_s512 ; GCN: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15, $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 ; GCN-NEXT: {{ $}} - ; 
GCN-NEXT: [[COPY:%[0-9]+]]:sgpr_512 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr_512 = COPY $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_1024 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7_sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15, [[COPY1]], %subreg.sub16_sub17_sub18_sub19_sub20_sub21_sub22_sub23_sub24_sub25_sub26_sub27_sub28_sub29_sub30_sub31 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 = COPY [[REG_SEQUENCE]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_512 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_512 = PRED_COPY $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_1024 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7_sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15, [[PRED_COPY1]], %subreg.sub16_sub17_sub18_sub19_sub20_sub21_sub22_sub23_sub24_sub25_sub26_sub27_sub28_sub29_sub30_sub31 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 = PRED_COPY [[REG_SEQUENCE]] %0:sgpr(s512) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 %1:sgpr(s512) = COPY $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 %2:sgpr(s1024) = G_MERGE_VALUES %0, %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-mul.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-mul.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-mul.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-mul.mir @@ -12,9 +12,9 @@ ; GCN-LABEL: name: mul_s32_ss ; GCN: liveins: $sgpr0, $sgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GCN-NEXT: [[S_MUL_I32_:%[0-9]+]]:sreg_32 = S_MUL_I32 [[COPY]], [[COPY1]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GCN-NEXT: [[S_MUL_I32_:%[0-9]+]]:sreg_32 = S_MUL_I32 [[PRED_COPY]], [[PRED_COPY1]] ; GCN-NEXT: S_ENDPGM 0, implicit [[S_MUL_I32_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 @@ -33,9 +33,9 @@ ; GCN-LABEL: name: mul_s32_sv ; GCN: liveins: $sgpr0, $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[V_MUL_LO_U32_e64_:%[0-9]+]]:vgpr_32 = V_MUL_LO_U32_e64 [[COPY]], [[COPY1]], implicit $exec + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[V_MUL_LO_U32_e64_:%[0-9]+]]:vgpr_32 = V_MUL_LO_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_MUL_LO_U32_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = COPY $vgpr0 @@ -54,9 +54,9 
@@ ; GCN-LABEL: name: mul_s32_vs ; GCN: liveins: $sgpr0, $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[V_MUL_LO_U32_e64_:%[0-9]+]]:vgpr_32 = V_MUL_LO_U32_e64 [[COPY]], [[COPY1]], implicit $exec + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[V_MUL_LO_U32_e64_:%[0-9]+]]:vgpr_32 = V_MUL_LO_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_MUL_LO_U32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:sgpr(s32) = COPY $sgpr0 @@ -75,9 +75,9 @@ ; GCN-LABEL: name: mul_s32_vv ; GCN: liveins: $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GCN-NEXT: [[V_MUL_LO_U32_e64_:%[0-9]+]]:vgpr_32 = V_MUL_LO_U32_e64 [[COPY]], [[COPY1]], implicit $exec + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GCN-NEXT: [[V_MUL_LO_U32_e64_:%[0-9]+]]:vgpr_32 = V_MUL_LO_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_MUL_LO_U32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-or.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-or.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-or.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-or.mir @@ -17,21 +17,21 @@ ; WAVE64-LABEL: name: or_s1_vcc_vcc_vcc ; WAVE64: liveins: $vgpr0, $vgpr1 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 ; WAVE64-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; WAVE64-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], implicit $exec - ; WAVE64-NEXT: [[V_CMP_EQ_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[COPY1]], [[V_MOV_B32_e32_]], implicit $exec + ; WAVE64-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[PRED_COPY]], [[V_MOV_B32_e32_]], implicit $exec + ; WAVE64-NEXT: [[V_CMP_EQ_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[PRED_COPY1]], [[V_MOV_B32_e32_]], implicit $exec ; WAVE64-NEXT: [[S_OR_B64_:%[0-9]+]]:sreg_64_xexec = S_OR_B64 [[V_CMP_EQ_U32_e64_]], [[V_CMP_EQ_U32_e64_1]], implicit-def dead $scc ; WAVE64-NEXT: S_ENDPGM 0, implicit [[S_OR_B64_]] ; WAVE32-LABEL: name: or_s1_vcc_vcc_vcc ; WAVE32: liveins: $vgpr0, $vgpr1 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 ; WAVE32-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; WAVE32-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], implicit $exec - ; WAVE32-NEXT: [[V_CMP_EQ_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 [[COPY1]], [[V_MOV_B32_e32_]], implicit $exec + ; WAVE32-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 [[PRED_COPY]], [[V_MOV_B32_e32_]], implicit $exec + ; WAVE32-NEXT: 
[[V_CMP_EQ_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 [[PRED_COPY1]], [[V_MOV_B32_e32_]], implicit $exec ; WAVE32-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_OR_B32 [[V_CMP_EQ_U32_e64_]], [[V_CMP_EQ_U32_e64_1]], implicit-def dead $scc ; WAVE32-NEXT: S_ENDPGM 0, implicit [[S_OR_B32_]] %0:vgpr(s32) = COPY $vgpr0 @@ -57,16 +57,16 @@ ; WAVE64-LABEL: name: or_s1_sgpr_sgpr_sgpr ; WAVE64: liveins: $sgpr0, $sgpr1 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; WAVE64-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY]], [[COPY1]], implicit-def dead $scc + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; WAVE64-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def dead $scc ; WAVE64-NEXT: S_ENDPGM 0, implicit [[S_OR_B32_]] ; WAVE32-LABEL: name: or_s1_sgpr_sgpr_sgpr ; WAVE32: liveins: $sgpr0, $sgpr1 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; WAVE32-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY]], [[COPY1]], implicit-def dead $scc + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; WAVE32-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def dead $scc ; WAVE32-NEXT: S_ENDPGM 0, implicit [[S_OR_B32_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 @@ -89,16 +89,16 @@ ; WAVE64-LABEL: name: or_s16_sgpr_sgpr_sgpr ; WAVE64: liveins: $sgpr0, $sgpr1 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; WAVE64-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY]], [[COPY1]], implicit-def dead $scc + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; WAVE64-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def dead $scc ; WAVE64-NEXT: S_ENDPGM 0, implicit [[S_OR_B32_]] ; WAVE32-LABEL: name: or_s16_sgpr_sgpr_sgpr ; WAVE32: liveins: $sgpr0, $sgpr1 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; WAVE32-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY]], [[COPY1]], implicit-def dead $scc + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; WAVE32-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def dead $scc ; WAVE32-NEXT: S_ENDPGM 0, implicit [[S_OR_B32_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 @@ -121,16 +121,16 @@ ; WAVE64-LABEL: name: or_s16_vgpr_vgpr_vgpr ; WAVE64: liveins: $vgpr0, $vgpr1 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE64-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[COPY]], [[COPY1]], implicit $exec + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE64-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_OR_B32_e64_]] 
; WAVE32-LABEL: name: or_s16_vgpr_vgpr_vgpr ; WAVE32: liveins: $vgpr0, $vgpr1 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE32-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[COPY]], [[COPY1]], implicit $exec + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE32-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_OR_B32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -153,16 +153,16 @@ ; WAVE64-LABEL: name: or_s32_sgpr_sgpr_sgpr ; WAVE64: liveins: $sgpr0, $sgpr1 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; WAVE64-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY]], [[COPY1]], implicit-def $scc + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; WAVE64-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc ; WAVE64-NEXT: S_ENDPGM 0, implicit [[S_OR_B32_]] ; WAVE32-LABEL: name: or_s32_sgpr_sgpr_sgpr ; WAVE32: liveins: $sgpr0, $sgpr1 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; WAVE32-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY]], [[COPY1]], implicit-def $scc + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; WAVE32-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc ; WAVE32-NEXT: S_ENDPGM 0, implicit [[S_OR_B32_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 @@ -183,16 +183,16 @@ ; WAVE64-LABEL: name: or_s64_sgpr_sgpr_sgpr ; WAVE64: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3 - ; WAVE64-NEXT: [[S_OR_B64_:%[0-9]+]]:sreg_64 = S_OR_B64 [[COPY]], [[COPY1]], implicit-def $scc + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr2_sgpr3 + ; WAVE64-NEXT: [[S_OR_B64_:%[0-9]+]]:sreg_64 = S_OR_B64 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc ; WAVE64-NEXT: S_ENDPGM 0, implicit [[S_OR_B64_]] ; WAVE32-LABEL: name: or_s64_sgpr_sgpr_sgpr ; WAVE32: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3 - ; WAVE32-NEXT: [[S_OR_B64_:%[0-9]+]]:sreg_64 = S_OR_B64 [[COPY]], [[COPY1]], implicit-def $scc + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr2_sgpr3 + ; WAVE32-NEXT: [[S_OR_B64_:%[0-9]+]]:sreg_64 = S_OR_B64 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc ; WAVE32-NEXT: S_ENDPGM 0, implicit [[S_OR_B64_]] %0:sgpr(s64) = COPY $sgpr0_sgpr1 %1:sgpr(s64) = COPY $sgpr2_sgpr3 @@ -213,16 +213,16 @@ ; WAVE64-LABEL: name: or_v2s16_sgpr_sgpr_sgpr ; WAVE64: liveins: $sgpr0, $sgpr1 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; 
WAVE64-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY]], [[COPY1]], implicit-def dead $scc + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; WAVE64-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def dead $scc ; WAVE64-NEXT: S_ENDPGM 0, implicit [[S_OR_B32_]] ; WAVE32-LABEL: name: or_v2s16_sgpr_sgpr_sgpr ; WAVE32: liveins: $sgpr0, $sgpr1 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; WAVE32-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY]], [[COPY1]], implicit-def dead $scc + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; WAVE32-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def dead $scc ; WAVE32-NEXT: S_ENDPGM 0, implicit [[S_OR_B32_]] %0:sgpr(<2 x s16>) = COPY $sgpr0 %1:sgpr(<2 x s16>) = COPY $sgpr1 @@ -243,16 +243,16 @@ ; WAVE64-LABEL: name: or_v2s32_sgpr_sgpr_sgpr ; WAVE64: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3 - ; WAVE64-NEXT: [[S_OR_B64_:%[0-9]+]]:sreg_64 = S_OR_B64 [[COPY]], [[COPY1]], implicit-def dead $scc + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr2_sgpr3 + ; WAVE64-NEXT: [[S_OR_B64_:%[0-9]+]]:sreg_64 = S_OR_B64 [[PRED_COPY]], [[PRED_COPY1]], implicit-def dead $scc ; WAVE64-NEXT: S_ENDPGM 0, implicit [[S_OR_B64_]] ; WAVE32-LABEL: name: or_v2s32_sgpr_sgpr_sgpr ; WAVE32: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3 - ; WAVE32-NEXT: [[S_OR_B64_:%[0-9]+]]:sreg_64 = S_OR_B64 [[COPY]], [[COPY1]], implicit-def dead $scc + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr2_sgpr3 + ; WAVE32-NEXT: [[S_OR_B64_:%[0-9]+]]:sreg_64 = S_OR_B64 [[PRED_COPY]], [[PRED_COPY1]], implicit-def dead $scc ; WAVE32-NEXT: S_ENDPGM 0, implicit [[S_OR_B64_]] %0:sgpr(<2 x s32>) = COPY $sgpr0_sgpr1 %1:sgpr(<2 x s32>) = COPY $sgpr2_sgpr3 @@ -273,16 +273,16 @@ ; WAVE64-LABEL: name: or_v4s16_sgpr_sgpr_sgpr ; WAVE64: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3 - ; WAVE64-NEXT: [[S_OR_B64_:%[0-9]+]]:sreg_64 = S_OR_B64 [[COPY]], [[COPY1]], implicit-def dead $scc + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr2_sgpr3 + ; WAVE64-NEXT: [[S_OR_B64_:%[0-9]+]]:sreg_64 = S_OR_B64 [[PRED_COPY]], [[PRED_COPY1]], implicit-def dead $scc ; WAVE64-NEXT: S_ENDPGM 0, implicit [[S_OR_B64_]] ; WAVE32-LABEL: name: or_v4s16_sgpr_sgpr_sgpr ; WAVE32: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3 - ; WAVE32-NEXT: [[S_OR_B64_:%[0-9]+]]:sreg_64 = S_OR_B64 [[COPY]], [[COPY1]], implicit-def dead $scc + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; 
WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr2_sgpr3 + ; WAVE32-NEXT: [[S_OR_B64_:%[0-9]+]]:sreg_64 = S_OR_B64 [[PRED_COPY]], [[PRED_COPY1]], implicit-def dead $scc ; WAVE32-NEXT: S_ENDPGM 0, implicit [[S_OR_B64_]] %0:sgpr(<4 x s16>) = COPY $sgpr0_sgpr1 %1:sgpr(<4 x s16>) = COPY $sgpr2_sgpr3 @@ -303,16 +303,16 @@ ; WAVE64-LABEL: name: or_s32_vgpr_vgpr_vgpr ; WAVE64: liveins: $vgpr0, $vgpr1 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE64-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[COPY]], [[COPY1]], implicit $exec + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE64-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_OR_B32_e64_]] ; WAVE32-LABEL: name: or_s32_vgpr_vgpr_vgpr ; WAVE32: liveins: $vgpr0, $vgpr1 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE32-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[COPY]], [[COPY1]], implicit $exec + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE32-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_OR_B32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -333,16 +333,16 @@ ; WAVE64-LABEL: name: or_v2s16_vgpr_vgpr_vgpr ; WAVE64: liveins: $vgpr0, $vgpr1 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE64-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[COPY]], [[COPY1]], implicit $exec + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE64-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_OR_B32_e64_]] ; WAVE32-LABEL: name: or_v2s16_vgpr_vgpr_vgpr ; WAVE32: liveins: $vgpr0, $vgpr1 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE32-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[COPY]], [[COPY1]], implicit $exec + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE32-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_OR_B32_e64_]] %0:vgpr(<2 x s16>) = COPY $vgpr0 %1:vgpr(<2 x s16>) = COPY $vgpr1 @@ -395,22 +395,22 @@ ; WAVE64-LABEL: name: or_s1_vcc_copy_to_vcc ; WAVE64: liveins: $vgpr0, $vgpr1 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE64-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY]], implicit $exec + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE64-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[PRED_COPY]], implicit $exec ; WAVE64-NEXT: 
[[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 0, [[V_AND_B32_e32_]], implicit $exec - ; WAVE64-NEXT: [[V_AND_B32_e32_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY1]], implicit $exec + ; WAVE64-NEXT: [[V_AND_B32_e32_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[PRED_COPY1]], implicit $exec ; WAVE64-NEXT: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 0, [[V_AND_B32_e32_1]], implicit $exec ; WAVE64-NEXT: [[S_OR_B64_:%[0-9]+]]:sreg_64_xexec = S_OR_B64 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]], implicit-def dead $scc ; WAVE64-NEXT: S_ENDPGM 0, implicit [[S_OR_B64_]] ; WAVE32-LABEL: name: or_s1_vcc_copy_to_vcc ; WAVE32: liveins: $vgpr0, $vgpr1 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE32-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY]], implicit $exec + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE32-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[PRED_COPY]], implicit $exec ; WAVE32-NEXT: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_NE_U32_e64 0, [[V_AND_B32_e32_]], implicit $exec - ; WAVE32-NEXT: [[V_AND_B32_e32_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY1]], implicit $exec + ; WAVE32-NEXT: [[V_AND_B32_e32_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[PRED_COPY1]], implicit $exec ; WAVE32-NEXT: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_NE_U32_e64 0, [[V_AND_B32_e32_1]], implicit $exec ; WAVE32-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_OR_B32 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]], implicit-def dead $scc ; WAVE32-NEXT: S_ENDPGM 0, implicit [[S_OR_B32_]] @@ -440,27 +440,27 @@ ; WAVE64-LABEL: name: copy_select_constrain_vcc_result_reg_wave32 ; WAVE64: liveins: $vgpr0, $sgpr0 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE64-NEXT: %sgpr0:sreg_32 = COPY $sgpr0 - ; WAVE64-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY]], implicit $exec + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE64-NEXT: %sgpr0:sreg_32 = PRED_COPY $sgpr0 + ; WAVE64-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[PRED_COPY]], implicit $exec ; WAVE64-NEXT: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 0, [[V_AND_B32_e32_]], implicit $exec ; WAVE64-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, %sgpr0, implicit-def $scc ; WAVE64-NEXT: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 0, [[S_AND_B32_]], implicit $exec ; WAVE64-NEXT: [[S_OR_B64_:%[0-9]+]]:sreg_64_xexec = S_OR_B64 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]], implicit-def dead $scc - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY [[S_OR_B64_]] - ; WAVE64-NEXT: S_ENDPGM 0, implicit [[COPY1]] + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32_xm0 = PRED_COPY [[S_OR_B64_]] + ; WAVE64-NEXT: S_ENDPGM 0, implicit [[PRED_COPY1]] ; WAVE32-LABEL: name: copy_select_constrain_vcc_result_reg_wave32 ; WAVE32: liveins: $vgpr0, $sgpr0 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE32-NEXT: %sgpr0:sreg_32 = COPY $sgpr0 - ; WAVE32-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY]], implicit $exec + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE32-NEXT: %sgpr0:sreg_32 = PRED_COPY $sgpr0 + ; WAVE32-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[PRED_COPY]], 
implicit $exec ; WAVE32-NEXT: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_NE_U32_e64 0, [[V_AND_B32_e32_]], implicit $exec ; WAVE32-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, %sgpr0, implicit-def $scc ; WAVE32-NEXT: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_NE_U32_e64 0, [[S_AND_B32_]], implicit $exec ; WAVE32-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_OR_B32 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]], implicit-def dead $scc - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY [[S_OR_B32_]] - ; WAVE32-NEXT: S_ENDPGM 0, implicit [[COPY1]] + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32_xm0 = PRED_COPY [[S_OR_B32_]] + ; WAVE32-NEXT: S_ENDPGM 0, implicit [[PRED_COPY1]] %1:vgpr(s32) = COPY $vgpr0 %0:vgpr(s1) = G_TRUNC %1(s32) %sgpr0:sgpr(s32) = COPY $sgpr0 @@ -488,9 +488,9 @@ ; WAVE64-LABEL: name: copy_select_constrain_vcc_result_reg_wave64 ; WAVE64: liveins: $vgpr0, $sgpr0 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE64-NEXT: %sgpr0:sreg_32 = COPY $sgpr0 - ; WAVE64-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY]], implicit $exec + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE64-NEXT: %sgpr0:sreg_32 = PRED_COPY $sgpr0 + ; WAVE64-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[PRED_COPY]], implicit $exec ; WAVE64-NEXT: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 0, [[V_AND_B32_e32_]], implicit $exec ; WAVE64-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, %sgpr0, implicit-def $scc ; WAVE64-NEXT: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 0, [[S_AND_B32_]], implicit $exec @@ -499,15 +499,15 @@ ; WAVE32-LABEL: name: copy_select_constrain_vcc_result_reg_wave64 ; WAVE32: liveins: $vgpr0, $sgpr0 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE32-NEXT: %sgpr0:sreg_32 = COPY $sgpr0 - ; WAVE32-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY]], implicit $exec + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE32-NEXT: %sgpr0:sreg_32 = PRED_COPY $sgpr0 + ; WAVE32-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[PRED_COPY]], implicit $exec ; WAVE32-NEXT: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_NE_U32_e64 0, [[V_AND_B32_e32_]], implicit $exec ; WAVE32-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, %sgpr0, implicit-def $scc ; WAVE32-NEXT: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_NE_U32_e64 0, [[S_AND_B32_]], implicit $exec ; WAVE32-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_OR_B32 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]], implicit-def dead $scc - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sreg_64_xexec = COPY [[S_OR_B32_]] - ; WAVE32-NEXT: S_ENDPGM 0, implicit [[COPY1]] + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64_xexec = PRED_COPY [[S_OR_B32_]] + ; WAVE32-NEXT: S_ENDPGM 0, implicit [[PRED_COPY1]] %1:vgpr(s32) = COPY $vgpr0 %0:vgpr(s1) = G_TRUNC %1(s32) %sgpr0:sgpr(s32) = COPY $sgpr0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-add3.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-add3.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-add3.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-add3.mir @@ -17,20 +17,20 @@ ; GFX8-LABEL: name: add_s32_sgpr_sgpr_sgpr ; GFX8: liveins: $sgpr0, $sgpr1, $sgpr2 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY 
$sgpr1 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY]], [[COPY1]], implicit-def $scc - ; GFX8-NEXT: [[S_ADD_I32_1:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_ADD_I32_]], [[COPY2]], implicit-def $scc + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX8-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc + ; GFX8-NEXT: [[S_ADD_I32_1:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_ADD_I32_]], [[PRED_COPY2]], implicit-def $scc ; GFX8-NEXT: S_ENDPGM 0, implicit [[S_ADD_I32_1]] ; GFX9-LABEL: name: add_s32_sgpr_sgpr_sgpr ; GFX9: liveins: $sgpr0, $sgpr1, $sgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX9-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY]], [[COPY1]], implicit-def $scc - ; GFX9-NEXT: [[S_ADD_I32_1:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_ADD_I32_]], [[COPY2]], implicit-def $scc + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc + ; GFX9-NEXT: [[S_ADD_I32_1:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_ADD_I32_]], [[PRED_COPY2]], implicit-def $scc ; GFX9-NEXT: S_ENDPGM 0, implicit [[S_ADD_I32_1]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 @@ -53,19 +53,19 @@ ; GFX8-LABEL: name: add_s32_vgpr_vgpr_vgpr ; GFX8: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8-NEXT: %3:vgpr_32, dead %6:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec - ; GFX8-NEXT: %4:vgpr_32, dead %5:sreg_64_xexec = V_ADD_CO_U32_e64 %3, [[COPY2]], 0, implicit $exec - ; GFX8-NEXT: S_ENDPGM 0, implicit %4 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], 0, implicit $exec + ; GFX8-NEXT: [[V_ADD_CO_U32_e64_2:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_3:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[V_ADD_CO_U32_e64_]], [[PRED_COPY2]], 0, implicit $exec + ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_ADD_CO_U32_e64_2]] ; GFX9-LABEL: name: add_s32_vgpr_vgpr_vgpr ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: [[V_ADD3_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD3_U32_e64 [[COPY]], [[COPY1]], [[COPY2]], implicit $exec + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[V_ADD3_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD3_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], [[PRED_COPY2]], implicit $exec ; 
GFX9-NEXT: S_ENDPGM 0, implicit [[V_ADD3_U32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -88,20 +88,20 @@ ; GFX8-LABEL: name: add_s32_vgpr_vgpr_vgpr_multi_use ; GFX8: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8-NEXT: %3:vgpr_32, dead %6:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec - ; GFX8-NEXT: %4:vgpr_32, dead %5:sreg_64_xexec = V_ADD_CO_U32_e64 %3, [[COPY2]], 0, implicit $exec - ; GFX8-NEXT: S_ENDPGM 0, implicit %4, implicit %3 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], 0, implicit $exec + ; GFX8-NEXT: [[V_ADD_CO_U32_e64_2:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_3:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[V_ADD_CO_U32_e64_]], [[PRED_COPY2]], 0, implicit $exec + ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_ADD_CO_U32_e64_2]], implicit [[V_ADD_CO_U32_e64_]] ; GFX9-LABEL: name: add_s32_vgpr_vgpr_vgpr_multi_use ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec - ; GFX9-NEXT: [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[V_ADD_U32_e64_]], [[COPY2]], 0, implicit $exec + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], 0, implicit $exec + ; GFX9-NEXT: [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[V_ADD_U32_e64_]], [[PRED_COPY2]], 0, implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_ADD_U32_e64_1]], implicit [[V_ADD_U32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -125,19 +125,19 @@ ; GFX8-LABEL: name: add_p3_vgpr_vgpr_vgpr ; GFX8: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8-NEXT: %3:vgpr_32, dead %6:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec - ; GFX8-NEXT: %4:vgpr_32, dead %5:sreg_64_xexec = V_ADD_CO_U32_e64 %3, [[COPY2]], 0, implicit $exec - ; GFX8-NEXT: S_ENDPGM 0, implicit %4 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], 0, implicit $exec + ; GFX8-NEXT: [[V_ADD_CO_U32_e64_2:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_3:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[V_ADD_CO_U32_e64_]], [[PRED_COPY2]], 0, implicit $exec + ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_ADD_CO_U32_e64_2]] ; GFX9-LABEL: name: add_p3_vgpr_vgpr_vgpr ; 
GFX9: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: [[V_ADD3_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD3_U32_e64 [[COPY]], [[COPY1]], [[COPY2]], implicit $exec + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[V_ADD3_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD3_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], [[PRED_COPY2]], implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_ADD3_U32_e64_]] %0:vgpr(p3) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -161,19 +161,19 @@ ; GFX8-LABEL: name: add_p5_vgpr_vgpr_vgpr ; GFX8: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8-NEXT: %3:vgpr_32, dead %6:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec - ; GFX8-NEXT: %4:vgpr_32, dead %5:sreg_64_xexec = V_ADD_CO_U32_e64 %3, [[COPY2]], 0, implicit $exec - ; GFX8-NEXT: S_ENDPGM 0, implicit %4 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], 0, implicit $exec + ; GFX8-NEXT: [[V_ADD_CO_U32_e64_2:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_3:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[V_ADD_CO_U32_e64_]], [[PRED_COPY2]], 0, implicit $exec + ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_ADD_CO_U32_e64_2]] ; GFX9-LABEL: name: add_p5_vgpr_vgpr_vgpr ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: [[V_ADD3_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD3_U32_e64 [[COPY]], [[COPY1]], [[COPY2]], implicit $exec + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[V_ADD3_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD3_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], [[PRED_COPY2]], implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_ADD3_U32_e64_]] %0:vgpr(p5) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -197,20 +197,20 @@ ; GFX8-LABEL: name: add_p3_s32_vgpr_vgpr_vgpr ; GFX8: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8-NEXT: %3:vgpr_32, dead %6:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec - ; GFX8-NEXT: %4:vgpr_32, dead %5:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], %3, 0, implicit $exec - ; GFX8-NEXT: S_ENDPGM 0, implicit %4 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead 
[[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], 0, implicit $exec + ; GFX8-NEXT: [[V_ADD_CO_U32_e64_2:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_3:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY2]], [[V_ADD_CO_U32_e64_]], 0, implicit $exec + ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_ADD_CO_U32_e64_2]] ; GFX9-LABEL: name: add_p3_s32_vgpr_vgpr_vgpr ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec - ; GFX9-NEXT: [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY2]], [[V_ADD_U32_e64_]], 0, implicit $exec + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], 0, implicit $exec + ; GFX9-NEXT: [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[PRED_COPY2]], [[V_ADD_U32_e64_]], 0, implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_ADD_U32_e64_1]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -234,20 +234,20 @@ ; GFX8-LABEL: name: add_p5_s32_vgpr_vgpr_vgpr ; GFX8: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8-NEXT: %3:vgpr_32, dead %6:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec - ; GFX8-NEXT: %4:vgpr_32, dead %5:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], %3, 0, implicit $exec - ; GFX8-NEXT: S_ENDPGM 0, implicit %4 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], 0, implicit $exec + ; GFX8-NEXT: [[V_ADD_CO_U32_e64_2:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_3:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY2]], [[V_ADD_CO_U32_e64_]], 0, implicit $exec + ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_ADD_CO_U32_e64_2]] ; GFX9-LABEL: name: add_p5_s32_vgpr_vgpr_vgpr ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec - ; GFX9-NEXT: [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY2]], [[V_ADD_U32_e64_]], 0, implicit $exec + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], 0, implicit $exec + ; GFX9-NEXT: [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[PRED_COPY2]], [[V_ADD_U32_e64_]], 0, implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_ADD_U32_e64_1]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY 
$vgpr1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-and-or.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-and-or.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-and-or.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-and-or.mir @@ -17,20 +17,20 @@ ; GFX8-LABEL: name: and_or_s32_sgpr_sgpr_sgpr ; GFX8: liveins: $sgpr0, $sgpr1, $sgpr2 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], [[COPY1]], implicit-def $scc - ; GFX8-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[S_AND_B32_]], [[COPY2]], implicit-def $scc + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX8-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc + ; GFX8-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[S_AND_B32_]], [[PRED_COPY2]], implicit-def $scc ; GFX8-NEXT: S_ENDPGM 0, implicit [[S_OR_B32_]] ; GFX9-LABEL: name: and_or_s32_sgpr_sgpr_sgpr ; GFX9: liveins: $sgpr0, $sgpr1, $sgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX9-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], [[COPY1]], implicit-def $scc - ; GFX9-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[S_AND_B32_]], [[COPY2]], implicit-def $scc + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc + ; GFX9-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[S_AND_B32_]], [[PRED_COPY2]], implicit-def $scc ; GFX9-NEXT: S_ENDPGM 0, implicit [[S_OR_B32_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 @@ -53,19 +53,19 @@ ; GFX8-LABEL: name: and_or_s32_vgpr_vgpr_vgpr ; GFX8: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY]], [[COPY1]], implicit $exec - ; GFX8-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_AND_B32_e64_]], [[COPY2]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX8-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec + ; GFX8-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_AND_B32_e64_]], [[PRED_COPY2]], implicit $exec ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_OR_B32_e64_]] ; GFX9-LABEL: name: and_or_s32_vgpr_vgpr_vgpr ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: [[V_AND_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_OR_B32_e64 [[COPY]], 
[[COPY1]], [[COPY2]], implicit $exec + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[V_AND_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_OR_B32_e64 [[PRED_COPY]], [[PRED_COPY1]], [[PRED_COPY2]], implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_AND_OR_B32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -88,19 +88,19 @@ ; GFX8-LABEL: name: and_or_s32_vgpr_vgpr_vgpr_commute ; GFX8: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY]], [[COPY1]], implicit $exec - ; GFX8-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[COPY2]], [[V_AND_B32_e64_]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX8-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec + ; GFX8-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[PRED_COPY2]], [[V_AND_B32_e64_]], implicit $exec ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_OR_B32_e64_]] ; GFX9-LABEL: name: and_or_s32_vgpr_vgpr_vgpr_commute ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: [[V_AND_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_OR_B32_e64 [[COPY]], [[COPY1]], [[COPY2]], implicit $exec + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[V_AND_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_OR_B32_e64 [[PRED_COPY]], [[PRED_COPY1]], [[PRED_COPY2]], implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_AND_OR_B32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -123,22 +123,22 @@ ; GFX8-LABEL: name: and_or_s32_sgpr_sgpr_vgpr ; GFX8: liveins: $sgpr0, $sgpr1, $vgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], [[COPY1]], implicit-def $scc - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[S_AND_B32_]] - ; GFX8-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[COPY3]], [[COPY2]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_AND_B32_]] + ; GFX8-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[PRED_COPY3]], [[PRED_COPY2]], implicit $exec ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_OR_B32_e64_]] ; GFX9-LABEL: name: and_or_s32_sgpr_sgpr_vgpr ; GFX9: liveins: $sgpr0, $sgpr1, $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; 
GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], [[COPY1]], implicit-def $scc - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[S_AND_B32_]] - ; GFX9-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[COPY3]], [[COPY2]], implicit $exec + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_AND_B32_]] + ; GFX9-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[PRED_COPY3]], [[PRED_COPY2]], implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_OR_B32_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-or3.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-or3.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-or3.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-or3.mir @@ -17,20 +17,20 @@ ; GFX8-LABEL: name: or_s32_sgpr_sgpr_sgpr ; GFX8: liveins: $sgpr0, $sgpr1, $sgpr2 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY]], [[COPY1]], implicit-def $scc - ; GFX8-NEXT: [[S_OR_B32_1:%[0-9]+]]:sreg_32 = S_OR_B32 [[S_OR_B32_]], [[COPY2]], implicit-def $scc + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX8-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc + ; GFX8-NEXT: [[S_OR_B32_1:%[0-9]+]]:sreg_32 = S_OR_B32 [[S_OR_B32_]], [[PRED_COPY2]], implicit-def $scc ; GFX8-NEXT: S_ENDPGM 0, implicit [[S_OR_B32_1]] ; GFX9-LABEL: name: or_s32_sgpr_sgpr_sgpr ; GFX9: liveins: $sgpr0, $sgpr1, $sgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX9-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY]], [[COPY1]], implicit-def $scc - ; GFX9-NEXT: [[S_OR_B32_1:%[0-9]+]]:sreg_32 = S_OR_B32 [[S_OR_B32_]], [[COPY2]], implicit-def $scc + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc + ; GFX9-NEXT: [[S_OR_B32_1:%[0-9]+]]:sreg_32 = S_OR_B32 [[S_OR_B32_]], [[PRED_COPY2]], implicit-def $scc ; GFX9-NEXT: S_ENDPGM 0, implicit [[S_OR_B32_1]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 @@ -53,19 +53,19 @@ ; GFX8-LABEL: name: or_s32_vgpr_vgpr_vgpr ; GFX8: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[COPY]], [[COPY1]], implicit $exec - ; GFX8-NEXT: 
[[V_OR_B32_e64_1:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_]], [[COPY2]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX8-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec + ; GFX8-NEXT: [[V_OR_B32_e64_1:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_]], [[PRED_COPY2]], implicit $exec ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_OR_B32_e64_1]] ; GFX9-LABEL: name: or_s32_vgpr_vgpr_vgpr ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: [[V_OR3_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR3_B32_e64 [[COPY]], [[COPY1]], [[COPY2]], implicit $exec + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[V_OR3_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR3_B32_e64 [[PRED_COPY]], [[PRED_COPY1]], [[PRED_COPY2]], implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_OR3_B32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -88,20 +88,20 @@ ; GFX8-LABEL: name: or_s32_vgpr_vgpr_vgpr_multi_use ; GFX8: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[COPY]], [[COPY1]], implicit $exec - ; GFX8-NEXT: [[V_OR_B32_e64_1:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_]], [[COPY2]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX8-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec + ; GFX8-NEXT: [[V_OR_B32_e64_1:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_]], [[PRED_COPY2]], implicit $exec ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_OR_B32_e64_1]], implicit [[V_OR_B32_e64_]] ; GFX9-LABEL: name: or_s32_vgpr_vgpr_vgpr_multi_use ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[COPY]], [[COPY1]], implicit $exec - ; GFX9-NEXT: [[V_OR_B32_e64_1:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_]], [[COPY2]], implicit $exec + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec + ; GFX9-NEXT: [[V_OR_B32_e64_1:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_]], [[PRED_COPY2]], implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_OR_B32_e64_1]], implicit [[V_OR_B32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-smed3.mir 
b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-smed3.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-smed3.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-smed3.mir @@ -13,10 +13,10 @@ ; GFX6-LABEL: name: smed3_s32_vvv ; GFX6: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX6-NEXT: [[V_MED3_I32_e64_:%[0-9]+]]:vgpr_32 = V_MED3_I32_e64 [[COPY]], [[COPY1]], [[COPY2]], implicit $exec + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX6-NEXT: [[V_MED3_I32_e64_:%[0-9]+]]:vgpr_32 = V_MED3_I32_e64 [[PRED_COPY]], [[PRED_COPY1]], [[PRED_COPY2]], implicit $exec ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_MED3_I32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -41,12 +41,12 @@ ; GFX6-LABEL: name: smed3_s32_sss ; GFX6: liveins: $sgpr0, $sgpr1, $sgpr2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6-NEXT: [[S_MAX_I32_:%[0-9]+]]:sreg_32 = S_MAX_I32 [[COPY]], [[COPY1]], implicit-def $scc - ; GFX6-NEXT: [[S_MIN_I32_:%[0-9]+]]:sreg_32 = S_MIN_I32 [[COPY]], [[COPY1]], implicit-def $scc - ; GFX6-NEXT: [[S_MAX_I32_1:%[0-9]+]]:sreg_32 = S_MAX_I32 [[S_MIN_I32_]], [[COPY2]], implicit-def $scc + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX6-NEXT: [[S_MAX_I32_:%[0-9]+]]:sreg_32 = S_MAX_I32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc + ; GFX6-NEXT: [[S_MIN_I32_:%[0-9]+]]:sreg_32 = S_MIN_I32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc + ; GFX6-NEXT: [[S_MAX_I32_1:%[0-9]+]]:sreg_32 = S_MAX_I32 [[S_MIN_I32_]], [[PRED_COPY2]], implicit-def $scc ; GFX6-NEXT: [[S_MIN_I32_1:%[0-9]+]]:sreg_32 = S_MIN_I32 [[S_MAX_I32_]], [[S_MAX_I32_1]], implicit-def $scc ; GFX6-NEXT: S_ENDPGM 0, implicit [[S_MIN_I32_1]] %0:sgpr(s32) = COPY $sgpr0 @@ -71,11 +71,11 @@ ; GFX6-LABEL: name: smed3_s32_vvv_multiuse0 ; GFX6: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX6-NEXT: [[V_MAX_I32_e64_:%[0-9]+]]:vgpr_32 = V_MAX_I32_e64 [[COPY]], [[COPY1]], implicit $exec - ; GFX6-NEXT: [[V_MED3_I32_e64_:%[0-9]+]]:vgpr_32 = V_MED3_I32_e64 [[COPY]], [[COPY1]], [[COPY2]], implicit $exec + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX6-NEXT: [[V_MAX_I32_e64_:%[0-9]+]]:vgpr_32 = V_MAX_I32_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec + ; GFX6-NEXT: [[V_MED3_I32_e64_:%[0-9]+]]:vgpr_32 = V_MED3_I32_e64 [[PRED_COPY]], [[PRED_COPY1]], [[PRED_COPY2]], implicit $exec ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_MED3_I32_e64_]], implicit [[V_MAX_I32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -99,11 +99,11 @@ ; GFX6-LABEL: name: smed3_s32_vvv_multiuse1 ; GFX6: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: 
[[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX6-NEXT: [[V_MIN_I32_e64_:%[0-9]+]]:vgpr_32 = V_MIN_I32_e64 [[COPY]], [[COPY1]], implicit $exec - ; GFX6-NEXT: [[V_MED3_I32_e64_:%[0-9]+]]:vgpr_32 = V_MED3_I32_e64 [[COPY]], [[COPY1]], [[COPY2]], implicit $exec + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX6-NEXT: [[V_MIN_I32_e64_:%[0-9]+]]:vgpr_32 = V_MIN_I32_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec + ; GFX6-NEXT: [[V_MED3_I32_e64_:%[0-9]+]]:vgpr_32 = V_MED3_I32_e64 [[PRED_COPY]], [[PRED_COPY1]], [[PRED_COPY2]], implicit $exec ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_MED3_I32_e64_]], implicit [[V_MIN_I32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -127,12 +127,12 @@ ; GFX6-LABEL: name: smed3_s32_vvv_multiuse2 ; GFX6: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX6-NEXT: [[V_MIN_I32_e64_:%[0-9]+]]:vgpr_32 = V_MIN_I32_e64 [[COPY]], [[COPY1]], implicit $exec - ; GFX6-NEXT: [[V_MAX_I32_e64_:%[0-9]+]]:vgpr_32 = V_MAX_I32_e64 [[V_MIN_I32_e64_]], [[COPY2]], implicit $exec - ; GFX6-NEXT: [[V_MED3_I32_e64_:%[0-9]+]]:vgpr_32 = V_MED3_I32_e64 [[COPY]], [[COPY1]], [[COPY2]], implicit $exec + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX6-NEXT: [[V_MIN_I32_e64_:%[0-9]+]]:vgpr_32 = V_MIN_I32_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec + ; GFX6-NEXT: [[V_MAX_I32_e64_:%[0-9]+]]:vgpr_32 = V_MAX_I32_e64 [[V_MIN_I32_e64_]], [[PRED_COPY2]], implicit $exec + ; GFX6-NEXT: [[V_MED3_I32_e64_:%[0-9]+]]:vgpr_32 = V_MED3_I32_e64 [[PRED_COPY]], [[PRED_COPY1]], [[PRED_COPY2]], implicit $exec ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_MED3_I32_e64_]], implicit [[V_MAX_I32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -156,12 +156,12 @@ ; GFX6-LABEL: name: smed3_s32_vvv_reuse_bounds ; GFX6: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX6-NEXT: [[V_MED3_I32_e64_:%[0-9]+]]:vgpr_32 = V_MED3_I32_e64 [[COPY]], [[COPY1]], [[COPY2]], implicit $exec - ; GFX6-NEXT: [[V_MED3_I32_e64_1:%[0-9]+]]:vgpr_32 = V_MED3_I32_e64 [[COPY]], [[COPY1]], [[COPY3]], implicit $exec + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX6-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX6-NEXT: [[V_MED3_I32_e64_:%[0-9]+]]:vgpr_32 = V_MED3_I32_e64 [[PRED_COPY]], [[PRED_COPY1]], [[PRED_COPY2]], implicit $exec + ; GFX6-NEXT: [[V_MED3_I32_e64_1:%[0-9]+]]:vgpr_32 = V_MED3_I32_e64 [[PRED_COPY]], [[PRED_COPY1]], [[PRED_COPY3]], implicit $exec ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_MED3_I32_e64_]], implicit [[V_MED3_I32_e64_1]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 diff --git 
a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-smed3.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-smed3.s16.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-smed3.s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-smed3.s16.mir @@ -16,29 +16,29 @@ ; GFX8-LABEL: name: smed3_s16_vvv ; GFX8: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8-NEXT: [[V_MAX_I16_e64_:%[0-9]+]]:vgpr_32 = V_MAX_I16_e64 [[COPY]], [[COPY1]], implicit $exec - ; GFX8-NEXT: [[V_MIN_I16_e64_:%[0-9]+]]:vgpr_32 = V_MIN_I16_e64 [[COPY]], [[COPY1]], implicit $exec - ; GFX8-NEXT: [[V_MAX_I16_e64_1:%[0-9]+]]:vgpr_32 = V_MAX_I16_e64 [[V_MIN_I16_e64_]], [[COPY2]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX8-NEXT: [[V_MAX_I16_e64_:%[0-9]+]]:vgpr_32 = V_MAX_I16_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec + ; GFX8-NEXT: [[V_MIN_I16_e64_:%[0-9]+]]:vgpr_32 = V_MIN_I16_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec + ; GFX8-NEXT: [[V_MAX_I16_e64_1:%[0-9]+]]:vgpr_32 = V_MAX_I16_e64 [[V_MIN_I16_e64_]], [[PRED_COPY2]], implicit $exec ; GFX8-NEXT: [[V_MIN_I16_e64_1:%[0-9]+]]:vgpr_32 = V_MIN_I16_e64 [[V_MAX_I16_e64_]], [[V_MAX_I16_e64_1]], implicit $exec ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_MIN_I16_e64_1]] ; GFX9-LABEL: name: smed3_s16_vvv ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: [[V_MED3_I16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_I16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[V_MED3_I16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_I16_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, [[PRED_COPY2]], 0, 0, implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_MED3_I16_e64_]] ; GFX11-LABEL: name: smed3_s16_vvv ; GFX11: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX11-NEXT: [[V_MED3_I16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_I16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX11-NEXT: [[V_MED3_I16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_I16_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, [[PRED_COPY2]], 0, 0, implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MED3_I16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -67,31 +67,31 @@ ; GFX8-LABEL: name: smed3_s16_vvv_multiuse0 ; GFX8: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8-NEXT: 
[[V_MAX_I16_e64_:%[0-9]+]]:vgpr_32 = V_MAX_I16_e64 [[COPY]], [[COPY1]], implicit $exec - ; GFX8-NEXT: [[V_MIN_I16_e64_:%[0-9]+]]:vgpr_32 = V_MIN_I16_e64 [[COPY]], [[COPY1]], implicit $exec - ; GFX8-NEXT: [[V_MAX_I16_e64_1:%[0-9]+]]:vgpr_32 = V_MAX_I16_e64 [[V_MIN_I16_e64_]], [[COPY2]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX8-NEXT: [[V_MAX_I16_e64_:%[0-9]+]]:vgpr_32 = V_MAX_I16_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec + ; GFX8-NEXT: [[V_MIN_I16_e64_:%[0-9]+]]:vgpr_32 = V_MIN_I16_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec + ; GFX8-NEXT: [[V_MAX_I16_e64_1:%[0-9]+]]:vgpr_32 = V_MAX_I16_e64 [[V_MIN_I16_e64_]], [[PRED_COPY2]], implicit $exec ; GFX8-NEXT: [[V_MIN_I16_e64_1:%[0-9]+]]:vgpr_32 = V_MIN_I16_e64 [[V_MAX_I16_e64_]], [[V_MAX_I16_e64_1]], implicit $exec ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_MIN_I16_e64_1]], implicit [[V_MAX_I16_e64_]] ; GFX9-LABEL: name: smed3_s16_vvv_multiuse0 ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: [[V_MAX_I16_e64_:%[0-9]+]]:vgpr_32 = V_MAX_I16_e64 [[COPY]], [[COPY1]], implicit $exec - ; GFX9-NEXT: [[V_MED3_I16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_I16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[V_MAX_I16_e64_:%[0-9]+]]:vgpr_32 = V_MAX_I16_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec + ; GFX9-NEXT: [[V_MED3_I16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_I16_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, [[PRED_COPY2]], 0, 0, implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_MED3_I16_e64_]], implicit [[V_MAX_I16_e64_]] ; GFX11-LABEL: name: smed3_s16_vvv_multiuse0 ; GFX11: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX11-NEXT: [[V_MAX_I16_t16_e64_:%[0-9]+]]:vgpr_32 = V_MAX_I16_t16_e64 [[COPY]], [[COPY1]], implicit $exec - ; GFX11-NEXT: [[V_MED3_I16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_I16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX11-NEXT: [[V_MAX_I16_t16_e64_:%[0-9]+]]:vgpr_32 = V_MAX_I16_t16_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec + ; GFX11-NEXT: [[V_MED3_I16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_I16_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, [[PRED_COPY2]], 0, 0, implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MED3_I16_e64_]], implicit [[V_MAX_I16_t16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -120,31 +120,31 @@ ; GFX8-LABEL: name: smed3_s16_vvv_multiuse1 ; GFX8: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8-NEXT: [[V_MAX_I16_e64_:%[0-9]+]]:vgpr_32 = V_MAX_I16_e64 
[[COPY]], [[COPY1]], implicit $exec - ; GFX8-NEXT: [[V_MIN_I16_e64_:%[0-9]+]]:vgpr_32 = V_MIN_I16_e64 [[COPY]], [[COPY1]], implicit $exec - ; GFX8-NEXT: [[V_MAX_I16_e64_1:%[0-9]+]]:vgpr_32 = V_MAX_I16_e64 [[V_MIN_I16_e64_]], [[COPY2]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX8-NEXT: [[V_MAX_I16_e64_:%[0-9]+]]:vgpr_32 = V_MAX_I16_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec + ; GFX8-NEXT: [[V_MIN_I16_e64_:%[0-9]+]]:vgpr_32 = V_MIN_I16_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec + ; GFX8-NEXT: [[V_MAX_I16_e64_1:%[0-9]+]]:vgpr_32 = V_MAX_I16_e64 [[V_MIN_I16_e64_]], [[PRED_COPY2]], implicit $exec ; GFX8-NEXT: [[V_MIN_I16_e64_1:%[0-9]+]]:vgpr_32 = V_MIN_I16_e64 [[V_MAX_I16_e64_]], [[V_MAX_I16_e64_1]], implicit $exec ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_MIN_I16_e64_1]], implicit [[V_MIN_I16_e64_]] ; GFX9-LABEL: name: smed3_s16_vvv_multiuse1 ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: [[V_MIN_I16_e64_:%[0-9]+]]:vgpr_32 = V_MIN_I16_e64 [[COPY]], [[COPY1]], implicit $exec - ; GFX9-NEXT: [[V_MED3_I16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_I16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[V_MIN_I16_e64_:%[0-9]+]]:vgpr_32 = V_MIN_I16_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec + ; GFX9-NEXT: [[V_MED3_I16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_I16_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, [[PRED_COPY2]], 0, 0, implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_MED3_I16_e64_]], implicit [[V_MIN_I16_e64_]] ; GFX11-LABEL: name: smed3_s16_vvv_multiuse1 ; GFX11: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX11-NEXT: [[V_MIN_I16_t16_e64_:%[0-9]+]]:vgpr_32 = V_MIN_I16_t16_e64 [[COPY]], [[COPY1]], implicit $exec - ; GFX11-NEXT: [[V_MED3_I16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_I16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX11-NEXT: [[V_MIN_I16_t16_e64_:%[0-9]+]]:vgpr_32 = V_MIN_I16_t16_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec + ; GFX11-NEXT: [[V_MED3_I16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_I16_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, [[PRED_COPY2]], 0, 0, implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MED3_I16_e64_]], implicit [[V_MIN_I16_t16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -173,33 +173,33 @@ ; GFX8-LABEL: name: smed3_s16_vvv_multiuse2 ; GFX8: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8-NEXT: [[V_MAX_I16_e64_:%[0-9]+]]:vgpr_32 = V_MAX_I16_e64 [[COPY]], [[COPY1]], implicit $exec - ; GFX8-NEXT: 
[[V_MIN_I16_e64_:%[0-9]+]]:vgpr_32 = V_MIN_I16_e64 [[COPY]], [[COPY1]], implicit $exec - ; GFX8-NEXT: [[V_MAX_I16_e64_1:%[0-9]+]]:vgpr_32 = V_MAX_I16_e64 [[V_MIN_I16_e64_]], [[COPY2]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX8-NEXT: [[V_MAX_I16_e64_:%[0-9]+]]:vgpr_32 = V_MAX_I16_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec + ; GFX8-NEXT: [[V_MIN_I16_e64_:%[0-9]+]]:vgpr_32 = V_MIN_I16_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec + ; GFX8-NEXT: [[V_MAX_I16_e64_1:%[0-9]+]]:vgpr_32 = V_MAX_I16_e64 [[V_MIN_I16_e64_]], [[PRED_COPY2]], implicit $exec ; GFX8-NEXT: [[V_MIN_I16_e64_1:%[0-9]+]]:vgpr_32 = V_MIN_I16_e64 [[V_MAX_I16_e64_]], [[V_MAX_I16_e64_1]], implicit $exec ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_MIN_I16_e64_1]], implicit [[V_MAX_I16_e64_1]] ; GFX9-LABEL: name: smed3_s16_vvv_multiuse2 ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: [[V_MIN_I16_e64_:%[0-9]+]]:vgpr_32 = V_MIN_I16_e64 [[COPY]], [[COPY1]], implicit $exec - ; GFX9-NEXT: [[V_MAX_I16_e64_:%[0-9]+]]:vgpr_32 = V_MAX_I16_e64 [[V_MIN_I16_e64_]], [[COPY2]], implicit $exec - ; GFX9-NEXT: [[V_MED3_I16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_I16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[V_MIN_I16_e64_:%[0-9]+]]:vgpr_32 = V_MIN_I16_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec + ; GFX9-NEXT: [[V_MAX_I16_e64_:%[0-9]+]]:vgpr_32 = V_MAX_I16_e64 [[V_MIN_I16_e64_]], [[PRED_COPY2]], implicit $exec + ; GFX9-NEXT: [[V_MED3_I16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_I16_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, [[PRED_COPY2]], 0, 0, implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_MED3_I16_e64_]], implicit [[V_MAX_I16_e64_]] ; GFX11-LABEL: name: smed3_s16_vvv_multiuse2 ; GFX11: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX11-NEXT: [[V_MIN_I16_t16_e64_:%[0-9]+]]:vgpr_32 = V_MIN_I16_t16_e64 [[COPY]], [[COPY1]], implicit $exec - ; GFX11-NEXT: [[V_MAX_I16_t16_e64_:%[0-9]+]]:vgpr_32 = V_MAX_I16_t16_e64 [[V_MIN_I16_t16_e64_]], [[COPY2]], implicit $exec - ; GFX11-NEXT: [[V_MED3_I16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_I16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX11-NEXT: [[V_MIN_I16_t16_e64_:%[0-9]+]]:vgpr_32 = V_MIN_I16_t16_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec + ; GFX11-NEXT: [[V_MAX_I16_t16_e64_:%[0-9]+]]:vgpr_32 = V_MAX_I16_t16_e64 [[V_MIN_I16_t16_e64_]], [[PRED_COPY2]], implicit $exec + ; GFX11-NEXT: [[V_MED3_I16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_I16_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, [[PRED_COPY2]], 0, 0, implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MED3_I16_e64_]], implicit [[V_MAX_I16_t16_e64_]] %0:vgpr(s32) = COPY $vgpr0 
%1:vgpr(s32) = COPY $vgpr1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-umed3.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-umed3.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-umed3.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-umed3.mir @@ -13,10 +13,10 @@ ; GFX6-LABEL: name: umed3_s32_vvv ; GFX6: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX6-NEXT: [[V_MED3_U32_e64_:%[0-9]+]]:vgpr_32 = V_MED3_U32_e64 [[COPY]], [[COPY1]], [[COPY2]], implicit $exec + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX6-NEXT: [[V_MED3_U32_e64_:%[0-9]+]]:vgpr_32 = V_MED3_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], [[PRED_COPY2]], implicit $exec ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_MED3_U32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -41,12 +41,12 @@ ; GFX6-LABEL: name: umed3_s32_sss ; GFX6: liveins: $sgpr0, $sgpr1, $sgpr2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6-NEXT: [[S_MAX_U32_:%[0-9]+]]:sreg_32 = S_MAX_U32 [[COPY]], [[COPY1]], implicit-def $scc - ; GFX6-NEXT: [[S_MIN_U32_:%[0-9]+]]:sreg_32 = S_MIN_U32 [[COPY]], [[COPY1]], implicit-def $scc - ; GFX6-NEXT: [[S_MAX_U32_1:%[0-9]+]]:sreg_32 = S_MAX_U32 [[S_MIN_U32_]], [[COPY2]], implicit-def $scc + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX6-NEXT: [[S_MAX_U32_:%[0-9]+]]:sreg_32 = S_MAX_U32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc + ; GFX6-NEXT: [[S_MIN_U32_:%[0-9]+]]:sreg_32 = S_MIN_U32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc + ; GFX6-NEXT: [[S_MAX_U32_1:%[0-9]+]]:sreg_32 = S_MAX_U32 [[S_MIN_U32_]], [[PRED_COPY2]], implicit-def $scc ; GFX6-NEXT: [[S_MIN_U32_1:%[0-9]+]]:sreg_32 = S_MIN_U32 [[S_MAX_U32_]], [[S_MAX_U32_1]], implicit-def $scc ; GFX6-NEXT: S_ENDPGM 0, implicit [[S_MIN_U32_1]] %0:sgpr(s32) = COPY $sgpr0 @@ -71,11 +71,11 @@ ; GFX6-LABEL: name: umed3_s32_vvv_multiuse0 ; GFX6: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX6-NEXT: [[V_MAX_U32_e64_:%[0-9]+]]:vgpr_32 = V_MAX_U32_e64 [[COPY]], [[COPY1]], implicit $exec - ; GFX6-NEXT: [[V_MED3_U32_e64_:%[0-9]+]]:vgpr_32 = V_MED3_U32_e64 [[COPY]], [[COPY1]], [[COPY2]], implicit $exec + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX6-NEXT: [[V_MAX_U32_e64_:%[0-9]+]]:vgpr_32 = V_MAX_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec + ; GFX6-NEXT: [[V_MED3_U32_e64_:%[0-9]+]]:vgpr_32 = V_MED3_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], [[PRED_COPY2]], implicit $exec ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_MED3_U32_e64_]], implicit [[V_MAX_U32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -99,11 +99,11 @@ ; GFX6-LABEL: name: 
umed3_s32_vvv_multiuse1 ; GFX6: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX6-NEXT: [[V_MIN_U32_e64_:%[0-9]+]]:vgpr_32 = V_MIN_U32_e64 [[COPY]], [[COPY1]], implicit $exec - ; GFX6-NEXT: [[V_MED3_U32_e64_:%[0-9]+]]:vgpr_32 = V_MED3_U32_e64 [[COPY]], [[COPY1]], [[COPY2]], implicit $exec + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX6-NEXT: [[V_MIN_U32_e64_:%[0-9]+]]:vgpr_32 = V_MIN_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec + ; GFX6-NEXT: [[V_MED3_U32_e64_:%[0-9]+]]:vgpr_32 = V_MED3_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], [[PRED_COPY2]], implicit $exec ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_MED3_U32_e64_]], implicit [[V_MIN_U32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -127,12 +127,12 @@ ; GFX6-LABEL: name: umed3_s32_vvv_multiuse2 ; GFX6: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX6-NEXT: [[V_MIN_U32_e64_:%[0-9]+]]:vgpr_32 = V_MIN_U32_e64 [[COPY]], [[COPY1]], implicit $exec - ; GFX6-NEXT: [[V_MAX_U32_e64_:%[0-9]+]]:vgpr_32 = V_MAX_U32_e64 [[V_MIN_U32_e64_]], [[COPY2]], implicit $exec - ; GFX6-NEXT: [[V_MED3_U32_e64_:%[0-9]+]]:vgpr_32 = V_MED3_U32_e64 [[COPY]], [[COPY1]], [[COPY2]], implicit $exec + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX6-NEXT: [[V_MIN_U32_e64_:%[0-9]+]]:vgpr_32 = V_MIN_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec + ; GFX6-NEXT: [[V_MAX_U32_e64_:%[0-9]+]]:vgpr_32 = V_MAX_U32_e64 [[V_MIN_U32_e64_]], [[PRED_COPY2]], implicit $exec + ; GFX6-NEXT: [[V_MED3_U32_e64_:%[0-9]+]]:vgpr_32 = V_MED3_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], [[PRED_COPY2]], implicit $exec ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_MED3_U32_e64_]], implicit [[V_MAX_U32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -156,12 +156,12 @@ ; GFX6-LABEL: name: smed3_s32_vvv_reuse_bounds ; GFX6: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX6-NEXT: [[V_MED3_U32_e64_:%[0-9]+]]:vgpr_32 = V_MED3_U32_e64 [[COPY]], [[COPY1]], [[COPY2]], implicit $exec - ; GFX6-NEXT: [[V_MED3_U32_e64_1:%[0-9]+]]:vgpr_32 = V_MED3_U32_e64 [[COPY]], [[COPY1]], [[COPY3]], implicit $exec + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX6-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX6-NEXT: [[V_MED3_U32_e64_:%[0-9]+]]:vgpr_32 = V_MED3_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], [[PRED_COPY2]], implicit $exec + ; GFX6-NEXT: [[V_MED3_U32_e64_1:%[0-9]+]]:vgpr_32 = V_MED3_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], [[PRED_COPY3]], implicit $exec ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_MED3_U32_e64_]], implicit [[V_MED3_U32_e64_1]] %0:vgpr(s32) = 
COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-umed3.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-umed3.s16.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-umed3.s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-umed3.s16.mir @@ -16,29 +16,29 @@ ; GFX8-LABEL: name: umed3_s16_vvv ; GFX8: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8-NEXT: [[V_MAX_U16_e64_:%[0-9]+]]:vgpr_32 = V_MAX_U16_e64 [[COPY]], [[COPY1]], implicit $exec - ; GFX8-NEXT: [[V_MIN_U16_e64_:%[0-9]+]]:vgpr_32 = V_MIN_U16_e64 [[COPY]], [[COPY1]], implicit $exec - ; GFX8-NEXT: [[V_MAX_U16_e64_1:%[0-9]+]]:vgpr_32 = V_MAX_U16_e64 [[V_MIN_U16_e64_]], [[COPY2]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX8-NEXT: [[V_MAX_U16_e64_:%[0-9]+]]:vgpr_32 = V_MAX_U16_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec + ; GFX8-NEXT: [[V_MIN_U16_e64_:%[0-9]+]]:vgpr_32 = V_MIN_U16_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec + ; GFX8-NEXT: [[V_MAX_U16_e64_1:%[0-9]+]]:vgpr_32 = V_MAX_U16_e64 [[V_MIN_U16_e64_]], [[PRED_COPY2]], implicit $exec ; GFX8-NEXT: [[V_MIN_U16_e64_1:%[0-9]+]]:vgpr_32 = V_MIN_U16_e64 [[V_MAX_U16_e64_]], [[V_MAX_U16_e64_1]], implicit $exec ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_MIN_U16_e64_1]] ; GFX9-LABEL: name: umed3_s16_vvv ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: [[V_MED3_U16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_U16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[V_MED3_U16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_U16_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, [[PRED_COPY2]], 0, 0, implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_MED3_U16_e64_]] ; GFX11-LABEL: name: umed3_s16_vvv ; GFX11: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX11-NEXT: [[V_MED3_U16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_U16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX11-NEXT: [[V_MED3_U16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_U16_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, [[PRED_COPY2]], 0, 0, implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MED3_U16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -67,31 +67,31 @@ ; GFX8-LABEL: name: umed3_s16_vvv_multiuse0 ; GFX8: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 
= COPY $vgpr2 - ; GFX8-NEXT: [[V_MAX_U16_e64_:%[0-9]+]]:vgpr_32 = V_MAX_U16_e64 [[COPY]], [[COPY1]], implicit $exec - ; GFX8-NEXT: [[V_MIN_U16_e64_:%[0-9]+]]:vgpr_32 = V_MIN_U16_e64 [[COPY]], [[COPY1]], implicit $exec - ; GFX8-NEXT: [[V_MAX_U16_e64_1:%[0-9]+]]:vgpr_32 = V_MAX_U16_e64 [[V_MIN_U16_e64_]], [[COPY2]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX8-NEXT: [[V_MAX_U16_e64_:%[0-9]+]]:vgpr_32 = V_MAX_U16_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec + ; GFX8-NEXT: [[V_MIN_U16_e64_:%[0-9]+]]:vgpr_32 = V_MIN_U16_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec + ; GFX8-NEXT: [[V_MAX_U16_e64_1:%[0-9]+]]:vgpr_32 = V_MAX_U16_e64 [[V_MIN_U16_e64_]], [[PRED_COPY2]], implicit $exec ; GFX8-NEXT: [[V_MIN_U16_e64_1:%[0-9]+]]:vgpr_32 = V_MIN_U16_e64 [[V_MAX_U16_e64_]], [[V_MAX_U16_e64_1]], implicit $exec ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_MIN_U16_e64_1]], implicit [[V_MAX_U16_e64_]] ; GFX9-LABEL: name: umed3_s16_vvv_multiuse0 ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: [[V_MAX_U16_e64_:%[0-9]+]]:vgpr_32 = V_MAX_U16_e64 [[COPY]], [[COPY1]], implicit $exec - ; GFX9-NEXT: [[V_MED3_U16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_U16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[V_MAX_U16_e64_:%[0-9]+]]:vgpr_32 = V_MAX_U16_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec + ; GFX9-NEXT: [[V_MED3_U16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_U16_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, [[PRED_COPY2]], 0, 0, implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_MED3_U16_e64_]], implicit [[V_MAX_U16_e64_]] ; GFX11-LABEL: name: umed3_s16_vvv_multiuse0 ; GFX11: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX11-NEXT: [[V_MAX_U16_t16_e64_:%[0-9]+]]:vgpr_32 = V_MAX_U16_t16_e64 [[COPY]], [[COPY1]], implicit $exec - ; GFX11-NEXT: [[V_MED3_U16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_U16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX11-NEXT: [[V_MAX_U16_t16_e64_:%[0-9]+]]:vgpr_32 = V_MAX_U16_t16_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec + ; GFX11-NEXT: [[V_MED3_U16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_U16_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, [[PRED_COPY2]], 0, 0, implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MED3_U16_e64_]], implicit [[V_MAX_U16_t16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -120,31 +120,31 @@ ; GFX8-LABEL: name: umed3_s16_vvv_multiuse1 ; GFX8: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8-NEXT: 
[[V_MAX_U16_e64_:%[0-9]+]]:vgpr_32 = V_MAX_U16_e64 [[COPY]], [[COPY1]], implicit $exec - ; GFX8-NEXT: [[V_MIN_U16_e64_:%[0-9]+]]:vgpr_32 = V_MIN_U16_e64 [[COPY]], [[COPY1]], implicit $exec - ; GFX8-NEXT: [[V_MAX_U16_e64_1:%[0-9]+]]:vgpr_32 = V_MAX_U16_e64 [[V_MIN_U16_e64_]], [[COPY2]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX8-NEXT: [[V_MAX_U16_e64_:%[0-9]+]]:vgpr_32 = V_MAX_U16_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec + ; GFX8-NEXT: [[V_MIN_U16_e64_:%[0-9]+]]:vgpr_32 = V_MIN_U16_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec + ; GFX8-NEXT: [[V_MAX_U16_e64_1:%[0-9]+]]:vgpr_32 = V_MAX_U16_e64 [[V_MIN_U16_e64_]], [[PRED_COPY2]], implicit $exec ; GFX8-NEXT: [[V_MIN_U16_e64_1:%[0-9]+]]:vgpr_32 = V_MIN_U16_e64 [[V_MAX_U16_e64_]], [[V_MAX_U16_e64_1]], implicit $exec ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_MIN_U16_e64_1]], implicit [[V_MIN_U16_e64_]] ; GFX9-LABEL: name: umed3_s16_vvv_multiuse1 ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: [[V_MIN_U16_e64_:%[0-9]+]]:vgpr_32 = V_MIN_U16_e64 [[COPY]], [[COPY1]], implicit $exec - ; GFX9-NEXT: [[V_MED3_U16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_U16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[V_MIN_U16_e64_:%[0-9]+]]:vgpr_32 = V_MIN_U16_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec + ; GFX9-NEXT: [[V_MED3_U16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_U16_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, [[PRED_COPY2]], 0, 0, implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_MED3_U16_e64_]], implicit [[V_MIN_U16_e64_]] ; GFX11-LABEL: name: umed3_s16_vvv_multiuse1 ; GFX11: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX11-NEXT: [[V_MIN_U16_t16_e64_:%[0-9]+]]:vgpr_32 = V_MIN_U16_t16_e64 [[COPY]], [[COPY1]], implicit $exec - ; GFX11-NEXT: [[V_MED3_U16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_U16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX11-NEXT: [[V_MIN_U16_t16_e64_:%[0-9]+]]:vgpr_32 = V_MIN_U16_t16_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec + ; GFX11-NEXT: [[V_MED3_U16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_U16_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, [[PRED_COPY2]], 0, 0, implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MED3_U16_e64_]], implicit [[V_MIN_U16_t16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -173,33 +173,33 @@ ; GFX8-LABEL: name: umed3_s16_vvv_multiuse2 ; GFX8: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8-NEXT: [[V_MAX_U16_e64_:%[0-9]+]]:vgpr_32 = V_MAX_U16_e64 
[[COPY]], [[COPY1]], implicit $exec - ; GFX8-NEXT: [[V_MIN_U16_e64_:%[0-9]+]]:vgpr_32 = V_MIN_U16_e64 [[COPY]], [[COPY1]], implicit $exec - ; GFX8-NEXT: [[V_MAX_U16_e64_1:%[0-9]+]]:vgpr_32 = V_MAX_U16_e64 [[V_MIN_U16_e64_]], [[COPY2]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX8-NEXT: [[V_MAX_U16_e64_:%[0-9]+]]:vgpr_32 = V_MAX_U16_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec + ; GFX8-NEXT: [[V_MIN_U16_e64_:%[0-9]+]]:vgpr_32 = V_MIN_U16_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec + ; GFX8-NEXT: [[V_MAX_U16_e64_1:%[0-9]+]]:vgpr_32 = V_MAX_U16_e64 [[V_MIN_U16_e64_]], [[PRED_COPY2]], implicit $exec ; GFX8-NEXT: [[V_MIN_U16_e64_1:%[0-9]+]]:vgpr_32 = V_MIN_U16_e64 [[V_MAX_U16_e64_]], [[V_MAX_U16_e64_1]], implicit $exec ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_MIN_U16_e64_1]], implicit [[V_MAX_U16_e64_1]] ; GFX9-LABEL: name: umed3_s16_vvv_multiuse2 ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: [[V_MIN_U16_e64_:%[0-9]+]]:vgpr_32 = V_MIN_U16_e64 [[COPY]], [[COPY1]], implicit $exec - ; GFX9-NEXT: [[V_MAX_U16_e64_:%[0-9]+]]:vgpr_32 = V_MAX_U16_e64 [[V_MIN_U16_e64_]], [[COPY2]], implicit $exec - ; GFX9-NEXT: [[V_MED3_U16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_U16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[V_MIN_U16_e64_:%[0-9]+]]:vgpr_32 = V_MIN_U16_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec + ; GFX9-NEXT: [[V_MAX_U16_e64_:%[0-9]+]]:vgpr_32 = V_MAX_U16_e64 [[V_MIN_U16_e64_]], [[PRED_COPY2]], implicit $exec + ; GFX9-NEXT: [[V_MED3_U16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_U16_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, [[PRED_COPY2]], 0, 0, implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_MED3_U16_e64_]], implicit [[V_MAX_U16_e64_]] ; GFX11-LABEL: name: umed3_s16_vvv_multiuse2 ; GFX11: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX11-NEXT: [[V_MIN_U16_t16_e64_:%[0-9]+]]:vgpr_32 = V_MIN_U16_t16_e64 [[COPY]], [[COPY1]], implicit $exec - ; GFX11-NEXT: [[V_MAX_U16_t16_e64_:%[0-9]+]]:vgpr_32 = V_MAX_U16_t16_e64 [[V_MIN_U16_t16_e64_]], [[COPY2]], implicit $exec - ; GFX11-NEXT: [[V_MED3_U16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_U16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX11-NEXT: [[V_MIN_U16_t16_e64_:%[0-9]+]]:vgpr_32 = V_MIN_U16_t16_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec + ; GFX11-NEXT: [[V_MAX_U16_t16_e64_:%[0-9]+]]:vgpr_32 = V_MAX_U16_t16_e64 [[V_MIN_U16_t16_e64_]], [[PRED_COPY2]], implicit $exec + ; GFX11-NEXT: [[V_MED3_U16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_U16_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, [[PRED_COPY2]], 0, 0, implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MED3_U16_e64_]], implicit 
[[V_MAX_U16_t16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-xor3.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-xor3.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-xor3.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-xor3.mir @@ -17,29 +17,29 @@ ; GFX8-LABEL: name: xor_s32_sgpr_sgpr_sgpr ; GFX8: liveins: $sgpr0, $sgpr1, $sgpr2 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY]], [[COPY1]], implicit-def $scc - ; GFX8-NEXT: [[S_XOR_B32_1:%[0-9]+]]:sreg_32 = S_XOR_B32 [[S_XOR_B32_]], [[COPY2]], implicit-def $scc + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX8-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc + ; GFX8-NEXT: [[S_XOR_B32_1:%[0-9]+]]:sreg_32 = S_XOR_B32 [[S_XOR_B32_]], [[PRED_COPY2]], implicit-def $scc ; GFX8-NEXT: S_ENDPGM 0, implicit [[S_XOR_B32_1]] ; GFX9-LABEL: name: xor_s32_sgpr_sgpr_sgpr ; GFX9: liveins: $sgpr0, $sgpr1, $sgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX9-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY]], [[COPY1]], implicit-def $scc - ; GFX9-NEXT: [[S_XOR_B32_1:%[0-9]+]]:sreg_32 = S_XOR_B32 [[S_XOR_B32_]], [[COPY2]], implicit-def $scc + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc + ; GFX9-NEXT: [[S_XOR_B32_1:%[0-9]+]]:sreg_32 = S_XOR_B32 [[S_XOR_B32_]], [[PRED_COPY2]], implicit-def $scc ; GFX9-NEXT: S_ENDPGM 0, implicit [[S_XOR_B32_1]] ; GFX10-LABEL: name: xor_s32_sgpr_sgpr_sgpr ; GFX10: liveins: $sgpr0, $sgpr1, $sgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX10-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY]], [[COPY1]], implicit-def $scc - ; GFX10-NEXT: [[S_XOR_B32_1:%[0-9]+]]:sreg_32 = S_XOR_B32 [[S_XOR_B32_]], [[COPY2]], implicit-def $scc + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX10-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc + ; GFX10-NEXT: [[S_XOR_B32_1:%[0-9]+]]:sreg_32 = S_XOR_B32 [[S_XOR_B32_]], [[PRED_COPY2]], implicit-def $scc ; GFX10-NEXT: S_ENDPGM 0, implicit [[S_XOR_B32_1]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 @@ -62,28 +62,28 @@ ; GFX8-LABEL: name: xor_s32_vgpr_vgpr_vgpr ; GFX8: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8-NEXT: 
[[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[COPY]], [[COPY1]], implicit $exec - ; GFX8-NEXT: [[V_XOR_B32_e64_1:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[V_XOR_B32_e64_]], [[COPY2]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX8-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec + ; GFX8-NEXT: [[V_XOR_B32_e64_1:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[V_XOR_B32_e64_]], [[PRED_COPY2]], implicit $exec ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_XOR_B32_e64_1]] ; GFX9-LABEL: name: xor_s32_vgpr_vgpr_vgpr ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[COPY]], [[COPY1]], implicit $exec - ; GFX9-NEXT: [[V_XOR_B32_e64_1:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[V_XOR_B32_e64_]], [[COPY2]], implicit $exec + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec + ; GFX9-NEXT: [[V_XOR_B32_e64_1:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[V_XOR_B32_e64_]], [[PRED_COPY2]], implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_XOR_B32_e64_1]] ; GFX10-LABEL: name: xor_s32_vgpr_vgpr_vgpr ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10-NEXT: [[V_XOR3_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR3_B32_e64 [[COPY]], [[COPY1]], [[COPY2]], implicit $exec + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[V_XOR3_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR3_B32_e64 [[PRED_COPY]], [[PRED_COPY1]], [[PRED_COPY2]], implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_XOR3_B32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -110,32 +110,32 @@ ; GFX8-LABEL: name: xor_s32_sgpr_sgpr_vgpr_copy ; GFX8: liveins: $sgpr0, $sgpr1, $vgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY]], [[COPY1]], implicit-def $scc - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[S_XOR_B32_]] - ; GFX8-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[COPY3]], [[COPY2]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_XOR_B32_]] + ; GFX8-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[PRED_COPY3]], [[PRED_COPY2]], implicit $exec ; GFX8-NEXT: S_ENDPGM 0, implicit 
[[V_XOR_B32_e64_]] ; GFX9-LABEL: name: xor_s32_sgpr_sgpr_vgpr_copy ; GFX9: liveins: $sgpr0, $sgpr1, $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY]], [[COPY1]], implicit-def $scc - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[S_XOR_B32_]] - ; GFX9-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[COPY3]], [[COPY2]], implicit $exec + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_XOR_B32_]] + ; GFX9-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[PRED_COPY3]], [[PRED_COPY2]], implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_XOR_B32_e64_]] ; GFX10-LABEL: name: xor_s32_sgpr_sgpr_vgpr_copy ; GFX10: liveins: $sgpr0, $sgpr1, $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY]], [[COPY1]], implicit-def $scc - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[S_XOR_B32_]] - ; GFX10-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[COPY3]], [[COPY2]], implicit $exec + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_XOR_B32_]] + ; GFX10-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[PRED_COPY3]], [[PRED_COPY2]], implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_XOR_B32_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 @@ -160,32 +160,32 @@ ; GFX8-LABEL: name: xor_s32_sgpr_sgpr_vgpr_copy_commute ; GFX8: liveins: $sgpr0, $sgpr1, $vgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY]], [[COPY1]], implicit-def $scc - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[S_XOR_B32_]] - ; GFX8-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[COPY2]], [[COPY3]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_XOR_B32_]] + ; GFX8-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[PRED_COPY2]], [[PRED_COPY3]], implicit $exec ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_XOR_B32_e64_]] ; GFX9-LABEL: name: xor_s32_sgpr_sgpr_vgpr_copy_commute ; GFX9: liveins: $sgpr0, $sgpr1, $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 
= COPY $sgpr1 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY]], [[COPY1]], implicit-def $scc - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[S_XOR_B32_]] - ; GFX9-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[COPY2]], [[COPY3]], implicit $exec + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_XOR_B32_]] + ; GFX9-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[PRED_COPY2]], [[PRED_COPY3]], implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_XOR_B32_e64_]] ; GFX10-LABEL: name: xor_s32_sgpr_sgpr_vgpr_copy_commute ; GFX10: liveins: $sgpr0, $sgpr1, $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY]], [[COPY1]], implicit-def $scc - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[S_XOR_B32_]] - ; GFX10-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[COPY2]], [[COPY3]], implicit $exec + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_XOR_B32_]] + ; GFX10-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[PRED_COPY2]], [[PRED_COPY3]], implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_XOR_B32_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 @@ -210,28 +210,28 @@ ; GFX8-LABEL: name: xor_s32_sgpr_sgpr_vgpr ; GFX8: liveins: $sgpr0, $sgpr1, $vgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY]], [[COPY1]], implicit-def $scc - ; GFX8-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_XOR_B32_]], [[COPY2]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc + ; GFX8-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_XOR_B32_]], [[PRED_COPY2]], implicit $exec ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_XOR_B32_e64_]] ; GFX9-LABEL: name: xor_s32_sgpr_sgpr_vgpr ; GFX9: liveins: $sgpr0, $sgpr1, $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY]], [[COPY1]], implicit-def $scc - ; GFX9-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_XOR_B32_]], [[COPY2]], implicit $exec + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX9-NEXT: 
[[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc + ; GFX9-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_XOR_B32_]], [[PRED_COPY2]], implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_XOR_B32_e64_]] ; GFX10-LABEL: name: xor_s32_sgpr_sgpr_vgpr ; GFX10: liveins: $sgpr0, $sgpr1, $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[V_XOR3_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR3_B32_e64 [[COPY]], [[COPY1]], [[COPY2]], implicit $exec + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[V_XOR3_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR3_B32_e64 [[PRED_COPY]], [[PRED_COPY1]], [[PRED_COPY2]], implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_XOR3_B32_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-phi.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-phi.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-phi.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-phi.mir @@ -13,13 +13,13 @@ ; GCN-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; GCN-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GCN-NEXT: S_CMP_EQ_U32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $scc - ; GCN-NEXT: $scc = COPY [[COPY3]] + ; GCN-NEXT: S_CMP_EQ_U32 [[PRED_COPY2]], [[S_MOV_B32_]], implicit-def $scc + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $scc + ; GCN-NEXT: $scc = PRED_COPY [[PRED_COPY3]] ; GCN-NEXT: S_CBRANCH_SCC1 %bb.1, implicit $scc ; GCN-NEXT: S_BRANCH %bb.2 ; GCN-NEXT: {{ $}} @@ -29,8 +29,8 @@ ; GCN-NEXT: S_BRANCH %bb.2 ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.2: - ; GCN-NEXT: [[PHI:%[0-9]+]]:sreg_32 = PHI [[COPY]], %bb.0, [[COPY1]], %bb.1 - ; GCN-NEXT: $sgpr0 = COPY [[PHI]] + ; GCN-NEXT: [[PHI:%[0-9]+]]:sreg_32 = PHI [[PRED_COPY]], %bb.0, [[PRED_COPY1]], %bb.1 + ; GCN-NEXT: $sgpr0 = PRED_COPY [[PHI]] ; GCN-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31 bb.0: liveins: $sgpr0, $sgpr1, $sgpr2 @@ -66,25 +66,25 @@ ; GCN-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; GCN-NEXT: liveins: $vgpr0, $vgpr1, $sgpr2 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GCN-NEXT: S_CMP_EQ_U32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $scc - ; 
GCN-NEXT: $scc = COPY [[COPY3]] + ; GCN-NEXT: S_CMP_EQ_U32 [[PRED_COPY2]], [[S_MOV_B32_]], implicit-def $scc + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $scc + ; GCN-NEXT: $scc = PRED_COPY [[PRED_COPY3]] ; GCN-NEXT: S_CBRANCH_SCC1 %bb.1, implicit $scc ; GCN-NEXT: S_BRANCH %bb.2 ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.1: ; GCN-NEXT: successors: %bb.2(0x80000000) ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[COPY1]] + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY1]] ; GCN-NEXT: S_BRANCH %bb.2 ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.2: - ; GCN-NEXT: [[PHI:%[0-9]+]]:vgpr_32 = PHI [[COPY]], %bb.0, [[COPY4]], %bb.1 - ; GCN-NEXT: $vgpr0 = COPY [[PHI]] + ; GCN-NEXT: [[PHI:%[0-9]+]]:vgpr_32 = PHI [[PRED_COPY]], %bb.0, [[PRED_COPY4]], %bb.1 + ; GCN-NEXT: $vgpr0 = PRED_COPY [[PHI]] ; GCN-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31 bb.0: liveins: $vgpr0, $vgpr1, $sgpr2 @@ -120,13 +120,13 @@ ; GCN-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; GCN-NEXT: liveins: $sgpr0, $vgpr0, $sgpr1, $sgpr2 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GCN-NEXT: S_CMP_EQ_U32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $scc - ; GCN-NEXT: $scc = COPY [[COPY3]] + ; GCN-NEXT: S_CMP_EQ_U32 [[PRED_COPY2]], [[S_MOV_B32_]], implicit-def $scc + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $scc + ; GCN-NEXT: $scc = PRED_COPY [[PRED_COPY3]] ; GCN-NEXT: S_CBRANCH_SCC1 %bb.1, implicit $scc ; GCN-NEXT: S_BRANCH %bb.2 ; GCN-NEXT: {{ $}} @@ -136,8 +136,8 @@ ; GCN-NEXT: S_BRANCH %bb.2 ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.2: - ; GCN-NEXT: [[PHI:%[0-9]+]]:vgpr_32 = PHI [[COPY]], %bb.0, [[COPY1]], %bb.1 - ; GCN-NEXT: $vgpr0 = COPY [[PHI]] + ; GCN-NEXT: [[PHI:%[0-9]+]]:vgpr_32 = PHI [[PRED_COPY]], %bb.0, [[PRED_COPY1]], %bb.1 + ; GCN-NEXT: $vgpr0 = PRED_COPY [[PHI]] ; GCN-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31 bb.0: liveins: $sgpr0, $vgpr0, $sgpr1, $sgpr2 @@ -173,25 +173,25 @@ ; GCN-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; GCN-NEXT: liveins: $sgpr0, $vgpr0, $sgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GCN-NEXT: S_CMP_EQ_U32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $scc - ; GCN-NEXT: $scc = COPY [[COPY3]] + ; GCN-NEXT: S_CMP_EQ_U32 [[PRED_COPY2]], [[S_MOV_B32_]], implicit-def $scc + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $scc + ; GCN-NEXT: $scc = PRED_COPY [[PRED_COPY3]] ; GCN-NEXT: S_CBRANCH_SCC1 %bb.1, implicit $scc ; GCN-NEXT: S_BRANCH %bb.2 ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.1: ; GCN-NEXT: successors: %bb.2(0x80000000) ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY1]] + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY1]] ; GCN-NEXT: S_BRANCH 
%bb.2 ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.2: - ; GCN-NEXT: [[PHI:%[0-9]+]]:vgpr_32 = PHI [[COPY]], %bb.0, [[COPY4]], %bb.1 - ; GCN-NEXT: $vgpr0 = COPY [[PHI]] + ; GCN-NEXT: [[PHI:%[0-9]+]]:vgpr_32 = PHI [[PRED_COPY]], %bb.0, [[PRED_COPY4]], %bb.1 + ; GCN-NEXT: $vgpr0 = PRED_COPY [[PHI]] ; GCN-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31 bb.0: liveins: $sgpr0, $vgpr0, $sgpr1 @@ -227,13 +227,13 @@ ; GCN-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; GCN-NEXT: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3, $sgpr4 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr2_sgpr3 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GCN-NEXT: S_CMP_EQ_U32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $scc - ; GCN-NEXT: $scc = COPY [[COPY3]] + ; GCN-NEXT: S_CMP_EQ_U32 [[PRED_COPY2]], [[S_MOV_B32_]], implicit-def $scc + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $scc + ; GCN-NEXT: $scc = PRED_COPY [[PRED_COPY3]] ; GCN-NEXT: S_CBRANCH_SCC1 %bb.1, implicit $scc ; GCN-NEXT: S_BRANCH %bb.2 ; GCN-NEXT: {{ $}} @@ -243,8 +243,8 @@ ; GCN-NEXT: S_BRANCH %bb.2 ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.2: - ; GCN-NEXT: [[PHI:%[0-9]+]]:sreg_64 = PHI [[COPY]], %bb.0, [[COPY1]], %bb.1 - ; GCN-NEXT: $sgpr0_sgpr1 = COPY [[PHI]] + ; GCN-NEXT: [[PHI:%[0-9]+]]:sreg_64 = PHI [[PRED_COPY]], %bb.0, [[PRED_COPY1]], %bb.1 + ; GCN-NEXT: $sgpr0_sgpr1 = PRED_COPY [[PHI]] ; GCN-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31 bb.0: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3, $sgpr4 @@ -279,25 +279,25 @@ ; GCN-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; GCN-NEXT: liveins: $vgpr0, $vgpr1, $sgpr2 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GCN-NEXT: S_CMP_EQ_U32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $scc - ; GCN-NEXT: $scc = COPY [[COPY3]] + ; GCN-NEXT: S_CMP_EQ_U32 [[PRED_COPY2]], [[S_MOV_B32_]], implicit-def $scc + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $scc + ; GCN-NEXT: $scc = PRED_COPY [[PRED_COPY3]] ; GCN-NEXT: S_CBRANCH_SCC1 %bb.1, implicit $scc ; GCN-NEXT: S_BRANCH %bb.2 ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.1: ; GCN-NEXT: successors: %bb.2(0x80000000) ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[COPY1]] + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY1]] ; GCN-NEXT: S_BRANCH %bb.2 ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.2: - ; GCN-NEXT: [[PHI:%[0-9]+]]:vgpr_32 = PHI [[COPY]], %bb.0, [[COPY4]], %bb.1 - ; GCN-NEXT: $vgpr0 = COPY [[PHI]] + ; GCN-NEXT: [[PHI:%[0-9]+]]:vgpr_32 = PHI [[PRED_COPY]], %bb.0, [[PRED_COPY4]], %bb.1 + ; GCN-NEXT: $vgpr0 = PRED_COPY [[PHI]] ; GCN-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31 bb.0: liveins: $vgpr0, $vgpr1, $sgpr2 @@ -333,21 +333,21 @@ ; GCN-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; GCN-NEXT: liveins: $vgpr0, $vgpr1, $sgpr2 ; 
GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GCN-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 [[COPY]], [[S_MOV_B32_]], implicit $exec - ; GCN-NEXT: S_CMP_EQ_U32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $scc - ; GCN-NEXT: $scc = COPY [[COPY3]] + ; GCN-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 [[PRED_COPY]], [[S_MOV_B32_]], implicit $exec + ; GCN-NEXT: S_CMP_EQ_U32 [[PRED_COPY2]], [[S_MOV_B32_]], implicit-def $scc + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $scc + ; GCN-NEXT: $scc = PRED_COPY [[PRED_COPY3]] ; GCN-NEXT: S_CBRANCH_SCC1 %bb.1, implicit $scc ; GCN-NEXT: S_BRANCH %bb.2 ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.1: ; GCN-NEXT: successors: %bb.2(0x80000000) ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[V_CMP_EQ_U32_e64_1:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 [[COPY1]], [[S_MOV_B32_]], implicit $exec + ; GCN-NEXT: [[V_CMP_EQ_U32_e64_1:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 [[PRED_COPY1]], [[S_MOV_B32_]], implicit $exec ; GCN-NEXT: S_BRANCH %bb.2 ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.2: @@ -387,13 +387,13 @@ ; GCN-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; GCN-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GCN-NEXT: S_CMP_EQ_U32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $scc - ; GCN-NEXT: $scc = COPY [[COPY3]] + ; GCN-NEXT: S_CMP_EQ_U32 [[PRED_COPY2]], [[S_MOV_B32_]], implicit-def $scc + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $scc + ; GCN-NEXT: $scc = PRED_COPY [[PRED_COPY3]] ; GCN-NEXT: S_CBRANCH_SCC1 %bb.1, implicit $scc ; GCN-NEXT: S_BRANCH %bb.2 ; GCN-NEXT: {{ $}} @@ -403,8 +403,8 @@ ; GCN-NEXT: S_BRANCH %bb.2 ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.2: - ; GCN-NEXT: [[PHI:%[0-9]+]]:sreg_32 = PHI [[COPY]], %bb.0, [[COPY1]], %bb.1 - ; GCN-NEXT: $sgpr0 = COPY [[PHI]] + ; GCN-NEXT: [[PHI:%[0-9]+]]:sreg_32 = PHI [[PRED_COPY]], %bb.0, [[PRED_COPY1]], %bb.1 + ; GCN-NEXT: $sgpr0 = PRED_COPY [[PHI]] ; GCN-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31 bb.0: liveins: $sgpr0, $sgpr1, $sgpr2 @@ -440,25 +440,25 @@ ; GCN-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; GCN-NEXT: liveins: $vgpr0, $vgpr1, $sgpr2 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GCN-NEXT: S_CMP_EQ_U32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $scc - 
; GCN-NEXT: $scc = COPY [[COPY3]] + ; GCN-NEXT: S_CMP_EQ_U32 [[PRED_COPY2]], [[S_MOV_B32_]], implicit-def $scc + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $scc + ; GCN-NEXT: $scc = PRED_COPY [[PRED_COPY3]] ; GCN-NEXT: S_CBRANCH_SCC1 %bb.1, implicit $scc ; GCN-NEXT: S_BRANCH %bb.2 ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.1: ; GCN-NEXT: successors: %bb.2(0x80000000) ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[COPY1]] + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY1]] ; GCN-NEXT: S_BRANCH %bb.2 ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.2: - ; GCN-NEXT: [[PHI:%[0-9]+]]:vgpr_32 = PHI [[COPY]], %bb.0, [[COPY4]], %bb.1 - ; GCN-NEXT: $vgpr0 = COPY [[PHI]] + ; GCN-NEXT: [[PHI:%[0-9]+]]:vgpr_32 = PHI [[PRED_COPY]], %bb.0, [[PRED_COPY4]], %bb.1 + ; GCN-NEXT: $vgpr0 = PRED_COPY [[PHI]] ; GCN-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31 bb.0: liveins: $vgpr0, $vgpr1, $sgpr2 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ptr-add.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ptr-add.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ptr-add.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ptr-add.mir @@ -18,66 +18,66 @@ ; GFX6-LABEL: name: gep_p0_sgpr_sgpr ; GFX6: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY1]].sub0 - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 - ; GFX6-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[COPY1]].sub1 - ; GFX6-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY2]], [[COPY3]], implicit-def $scc - ; GFX6-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY4]], [[COPY5]], implicit-def $scc, implicit $scc + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr2_sgpr3 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX6-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY1]].sub0 + ; GFX6-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX6-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY1]].sub1 + ; GFX6-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PRED_COPY2]], [[PRED_COPY3]], implicit-def $scc + ; GFX6-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[PRED_COPY4]], [[PRED_COPY5]], implicit-def $scc, implicit $scc ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 ; GFX6-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] ; GFX8-LABEL: name: gep_p0_sgpr_sgpr ; GFX8: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY1]].sub0 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 - ; GFX8-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[COPY1]].sub1 - ; GFX8-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY2]], [[COPY3]], implicit-def $scc - ; GFX8-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY4]], [[COPY5]], implicit-def $scc, implicit $scc + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64 = 
PRED_COPY $sgpr2_sgpr3 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY1]].sub0 + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX8-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY1]].sub1 + ; GFX8-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PRED_COPY2]], [[PRED_COPY3]], implicit-def $scc + ; GFX8-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[PRED_COPY4]], [[PRED_COPY5]], implicit-def $scc, implicit $scc ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 ; GFX8-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] ; GFX9-LABEL: name: gep_p0_sgpr_sgpr ; GFX9: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY1]].sub0 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[COPY1]].sub1 - ; GFX9-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY2]], [[COPY3]], implicit-def $scc - ; GFX9-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY4]], [[COPY5]], implicit-def $scc, implicit $scc + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr2_sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY1]].sub0 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY1]].sub1 + ; GFX9-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PRED_COPY2]], [[PRED_COPY3]], implicit-def $scc + ; GFX9-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[PRED_COPY4]], [[PRED_COPY5]], implicit-def $scc, implicit $scc ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 ; GFX9-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] ; GFX10-WAVE64-LABEL: name: gep_p0_sgpr_sgpr ; GFX10-WAVE64: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 ; GFX10-WAVE64-NEXT: {{ $}} - ; GFX10-WAVE64-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX10-WAVE64-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3 - ; GFX10-WAVE64-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 - ; GFX10-WAVE64-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY1]].sub0 - ; GFX10-WAVE64-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 - ; GFX10-WAVE64-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[COPY1]].sub1 - ; GFX10-WAVE64-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY2]], [[COPY3]], implicit-def $scc - ; GFX10-WAVE64-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY4]], [[COPY5]], implicit-def $scc, implicit $scc + ; GFX10-WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX10-WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr2_sgpr3 + ; GFX10-WAVE64-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX10-WAVE64-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY1]].sub0 + ; GFX10-WAVE64-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX10-WAVE64-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY 
[[PRED_COPY1]].sub1 + ; GFX10-WAVE64-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PRED_COPY2]], [[PRED_COPY3]], implicit-def $scc + ; GFX10-WAVE64-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[PRED_COPY4]], [[PRED_COPY5]], implicit-def $scc, implicit $scc ; GFX10-WAVE64-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 ; GFX10-WAVE64-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] ; GFX10-WAVE32-LABEL: name: gep_p0_sgpr_sgpr ; GFX10-WAVE32: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 ; GFX10-WAVE32-NEXT: {{ $}} - ; GFX10-WAVE32-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX10-WAVE32-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3 - ; GFX10-WAVE32-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 - ; GFX10-WAVE32-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY1]].sub0 - ; GFX10-WAVE32-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 - ; GFX10-WAVE32-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[COPY1]].sub1 - ; GFX10-WAVE32-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY2]], [[COPY3]], implicit-def $scc - ; GFX10-WAVE32-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY4]], [[COPY5]], implicit-def $scc, implicit $scc + ; GFX10-WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX10-WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr2_sgpr3 + ; GFX10-WAVE32-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX10-WAVE32-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY1]].sub0 + ; GFX10-WAVE32-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX10-WAVE32-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY1]].sub1 + ; GFX10-WAVE32-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PRED_COPY2]], [[PRED_COPY3]], implicit-def $scc + ; GFX10-WAVE32-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[PRED_COPY4]], [[PRED_COPY5]], implicit-def $scc, implicit $scc ; GFX10-WAVE32-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 ; GFX10-WAVE32-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] %0:sgpr(p0) = COPY $sgpr0_sgpr1 @@ -98,67 +98,67 @@ ; GFX6-LABEL: name: gep_p0_vgpr_vgpr ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub0 - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub1 - ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX6-NEXT: %8:vgpr_32, dead %10:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %8, %subreg.sub1 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX6-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY1]].sub0 + ; GFX6-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX6-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY 
[[PRED_COPY1]].sub1 + ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY2]], [[PRED_COPY3]], 0, implicit $exec + ; GFX6-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY4]], [[PRED_COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX6-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] ; GFX8-LABEL: name: gep_p0_vgpr_vgpr ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub0 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub1 - ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX8-NEXT: %8:vgpr_32, dead %10:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %8, %subreg.sub1 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY1]].sub0 + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX8-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY1]].sub1 + ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY2]], [[PRED_COPY3]], 0, implicit $exec + ; GFX8-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY4]], [[PRED_COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX8-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] ; GFX9-LABEL: name: gep_p0_vgpr_vgpr ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub0 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub1 - ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX9-NEXT: %8:vgpr_32, dead %10:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %8, %subreg.sub1 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY 
[[PRED_COPY]].sub0 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY1]].sub0 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY1]].sub1 + ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY2]], [[PRED_COPY3]], 0, implicit $exec + ; GFX9-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY4]], [[PRED_COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX9-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] ; GFX10-WAVE64-LABEL: name: gep_p0_vgpr_vgpr ; GFX10-WAVE64: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX10-WAVE64-NEXT: {{ $}} - ; GFX10-WAVE64-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-WAVE64-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX10-WAVE64-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX10-WAVE64-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub0 - ; GFX10-WAVE64-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX10-WAVE64-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub1 - ; GFX10-WAVE64-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX10-WAVE64-NEXT: %8:vgpr_32, dead %10:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX10-WAVE64-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %8, %subreg.sub1 + ; GFX10-WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX10-WAVE64-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX10-WAVE64-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY1]].sub0 + ; GFX10-WAVE64-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX10-WAVE64-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY1]].sub1 + ; GFX10-WAVE64-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY2]], [[PRED_COPY3]], 0, implicit $exec + ; GFX10-WAVE64-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY4]], [[PRED_COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX10-WAVE64-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX10-WAVE64-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] ; GFX10-WAVE32-LABEL: name: gep_p0_vgpr_vgpr ; GFX10-WAVE32: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX10-WAVE32-NEXT: {{ $}} - ; GFX10-WAVE32-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-WAVE32-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX10-WAVE32-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX10-WAVE32-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub0 - ; GFX10-WAVE32-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX10-WAVE32-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub1 - ; GFX10-WAVE32-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], 
[[COPY3]], 0, implicit $exec - ; GFX10-WAVE32-NEXT: %8:vgpr_32, dead %10:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX10-WAVE32-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %8, %subreg.sub1 + ; GFX10-WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX10-WAVE32-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX10-WAVE32-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY1]].sub0 + ; GFX10-WAVE32-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX10-WAVE32-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY1]].sub1 + ; GFX10-WAVE32-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[PRED_COPY2]], [[PRED_COPY3]], 0, implicit $exec + ; GFX10-WAVE32-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[PRED_COPY4]], [[PRED_COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX10-WAVE32-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX10-WAVE32-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] %0:vgpr(p0) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = COPY $vgpr2_vgpr3 @@ -178,67 +178,67 @@ ; GFX6-LABEL: name: gep_p0_sgpr_vgpr ; GFX6: liveins: $sgpr0_sgpr1, $vgpr0_vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub0 - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub1 - ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX6-NEXT: %8:vgpr_32, dead %10:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %8, %subreg.sub1 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX6-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY1]].sub0 + ; GFX6-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX6-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY1]].sub1 + ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY2]], [[PRED_COPY3]], 0, implicit $exec + ; GFX6-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY4]], [[PRED_COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX6-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] ; GFX8-LABEL: name: gep_p0_sgpr_vgpr ; GFX8: liveins: $sgpr0_sgpr1, $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX8-NEXT: 
[[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub0 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub1 - ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX8-NEXT: %8:vgpr_32, dead %10:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %8, %subreg.sub1 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY1]].sub0 + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX8-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY1]].sub1 + ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY2]], [[PRED_COPY3]], 0, implicit $exec + ; GFX8-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY4]], [[PRED_COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX8-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] ; GFX9-LABEL: name: gep_p0_sgpr_vgpr ; GFX9: liveins: $sgpr0_sgpr1, $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub0 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub1 - ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX9-NEXT: %8:vgpr_32, dead %10:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %8, %subreg.sub1 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY1]].sub0 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY1]].sub1 + ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY2]], [[PRED_COPY3]], 0, implicit $exec + ; GFX9-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY4]], [[PRED_COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; 
GFX9-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] ; GFX10-WAVE64-LABEL: name: gep_p0_sgpr_vgpr ; GFX10-WAVE64: liveins: $sgpr0_sgpr1, $vgpr0_vgpr1 ; GFX10-WAVE64-NEXT: {{ $}} - ; GFX10-WAVE64-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX10-WAVE64-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-WAVE64-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX10-WAVE64-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub0 - ; GFX10-WAVE64-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX10-WAVE64-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub1 - ; GFX10-WAVE64-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX10-WAVE64-NEXT: %8:vgpr_32, dead %10:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX10-WAVE64-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %8, %subreg.sub1 + ; GFX10-WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX10-WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-WAVE64-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX10-WAVE64-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY1]].sub0 + ; GFX10-WAVE64-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX10-WAVE64-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY1]].sub1 + ; GFX10-WAVE64-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY2]], [[PRED_COPY3]], 0, implicit $exec + ; GFX10-WAVE64-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY4]], [[PRED_COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX10-WAVE64-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX10-WAVE64-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] ; GFX10-WAVE32-LABEL: name: gep_p0_sgpr_vgpr ; GFX10-WAVE32: liveins: $sgpr0_sgpr1, $vgpr0_vgpr1 ; GFX10-WAVE32-NEXT: {{ $}} - ; GFX10-WAVE32-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX10-WAVE32-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-WAVE32-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX10-WAVE32-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub0 - ; GFX10-WAVE32-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX10-WAVE32-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub1 - ; GFX10-WAVE32-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX10-WAVE32-NEXT: %8:vgpr_32, dead %10:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX10-WAVE32-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %8, %subreg.sub1 + ; GFX10-WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX10-WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-WAVE32-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX10-WAVE32-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY1]].sub0 + ; GFX10-WAVE32-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX10-WAVE32-NEXT: 
[[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY1]].sub1 + ; GFX10-WAVE32-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[PRED_COPY2]], [[PRED_COPY3]], 0, implicit $exec + ; GFX10-WAVE32-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[PRED_COPY4]], [[PRED_COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX10-WAVE32-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX10-WAVE32-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] %0:sgpr(p0) = COPY $sgpr0_sgpr1 %1:vgpr(s64) = COPY $vgpr0_vgpr1 @@ -258,37 +258,37 @@ ; GFX6-LABEL: name: gep_p3_sgpr_sgpr ; GFX6: liveins: $sgpr0, $sgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX6-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY]], [[COPY1]], implicit-def $scc + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX6-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc ; GFX6-NEXT: S_ENDPGM 0, implicit [[S_ADD_U32_]] ; GFX8-LABEL: name: gep_p3_sgpr_sgpr ; GFX8: liveins: $sgpr0, $sgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX8-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY]], [[COPY1]], implicit-def $scc + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX8-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc ; GFX8-NEXT: S_ENDPGM 0, implicit [[S_ADD_U32_]] ; GFX9-LABEL: name: gep_p3_sgpr_sgpr ; GFX9: liveins: $sgpr0, $sgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX9-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY]], [[COPY1]], implicit-def $scc + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX9-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc ; GFX9-NEXT: S_ENDPGM 0, implicit [[S_ADD_U32_]] ; GFX10-WAVE64-LABEL: name: gep_p3_sgpr_sgpr ; GFX10-WAVE64: liveins: $sgpr0, $sgpr1 ; GFX10-WAVE64-NEXT: {{ $}} - ; GFX10-WAVE64-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX10-WAVE64-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX10-WAVE64-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY]], [[COPY1]], implicit-def $scc + ; GFX10-WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX10-WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX10-WAVE64-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc ; GFX10-WAVE64-NEXT: S_ENDPGM 0, implicit [[S_ADD_U32_]] ; GFX10-WAVE32-LABEL: name: gep_p3_sgpr_sgpr ; GFX10-WAVE32: liveins: $sgpr0, $sgpr1 ; GFX10-WAVE32-NEXT: {{ $}} - ; GFX10-WAVE32-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX10-WAVE32-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX10-WAVE32-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY]], [[COPY1]], implicit-def $scc + ; GFX10-WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY 
$sgpr0 + ; GFX10-WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX10-WAVE32-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc ; GFX10-WAVE32-NEXT: S_ENDPGM 0, implicit [[S_ADD_U32_]] %0:sgpr(p3) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 @@ -308,37 +308,37 @@ ; GFX6-LABEL: name: gep_p3_vgpr_vgpr ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6-NEXT: %2:vgpr_32, dead %3:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec - ; GFX6-NEXT: S_ENDPGM 0, implicit %2 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], 0, implicit $exec + ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_ADD_CO_U32_e64_]] ; GFX8-LABEL: name: gep_p3_vgpr_vgpr ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX8-NEXT: %2:vgpr_32, dead %3:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec - ; GFX8-NEXT: S_ENDPGM 0, implicit %2 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], 0, implicit $exec + ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_ADD_CO_U32_e64_]] ; GFX9-LABEL: name: gep_p3_vgpr_vgpr ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], 0, implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_ADD_U32_e64_]] ; GFX10-WAVE64-LABEL: name: gep_p3_vgpr_vgpr ; GFX10-WAVE64: liveins: $vgpr0, $vgpr1 ; GFX10-WAVE64-NEXT: {{ $}} - ; GFX10-WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX10-WAVE64-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec + ; GFX10-WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX10-WAVE64-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], 0, implicit $exec ; GFX10-WAVE64-NEXT: S_ENDPGM 0, implicit [[V_ADD_U32_e64_]] ; GFX10-WAVE32-LABEL: name: gep_p3_vgpr_vgpr ; GFX10-WAVE32: liveins: $vgpr0, $vgpr1 ; GFX10-WAVE32-NEXT: {{ $}} - ; GFX10-WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX10-WAVE32-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec + ; GFX10-WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX10-WAVE32-NEXT: 
[[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], 0, implicit $exec ; GFX10-WAVE32-NEXT: S_ENDPGM 0, implicit [[V_ADD_U32_e64_]] %0:vgpr(p3) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -358,37 +358,37 @@ ; GFX6-LABEL: name: gep_p3_sgpr_vgpr ; GFX6: liveins: $sgpr0, $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: %2:vgpr_32, dead %3:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec - ; GFX6-NEXT: S_ENDPGM 0, implicit %2 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], 0, implicit $exec + ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_ADD_CO_U32_e64_]] ; GFX8-LABEL: name: gep_p3_sgpr_vgpr ; GFX8: liveins: $sgpr0, $vgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: %2:vgpr_32, dead %3:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec - ; GFX8-NEXT: S_ENDPGM 0, implicit %2 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], 0, implicit $exec + ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_ADD_CO_U32_e64_]] ; GFX9-LABEL: name: gep_p3_sgpr_vgpr ; GFX9: liveins: $sgpr0, $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], 0, implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_ADD_U32_e64_]] ; GFX10-WAVE64-LABEL: name: gep_p3_sgpr_vgpr ; GFX10-WAVE64: liveins: $sgpr0, $vgpr0 ; GFX10-WAVE64-NEXT: {{ $}} - ; GFX10-WAVE64-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX10-WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-WAVE64-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec + ; GFX10-WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX10-WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-WAVE64-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], 0, implicit $exec ; GFX10-WAVE64-NEXT: S_ENDPGM 0, implicit [[V_ADD_U32_e64_]] ; GFX10-WAVE32-LABEL: name: gep_p3_sgpr_vgpr ; GFX10-WAVE32: liveins: $sgpr0, $vgpr0 ; GFX10-WAVE32-NEXT: {{ $}} - ; GFX10-WAVE32-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX10-WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-WAVE32-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec + ; GFX10-WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX10-WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-WAVE32-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], 0, implicit $exec ; 
GFX10-WAVE32-NEXT: S_ENDPGM 0, implicit [[V_ADD_U32_e64_]] %0:sgpr(p3) = COPY $sgpr0 %1:vgpr(s32) = COPY $vgpr0 @@ -408,37 +408,37 @@ ; GFX6-LABEL: name: gep_p6_sgpr_sgpr ; GFX6: liveins: $sgpr0, $sgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX6-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY]], [[COPY1]], implicit-def $scc + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX6-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc ; GFX6-NEXT: S_ENDPGM 0, implicit [[S_ADD_U32_]] ; GFX8-LABEL: name: gep_p6_sgpr_sgpr ; GFX8: liveins: $sgpr0, $sgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX8-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY]], [[COPY1]], implicit-def $scc + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX8-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc ; GFX8-NEXT: S_ENDPGM 0, implicit [[S_ADD_U32_]] ; GFX9-LABEL: name: gep_p6_sgpr_sgpr ; GFX9: liveins: $sgpr0, $sgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX9-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY]], [[COPY1]], implicit-def $scc + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX9-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc ; GFX9-NEXT: S_ENDPGM 0, implicit [[S_ADD_U32_]] ; GFX10-WAVE64-LABEL: name: gep_p6_sgpr_sgpr ; GFX10-WAVE64: liveins: $sgpr0, $sgpr1 ; GFX10-WAVE64-NEXT: {{ $}} - ; GFX10-WAVE64-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX10-WAVE64-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX10-WAVE64-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY]], [[COPY1]], implicit-def $scc + ; GFX10-WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX10-WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX10-WAVE64-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc ; GFX10-WAVE64-NEXT: S_ENDPGM 0, implicit [[S_ADD_U32_]] ; GFX10-WAVE32-LABEL: name: gep_p6_sgpr_sgpr ; GFX10-WAVE32: liveins: $sgpr0, $sgpr1 ; GFX10-WAVE32-NEXT: {{ $}} - ; GFX10-WAVE32-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX10-WAVE32-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX10-WAVE32-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY]], [[COPY1]], implicit-def $scc + ; GFX10-WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX10-WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX10-WAVE32-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc ; GFX10-WAVE32-NEXT: S_ENDPGM 0, implicit [[S_ADD_U32_]] %0:sgpr(p6) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 @@ -458,37 +458,37 @@ ; GFX6-LABEL: name: gep_p2_sgpr_sgpr ; GFX6: liveins: $sgpr0, $sgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX6-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY]], [[COPY1]], 
implicit-def $scc + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX6-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc ; GFX6-NEXT: S_ENDPGM 0, implicit [[S_ADD_U32_]] ; GFX8-LABEL: name: gep_p2_sgpr_sgpr ; GFX8: liveins: $sgpr0, $sgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX8-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY]], [[COPY1]], implicit-def $scc + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX8-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc ; GFX8-NEXT: S_ENDPGM 0, implicit [[S_ADD_U32_]] ; GFX9-LABEL: name: gep_p2_sgpr_sgpr ; GFX9: liveins: $sgpr0, $sgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX9-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY]], [[COPY1]], implicit-def $scc + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX9-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc ; GFX9-NEXT: S_ENDPGM 0, implicit [[S_ADD_U32_]] ; GFX10-WAVE64-LABEL: name: gep_p2_sgpr_sgpr ; GFX10-WAVE64: liveins: $sgpr0, $sgpr1 ; GFX10-WAVE64-NEXT: {{ $}} - ; GFX10-WAVE64-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX10-WAVE64-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX10-WAVE64-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY]], [[COPY1]], implicit-def $scc + ; GFX10-WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX10-WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX10-WAVE64-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc ; GFX10-WAVE64-NEXT: S_ENDPGM 0, implicit [[S_ADD_U32_]] ; GFX10-WAVE32-LABEL: name: gep_p2_sgpr_sgpr ; GFX10-WAVE32: liveins: $sgpr0, $sgpr1 ; GFX10-WAVE32-NEXT: {{ $}} - ; GFX10-WAVE32-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX10-WAVE32-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX10-WAVE32-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY]], [[COPY1]], implicit-def $scc + ; GFX10-WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX10-WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX10-WAVE32-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc ; GFX10-WAVE32-NEXT: S_ENDPGM 0, implicit [[S_ADD_U32_]] %0:sgpr(p2) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 @@ -508,66 +508,66 @@ ; GFX6-LABEL: name: gep_p999_sgpr_sgpr ; GFX6: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY1]].sub0 - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 - ; GFX6-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[COPY1]].sub1 - ; GFX6-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY2]], [[COPY3]], implicit-def $scc - ; GFX6-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY4]], [[COPY5]], implicit-def $scc, implicit $scc + ; 
GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr2_sgpr3 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX6-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY1]].sub0 + ; GFX6-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX6-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY1]].sub1 + ; GFX6-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PRED_COPY2]], [[PRED_COPY3]], implicit-def $scc + ; GFX6-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[PRED_COPY4]], [[PRED_COPY5]], implicit-def $scc, implicit $scc ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 ; GFX6-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] ; GFX8-LABEL: name: gep_p999_sgpr_sgpr ; GFX8: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY1]].sub0 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 - ; GFX8-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[COPY1]].sub1 - ; GFX8-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY2]], [[COPY3]], implicit-def $scc - ; GFX8-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY4]], [[COPY5]], implicit-def $scc, implicit $scc + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr2_sgpr3 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY1]].sub0 + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX8-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY1]].sub1 + ; GFX8-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PRED_COPY2]], [[PRED_COPY3]], implicit-def $scc + ; GFX8-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[PRED_COPY4]], [[PRED_COPY5]], implicit-def $scc, implicit $scc ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 ; GFX8-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] ; GFX9-LABEL: name: gep_p999_sgpr_sgpr ; GFX9: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY1]].sub0 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[COPY1]].sub1 - ; GFX9-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY2]], [[COPY3]], implicit-def $scc - ; GFX9-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY4]], [[COPY5]], implicit-def $scc, implicit $scc + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr2_sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY1]].sub0 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY1]].sub1 + ; 
GFX9-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PRED_COPY2]], [[PRED_COPY3]], implicit-def $scc + ; GFX9-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[PRED_COPY4]], [[PRED_COPY5]], implicit-def $scc, implicit $scc ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 ; GFX9-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] ; GFX10-WAVE64-LABEL: name: gep_p999_sgpr_sgpr ; GFX10-WAVE64: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 ; GFX10-WAVE64-NEXT: {{ $}} - ; GFX10-WAVE64-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX10-WAVE64-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3 - ; GFX10-WAVE64-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 - ; GFX10-WAVE64-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY1]].sub0 - ; GFX10-WAVE64-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 - ; GFX10-WAVE64-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[COPY1]].sub1 - ; GFX10-WAVE64-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY2]], [[COPY3]], implicit-def $scc - ; GFX10-WAVE64-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY4]], [[COPY5]], implicit-def $scc, implicit $scc + ; GFX10-WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX10-WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr2_sgpr3 + ; GFX10-WAVE64-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX10-WAVE64-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY1]].sub0 + ; GFX10-WAVE64-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX10-WAVE64-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY1]].sub1 + ; GFX10-WAVE64-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PRED_COPY2]], [[PRED_COPY3]], implicit-def $scc + ; GFX10-WAVE64-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[PRED_COPY4]], [[PRED_COPY5]], implicit-def $scc, implicit $scc ; GFX10-WAVE64-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 ; GFX10-WAVE64-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] ; GFX10-WAVE32-LABEL: name: gep_p999_sgpr_sgpr ; GFX10-WAVE32: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 ; GFX10-WAVE32-NEXT: {{ $}} - ; GFX10-WAVE32-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX10-WAVE32-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3 - ; GFX10-WAVE32-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 - ; GFX10-WAVE32-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY1]].sub0 - ; GFX10-WAVE32-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 - ; GFX10-WAVE32-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[COPY1]].sub1 - ; GFX10-WAVE32-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY2]], [[COPY3]], implicit-def $scc - ; GFX10-WAVE32-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY4]], [[COPY5]], implicit-def $scc, implicit $scc + ; GFX10-WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX10-WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr2_sgpr3 + ; GFX10-WAVE32-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX10-WAVE32-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY1]].sub0 + ; GFX10-WAVE32-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX10-WAVE32-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY1]].sub1 + ; GFX10-WAVE32-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PRED_COPY2]], [[PRED_COPY3]], implicit-def $scc + ; GFX10-WAVE32-NEXT: 
[[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[PRED_COPY4]], [[PRED_COPY5]], implicit-def $scc, implicit $scc ; GFX10-WAVE32-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 ; GFX10-WAVE32-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] %0:sgpr(p999) = COPY $sgpr0_sgpr1 @@ -588,67 +588,67 @@ ; GFX6-LABEL: name: gep_p999_vgpr_vgpr ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub0 - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub1 - ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX6-NEXT: %8:vgpr_32, dead %10:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %8, %subreg.sub1 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX6-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY1]].sub0 + ; GFX6-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX6-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY1]].sub1 + ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY2]], [[PRED_COPY3]], 0, implicit $exec + ; GFX6-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY4]], [[PRED_COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX6-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] ; GFX8-LABEL: name: gep_p999_vgpr_vgpr ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub0 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub1 - ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX8-NEXT: %8:vgpr_32, dead %10:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %8, %subreg.sub1 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY1]].sub0 + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX8-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 
= PRED_COPY [[PRED_COPY1]].sub1 + ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY2]], [[PRED_COPY3]], 0, implicit $exec + ; GFX8-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY4]], [[PRED_COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX8-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] ; GFX9-LABEL: name: gep_p999_vgpr_vgpr ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub0 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub1 - ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX9-NEXT: %8:vgpr_32, dead %10:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %8, %subreg.sub1 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY1]].sub0 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY1]].sub1 + ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY2]], [[PRED_COPY3]], 0, implicit $exec + ; GFX9-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY4]], [[PRED_COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX9-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] ; GFX10-WAVE64-LABEL: name: gep_p999_vgpr_vgpr ; GFX10-WAVE64: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX10-WAVE64-NEXT: {{ $}} - ; GFX10-WAVE64-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-WAVE64-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX10-WAVE64-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX10-WAVE64-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub0 - ; GFX10-WAVE64-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX10-WAVE64-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub1 - ; GFX10-WAVE64-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX10-WAVE64-NEXT: %8:vgpr_32, dead %10:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX10-WAVE64-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %8, %subreg.sub1 + ; GFX10-WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; 
GFX10-WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX10-WAVE64-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX10-WAVE64-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY1]].sub0 + ; GFX10-WAVE64-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX10-WAVE64-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY1]].sub1 + ; GFX10-WAVE64-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY2]], [[PRED_COPY3]], 0, implicit $exec + ; GFX10-WAVE64-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY4]], [[PRED_COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX10-WAVE64-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX10-WAVE64-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] ; GFX10-WAVE32-LABEL: name: gep_p999_vgpr_vgpr ; GFX10-WAVE32: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX10-WAVE32-NEXT: {{ $}} - ; GFX10-WAVE32-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-WAVE32-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX10-WAVE32-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX10-WAVE32-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub0 - ; GFX10-WAVE32-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX10-WAVE32-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub1 - ; GFX10-WAVE32-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX10-WAVE32-NEXT: %8:vgpr_32, dead %10:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX10-WAVE32-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %8, %subreg.sub1 + ; GFX10-WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX10-WAVE32-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX10-WAVE32-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY1]].sub0 + ; GFX10-WAVE32-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX10-WAVE32-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY1]].sub1 + ; GFX10-WAVE32-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[PRED_COPY2]], [[PRED_COPY3]], 0, implicit $exec + ; GFX10-WAVE32-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[PRED_COPY4]], [[PRED_COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX10-WAVE32-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX10-WAVE32-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] %0:vgpr(p999) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = COPY $vgpr2_vgpr3 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ptrmask.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ptrmask.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ptrmask.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ptrmask.mir @@ -13,9 +13,9 @@ ; CHECK-LABEL: name: ptrmask_p3_s32_sgpr_sgpr_sgpr ; CHECK: liveins: $sgpr0, $sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: 
[[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], [[COPY1]], implicit-def $scc + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0, implicit [[S_AND_B32_]] %0:sgpr(p3) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 @@ -36,9 +36,9 @@ ; CHECK-LABEL: name: ptrmask_p3_s32_sgpr_sgpr_0xf0f0f0f0 ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 ; CHECK-NEXT: %const:sreg_32 = S_MOV_B32 -252645136 - ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], %const, implicit-def $scc + ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[PRED_COPY]], %const, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0, implicit [[S_AND_B32_]] %0:sgpr(p3) = COPY $sgpr0 %const:sgpr(s32) = G_CONSTANT i32 -252645136 @@ -59,9 +59,9 @@ ; CHECK-LABEL: name: ptrmask_p3_s32_sgpr_sgpr_0xffffffff ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 ; CHECK-NEXT: %const:sreg_32 = S_MOV_B32 -1 - ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], %const, implicit-def $scc + ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[PRED_COPY]], %const, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0, implicit [[S_AND_B32_]] %0:sgpr(p3) = COPY $sgpr0 %const:sgpr(s32) = G_CONSTANT i32 -1 @@ -82,9 +82,9 @@ ; CHECK-LABEL: name: ptrmask_p3_s32_sgpr_sgpr_0x00000000 ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 ; CHECK-NEXT: %const:sreg_32 = S_MOV_B32 0 - ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], %const, implicit-def $scc + ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[PRED_COPY]], %const, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0, implicit [[S_AND_B32_]] %0:sgpr(p3) = COPY $sgpr0 %const:sgpr(s32) = G_CONSTANT i32 0 @@ -105,9 +105,9 @@ ; CHECK-LABEL: name: ptrmask_p3_s32_sgpr_sgpr_clearhi1 ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 ; CHECK-NEXT: %const:sreg_32 = S_MOV_B32 -2147483648 - ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], %const, implicit-def $scc + ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[PRED_COPY]], %const, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0, implicit [[S_AND_B32_]] %0:sgpr(p3) = COPY $sgpr0 %const:sgpr(s32) = G_CONSTANT i32 -2147483648 @@ -128,9 +128,9 @@ ; CHECK-LABEL: name: ptrmask_p3_s32_sgpr_sgpr_clearhi2 ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 ; CHECK-NEXT: %const:sreg_32 = S_MOV_B32 -1073741824 - ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], %const, implicit-def $scc + ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[PRED_COPY]], %const, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0, implicit [[S_AND_B32_]] %0:sgpr(p3) = COPY $sgpr0 %const:sgpr(s32) = G_CONSTANT i32 -1073741824 @@ 
-151,9 +151,9 @@ ; CHECK-LABEL: name: ptrmask_p3_s32_sgpr_sgpr_clearlo1 ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 ; CHECK-NEXT: %const:sreg_32 = S_MOV_B32 -2 - ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], %const, implicit-def $scc + ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[PRED_COPY]], %const, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0, implicit [[S_AND_B32_]] %0:sgpr(p3) = COPY $sgpr0 %const:sgpr(s32) = G_CONSTANT i32 -2 @@ -174,9 +174,9 @@ ; CHECK-LABEL: name: ptrmask_p3_s32_sgpr_sgpr_clearlo2 ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 ; CHECK-NEXT: %const:sreg_32 = S_MOV_B32 -4 - ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], %const, implicit-def $scc + ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[PRED_COPY]], %const, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0, implicit [[S_AND_B32_]] %0:sgpr(p3) = COPY $sgpr0 %const:sgpr(s32) = G_CONSTANT i32 -4 @@ -197,9 +197,9 @@ ; CHECK-LABEL: name: ptrmask_p3_s32_sgpr_sgpr_clearlo3 ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 ; CHECK-NEXT: %const:sreg_32 = S_MOV_B32 -8 - ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], %const, implicit-def $scc + ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[PRED_COPY]], %const, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0, implicit [[S_AND_B32_]] %0:sgpr(p3) = COPY $sgpr0 %const:sgpr(s32) = G_CONSTANT i32 -8 @@ -220,9 +220,9 @@ ; CHECK-LABEL: name: ptrmask_p3_s32_sgpr_sgpr_clearlo4 ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 ; CHECK-NEXT: %const:sreg_32 = S_MOV_B32 -16 - ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], %const, implicit-def $scc + ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[PRED_COPY]], %const, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0, implicit [[S_AND_B32_]] %0:sgpr(p3) = COPY $sgpr0 %const:sgpr(s32) = G_CONSTANT i32 -16 @@ -243,9 +243,9 @@ ; CHECK-LABEL: name: ptrmask_p3_s32_sgpr_sgpr_clearlo29 ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 ; CHECK-NEXT: %const:sreg_32 = S_MOV_B32 -536870912 - ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], %const, implicit-def $scc + ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[PRED_COPY]], %const, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0, implicit [[S_AND_B32_]] %0:sgpr(p3) = COPY $sgpr0 %const:sgpr(s32) = G_CONSTANT i32 -536870912 @@ -266,9 +266,9 @@ ; CHECK-LABEL: name: ptrmask_p0_s64_sgpr_sgpr_sgpr ; CHECK: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3 - ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY]], [[COPY1]], implicit-def $scc + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr2_sgpr3 + ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 
[[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0, implicit [[S_AND_B64_]] %0:sgpr(p0) = COPY $sgpr0_sgpr1 %1:sgpr(s64) = COPY $sgpr2_sgpr3 @@ -289,10 +289,10 @@ ; CHECK-LABEL: name: ptrmask_p0_s64_sgpr_sgpr_sgpr_0xffffffffffffffff ; CHECK: liveins: $sgpr0_sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub0 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub1 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1 ; CHECK-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] %0:sgpr(p0) = COPY $sgpr0_sgpr1 %1:sgpr(s64) = G_CONSTANT i64 -1 @@ -313,9 +313,9 @@ ; CHECK-LABEL: name: ptrmask_p0_s64_sgpr_sgpr_sgpr_0x0000000000000000 ; CHECK: liveins: $sgpr0_sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 - ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY]], [[S_MOV_B64_]], implicit-def $scc + ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[PRED_COPY]], [[S_MOV_B64_]], implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0, implicit [[S_AND_B64_]] %0:sgpr(p0) = COPY $sgpr0_sgpr1 %1:sgpr(s64) = G_CONSTANT i64 0 @@ -336,11 +336,11 @@ ; CHECK-LABEL: name: ptrmask_p0_s64_sgpr_sgpr_sgpr_0xf0f0f0f0f0f0f0f0 ; CHECK: liveins: $sgpr0_sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4042322160 ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -252645136 ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 - ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY]], [[REG_SEQUENCE]], implicit-def $scc + ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[PRED_COPY]], [[REG_SEQUENCE]], implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0, implicit [[S_AND_B64_]] %0:sgpr(p0) = COPY $sgpr0_sgpr1 %1:sgpr(s64) = G_CONSTANT i64 -1085102592571150096 @@ -361,11 +361,11 @@ ; CHECK-LABEL: name: ptrmask_p0_s64_sgpr_sgpr_clearhi1 ; CHECK: liveins: $sgpr0_sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648 ; CHECK-NEXT: %const:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 - ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY]], %const, implicit-def $scc + ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[PRED_COPY]], %const, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0, implicit [[S_AND_B64_]] %0:sgpr(p0) = COPY $sgpr0_sgpr1 %const:sgpr(s64) = G_CONSTANT i64 -9223372036854775808 @@ -386,15 +386,15 @@ ; CHECK-LABEL: name: ptrmask_p0_s64_sgpr_sgpr_clearhi32 ; CHECK: liveins: $sgpr0_sgpr1 ; 
CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 ; CHECK-NEXT: %const:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY %const.sub0 - ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY1]], [[COPY3]], implicit-def $scc - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_AND_B32_]], %subreg.sub0, [[COPY2]], %subreg.sub1 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub0 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub1 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY %const.sub0 + ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[PRED_COPY1]], [[PRED_COPY3]], implicit-def $scc + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_AND_B32_]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1 ; CHECK-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] %0:sgpr(p0) = COPY $sgpr0_sgpr1 %const:sgpr(s64) = G_CONSTANT i64 -4294967296 @@ -415,11 +415,11 @@ ; CHECK-LABEL: name: ptrmask_p0_s64_sgpr_sgpr_clear_32 ; CHECK: liveins: $sgpr0_sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 1 ; CHECK-NEXT: %const:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 - ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY]], %const, implicit-def $scc + ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[PRED_COPY]], %const, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0, implicit [[S_AND_B64_]] %0:sgpr(p0) = COPY $sgpr0_sgpr1 %const:sgpr(s64) = G_CONSTANT i64 4294967296 @@ -440,13 +440,13 @@ ; CHECK-LABEL: name: ptrmask_p0_s64_sgpr_sgpr_clearlo1 ; CHECK: liveins: $sgpr0_sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 ; CHECK-NEXT: %const:sreg_64 = S_MOV_B64 -2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY %const.sub0 - ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY1]], [[COPY3]], implicit-def $scc - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_AND_B32_]], %subreg.sub0, [[COPY2]], %subreg.sub1 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub0 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub1 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY %const.sub0 + ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[PRED_COPY1]], [[PRED_COPY3]], implicit-def $scc + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_AND_B32_]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1 ; CHECK-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] %0:sgpr(p0) = COPY $sgpr0_sgpr1 %const:sgpr(s64) = G_CONSTANT i64 -2 @@ -467,13 +467,13 @@ ; CHECK-LABEL: name: ptrmask_p0_s64_sgpr_sgpr_clearlo2 ; CHECK: 
liveins: $sgpr0_sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 ; CHECK-NEXT: %const:sreg_64 = S_MOV_B64 -4 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY %const.sub0 - ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY1]], [[COPY3]], implicit-def $scc - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_AND_B32_]], %subreg.sub0, [[COPY2]], %subreg.sub1 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub0 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub1 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY %const.sub0 + ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[PRED_COPY1]], [[PRED_COPY3]], implicit-def $scc + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_AND_B32_]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1 ; CHECK-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] %0:sgpr(p0) = COPY $sgpr0_sgpr1 %const:sgpr(s64) = G_CONSTANT i64 -4 @@ -494,13 +494,13 @@ ; CHECK-LABEL: name: ptrmask_p0_s64_sgpr_sgpr_clearlo3 ; CHECK: liveins: $sgpr0_sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 ; CHECK-NEXT: %const:sreg_64 = S_MOV_B64 -8 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY %const.sub0 - ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY1]], [[COPY3]], implicit-def $scc - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_AND_B32_]], %subreg.sub0, [[COPY2]], %subreg.sub1 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub0 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub1 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY %const.sub0 + ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[PRED_COPY1]], [[PRED_COPY3]], implicit-def $scc + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_AND_B32_]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1 ; CHECK-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] %0:sgpr(p0) = COPY $sgpr0_sgpr1 %const:sgpr(s64) = G_CONSTANT i64 -8 @@ -521,13 +521,13 @@ ; CHECK-LABEL: name: ptrmask_p0_s64_sgpr_sgpr_clearlo4 ; CHECK: liveins: $sgpr0_sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 ; CHECK-NEXT: %const:sreg_64 = S_MOV_B64 -16 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY %const.sub0 - ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY1]], [[COPY3]], implicit-def $scc - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_AND_B32_]], %subreg.sub0, [[COPY2]], %subreg.sub1 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub0 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub1 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY %const.sub0 + ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[PRED_COPY1]], [[PRED_COPY3]], implicit-def $scc + ; CHECK-NEXT: 
[[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_AND_B32_]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1 ; CHECK-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] %0:sgpr(p0) = COPY $sgpr0_sgpr1 %const:sgpr(s64) = G_CONSTANT i64 -16 @@ -548,15 +548,15 @@ ; CHECK-LABEL: name: ptrmask_p0_s64_sgpr_sgpr_clearlo29 ; CHECK: liveins: $sgpr0_sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 3758096384 ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 ; CHECK-NEXT: %const:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY %const.sub0 - ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY1]], [[COPY3]], implicit-def $scc - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_AND_B32_]], %subreg.sub0, [[COPY2]], %subreg.sub1 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub0 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub1 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY %const.sub0 + ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[PRED_COPY1]], [[PRED_COPY3]], implicit-def $scc + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_AND_B32_]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1 ; CHECK-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] %0:sgpr(p0) = COPY $sgpr0_sgpr1 %const:sgpr(s64) = G_CONSTANT i64 -536870912 @@ -577,9 +577,9 @@ ; CHECK-LABEL: name: ptrmask_p3_vgpr_vgpr_0xf0f0f0f0 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; CHECK-NEXT: %const:vgpr_32 = V_MOV_B32_e32 -252645136, implicit $exec - ; CHECK-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY]], %const, implicit $exec + ; CHECK-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PRED_COPY]], %const, implicit $exec ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_AND_B32_e64_]] %0:vgpr(p3) = COPY $vgpr0 %const:vgpr(s32) = G_CONSTANT i32 -252645136 @@ -600,9 +600,9 @@ ; CHECK-LABEL: name: ptrmask_p3_vgpr_vgpr_clearlo1 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; CHECK-NEXT: %const:vgpr_32 = V_MOV_B32_e32 -2, implicit $exec - ; CHECK-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY]], %const, implicit $exec + ; CHECK-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PRED_COPY]], %const, implicit $exec ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_AND_B32_e64_]] %0:vgpr(p3) = COPY $vgpr0 %const:vgpr(s32) = G_CONSTANT i32 -2 @@ -623,9 +623,9 @@ ; CHECK-LABEL: name: ptrmask_p3_vgpr_vgpr_clearlo2 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; CHECK-NEXT: %const:vgpr_32 = V_MOV_B32_e32 -4, implicit $exec - ; CHECK-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY]], %const, implicit $exec + ; CHECK-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PRED_COPY]], %const, implicit $exec ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_AND_B32_e64_]] %0:vgpr(p3) = 
COPY $vgpr0 %const:vgpr(s32) = G_CONSTANT i32 -4 @@ -646,9 +646,9 @@ ; CHECK-LABEL: name: ptrmask_p3_vgpr_vgpr_clearlo3 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; CHECK-NEXT: %const:vgpr_32 = V_MOV_B32_e32 -8, implicit $exec - ; CHECK-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY]], %const, implicit $exec + ; CHECK-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PRED_COPY]], %const, implicit $exec ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_AND_B32_e64_]] %0:vgpr(p3) = COPY $vgpr0 %const:vgpr(s32) = G_CONSTANT i32 -8 @@ -669,9 +669,9 @@ ; CHECK-LABEL: name: ptrmask_p3_vgpr_vgpr_clearlo4 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; CHECK-NEXT: %const:vgpr_32 = V_MOV_B32_e32 -16, implicit $exec - ; CHECK-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY]], %const, implicit $exec + ; CHECK-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PRED_COPY]], %const, implicit $exec ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_AND_B32_e64_]] %0:vgpr(p3) = COPY $vgpr0 %const:vgpr(s32) = G_CONSTANT i32 -16 @@ -692,9 +692,9 @@ ; CHECK-LABEL: name: ptrmask_p3_vgpr_vgpr_clearlo29 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; CHECK-NEXT: %const:vgpr_32 = V_MOV_B32_e32 -536870912, implicit $exec - ; CHECK-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY]], %const, implicit $exec + ; CHECK-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PRED_COPY]], %const, implicit $exec ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_AND_B32_e64_]] %0:vgpr(p3) = COPY $vgpr0 %const:vgpr(s32) = G_CONSTANT i32 -536870912 @@ -715,14 +715,14 @@ ; CHECK-LABEL: name: ptrmask_p0_s64_vgpr_vgpr_vgpr ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub0 - ; CHECK-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY2]], [[COPY4]], implicit $exec - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub1 - ; CHECK-NEXT: [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY3]], [[COPY5]], implicit $exec + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY1]].sub0 + ; CHECK-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PRED_COPY2]], [[PRED_COPY4]], implicit $exec + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY1]].sub1 + ; CHECK-NEXT: [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PRED_COPY3]], [[PRED_COPY5]], implicit $exec ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[V_AND_B32_e64_1]], %subreg.sub1 ; CHECK-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] %0:vgpr(p0) = COPY 
$vgpr0_vgpr1 @@ -744,16 +744,16 @@ ; CHECK-LABEL: name: ptrmask_p0_s64_vgpr_vgpr_vgpr_0xf0f0f0f0f0f0f0f0 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4042322160, implicit $exec ; CHECK-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -252645136, implicit $exec ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; CHECK-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY1]], [[COPY3]], implicit $exec - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; CHECK-NEXT: [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY2]], [[COPY4]], implicit $exec + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; CHECK-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PRED_COPY1]], [[PRED_COPY3]], implicit $exec + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; CHECK-NEXT: [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PRED_COPY2]], [[PRED_COPY4]], implicit $exec ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[V_AND_B32_e64_1]], %subreg.sub1 ; CHECK-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE1]] %0:vgpr(p0) = COPY $vgpr0_vgpr1 @@ -775,15 +775,15 @@ ; CHECK-LABEL: name: ptrmask_p0_s64_vgpr_vgpr_clearlo1 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294967294, implicit $exec ; CHECK-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; CHECK-NEXT: %const:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY %const.sub0 - ; CHECK-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY1]], [[COPY3]], implicit $exec - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[COPY2]], %subreg.sub1 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY %const.sub0 + ; CHECK-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PRED_COPY1]], [[PRED_COPY3]], implicit $exec + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1 ; CHECK-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] %0:vgpr(p0) = COPY $vgpr0_vgpr1 %const:vgpr(s64) = G_CONSTANT i64 -2 @@ -804,15 +804,15 @@ ; CHECK-LABEL: name: ptrmask_p0_s64_vgpr_vgpr_clearlo2 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: 
[[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294967292, implicit $exec ; CHECK-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; CHECK-NEXT: %const:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY %const.sub0 - ; CHECK-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY1]], [[COPY3]], implicit $exec - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[COPY2]], %subreg.sub1 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY %const.sub0 + ; CHECK-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PRED_COPY1]], [[PRED_COPY3]], implicit $exec + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1 ; CHECK-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] %0:vgpr(p0) = COPY $vgpr0_vgpr1 %const:vgpr(s64) = G_CONSTANT i64 -4 @@ -833,15 +833,15 @@ ; CHECK-LABEL: name: ptrmask_p0_s64_vgpr_vgpr_clearlo3 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294967292, implicit $exec ; CHECK-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; CHECK-NEXT: %const:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY %const.sub0 - ; CHECK-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY1]], [[COPY3]], implicit $exec - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[COPY2]], %subreg.sub1 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY %const.sub0 + ; CHECK-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PRED_COPY1]], [[PRED_COPY3]], implicit $exec + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1 ; CHECK-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] %0:vgpr(p0) = COPY $vgpr0_vgpr1 %const:vgpr(s64) = G_CONSTANT i64 -4 @@ -862,15 +862,15 @@ ; CHECK-LABEL: name: ptrmask_p0_s64_vgpr_vgpr_clearlo4 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294967280, implicit $exec ; CHECK-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; CHECK-NEXT: %const:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = 
COPY [[COPY]].sub0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY %const.sub0 - ; CHECK-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY1]], [[COPY3]], implicit $exec - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[COPY2]], %subreg.sub1 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY %const.sub0 + ; CHECK-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PRED_COPY1]], [[PRED_COPY3]], implicit $exec + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1 ; CHECK-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] %0:vgpr(p0) = COPY $vgpr0_vgpr1 %const:vgpr(s64) = G_CONSTANT i64 -16 @@ -891,15 +891,15 @@ ; CHECK-LABEL: name: ptrmask_p0_s64_vgpr_vgpr_clearlo29 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 3758096384, implicit $exec ; CHECK-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; CHECK-NEXT: %const:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY %const.sub0 - ; CHECK-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY1]], [[COPY3]], implicit $exec - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[COPY2]], %subreg.sub1 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY %const.sub0 + ; CHECK-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PRED_COPY1]], [[PRED_COPY3]], implicit $exec + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1 ; CHECK-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] %0:vgpr(p0) = COPY $vgpr0_vgpr1 %const:vgpr(s64) = G_CONSTANT i64 -536870912 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ptrtoint.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ptrtoint.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ptrtoint.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ptrtoint.mir @@ -14,8 +14,8 @@ ; CHECK-LABEL: name: ptrtoint_s_p3_to_s_s32 ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; CHECK-NEXT: S_ENDPGM 0, implicit [[COPY]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; CHECK-NEXT: S_ENDPGM 0, implicit [[PRED_COPY]] %0:sgpr(p3) = COPY $sgpr0 %1:sgpr(s32) = G_PTRTOINT %0 S_ENDPGM 0, implicit %1 @@ -35,8 +35,8 @@ ; CHECK-LABEL: name: ptrtoint_s_p5_to_s_s32 ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; CHECK-NEXT: S_ENDPGM 0, implicit [[COPY]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; CHECK-NEXT: S_ENDPGM 0, implicit [[PRED_COPY]] %0:sgpr(p5) = COPY $sgpr0 
%1:sgpr(s32) = G_PTRTOINT %0 S_ENDPGM 0, implicit %1 @@ -56,8 +56,8 @@ ; CHECK-LABEL: name: ptrtoint_s_p0_to_s_s64 ; CHECK: liveins: $sgpr0_sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: S_ENDPGM 0, implicit [[COPY]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; CHECK-NEXT: S_ENDPGM 0, implicit [[PRED_COPY]] %0:sgpr(p0) = COPY $sgpr0_sgpr1 %1:sgpr(s64) = G_PTRTOINT %0 S_ENDPGM 0, implicit %1 @@ -77,8 +77,8 @@ ; CHECK-LABEL: name: ptrtoint_s_p1_to_s_s64 ; CHECK: liveins: $sgpr0_sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: S_ENDPGM 0, implicit [[COPY]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; CHECK-NEXT: S_ENDPGM 0, implicit [[PRED_COPY]] %0:sgpr(p1) = COPY $sgpr0_sgpr1 %1:sgpr(s64) = G_PTRTOINT %0 S_ENDPGM 0, implicit %1 @@ -98,8 +98,8 @@ ; CHECK-LABEL: name: ptrtoint_s_p999_to_s_s64 ; CHECK: liveins: $sgpr0_sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: S_ENDPGM 0, implicit [[COPY]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; CHECK-NEXT: S_ENDPGM 0, implicit [[PRED_COPY]] %0:sgpr(p999) = COPY $sgpr0_sgpr1 %1:sgpr(s64) = G_PTRTOINT %0 S_ENDPGM 0, implicit %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-returnaddress.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-returnaddress.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-returnaddress.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-returnaddress.mir @@ -15,8 +15,8 @@ ; CHECK-LABEL: name: return_address_already_live_in_copy ; CHECK: liveins: $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr30_sgpr31 - ; CHECK-NEXT: S_ENDPGM 0, implicit [[COPY]], implicit [[COPY]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr30_sgpr31 + ; CHECK-NEXT: S_ENDPGM 0, implicit [[PRED_COPY]], implicit [[PRED_COPY]] %0:sgpr(p0) = COPY $sgpr30_sgpr31 %1:sgpr(p0) = G_INTRINSIC intrinsic(@llvm.returnaddress), 0 S_ENDPGM 0, implicit %0, implicit %1 @@ -34,9 +34,9 @@ ; CHECK-LABEL: name: return_address_already_block_live_in_copy_not_mf_life_in ; CHECK: liveins: $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr30_sgpr31 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr30_sgpr31 - ; CHECK-NEXT: S_ENDPGM 0, implicit [[COPY1]], implicit [[COPY]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr30_sgpr31 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr30_sgpr31 + ; CHECK-NEXT: S_ENDPGM 0, implicit [[PRED_COPY1]], implicit [[PRED_COPY]] %0:sgpr(p0) = COPY $sgpr30_sgpr31 %1:sgpr(p0) = G_INTRINSIC intrinsic(@llvm.returnaddress), 0 S_ENDPGM 0, implicit %0, implicit %1 @@ -54,8 +54,8 @@ ; CHECK-LABEL: name: return_address_no_live_in ; CHECK: liveins: $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr30_sgpr31 - ; CHECK-NEXT: S_ENDPGM 0, implicit [[COPY]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr30_sgpr31 + ; CHECK-NEXT: S_ENDPGM 0, implicit [[PRED_COPY]] %0:sgpr(p0) = G_INTRINSIC intrinsic(@llvm.returnaddress), 0 S_ENDPGM 0, implicit %0 ... 
@@ -72,11 +72,11 @@ ; CHECK-NEXT: successors: %bb.1(0x80000000) ; CHECK-NEXT: liveins: $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr30_sgpr31 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr30_sgpr31 ; CHECK-NEXT: S_BRANCH %bb.1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: - ; CHECK-NEXT: S_ENDPGM 0, implicit [[COPY]] + ; CHECK-NEXT: S_ENDPGM 0, implicit [[PRED_COPY]] bb.0: G_BR %bb.1 @@ -97,11 +97,11 @@ ; CHECK-NEXT: successors: %bb.1(0x80000000) ; CHECK-NEXT: liveins: $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr30_sgpr31 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr30_sgpr31 ; CHECK-NEXT: S_BRANCH %bb.1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: - ; CHECK-NEXT: S_ENDPGM 0, implicit [[COPY]], implicit [[COPY]] + ; CHECK-NEXT: S_ENDPGM 0, implicit [[PRED_COPY]], implicit [[PRED_COPY]] bb.0: %0:sgpr(p0) = G_INTRINSIC intrinsic(@llvm.returnaddress), 0 G_BR %bb.1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sbfx.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sbfx.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sbfx.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sbfx.mir @@ -16,10 +16,10 @@ ; CHECK-LABEL: name: sbfx_s32_vii ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2, implicit $exec ; CHECK-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 10, implicit $exec - ; CHECK-NEXT: [[V_BFE_I32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_I32_e64 [[COPY]], [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], implicit $exec + ; CHECK-NEXT: [[V_BFE_I32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_I32_e64 [[PRED_COPY]], [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], implicit $exec ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_BFE_I32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = G_CONSTANT i32 2 @@ -39,10 +39,10 @@ ; CHECK-LABEL: name: sbfx_s32_vvv ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; CHECK-NEXT: [[V_BFE_I32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_I32_e64 [[COPY]], [[COPY1]], [[COPY2]], implicit $exec + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[V_BFE_I32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_I32_e64 [[PRED_COPY]], [[PRED_COPY1]], [[PRED_COPY2]], implicit $exec ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_BFE_I32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-select.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-select.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-select.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-select.mir @@ -13,14 +13,14 @@ ; GCN-LABEL: name: select_s32_scc ; GCN: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GCN-NEXT: S_CMP_EQ_U32 [[COPY]], [[COPY1]], implicit-def $scc - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = 
COPY $scc - ; GCN-NEXT: $scc = COPY [[COPY4]] - ; GCN-NEXT: [[S_CSELECT_B32_:%[0-9]+]]:sreg_32 = S_CSELECT_B32 [[COPY2]], [[COPY3]], implicit $scc + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GCN-NEXT: S_CMP_EQ_U32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $scc + ; GCN-NEXT: $scc = PRED_COPY [[PRED_COPY4]] + ; GCN-NEXT: [[S_CSELECT_B32_:%[0-9]+]]:sreg_32 = S_CSELECT_B32 [[PRED_COPY2]], [[PRED_COPY3]], implicit $scc ; GCN-NEXT: S_ENDPGM 0, implicit [[S_CSELECT_B32_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 @@ -44,14 +44,14 @@ ; GCN-LABEL: name: select_s64_scc ; GCN: liveins: $sgpr0, $sgpr1, $sgpr2_sgpr3, $sgpr4_sgpr5 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sreg_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: S_CMP_EQ_U32 [[COPY]], [[COPY1]], implicit-def $scc - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $scc - ; GCN-NEXT: $scc = COPY [[COPY4]] - ; GCN-NEXT: [[S_CSELECT_B64_:%[0-9]+]]:sreg_64 = S_CSELECT_B64 [[COPY2]], [[COPY3]], implicit $scc + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr2_sgpr3 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr4_sgpr5 + ; GCN-NEXT: S_CMP_EQ_U32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $scc + ; GCN-NEXT: $scc = PRED_COPY [[PRED_COPY4]] + ; GCN-NEXT: [[S_CSELECT_B64_:%[0-9]+]]:sreg_64 = S_CSELECT_B64 [[PRED_COPY2]], [[PRED_COPY3]], implicit $scc ; GCN-NEXT: S_ENDPGM 0, implicit [[S_CSELECT_B64_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 @@ -75,14 +75,14 @@ ; GCN-LABEL: name: select_p0_scc ; GCN: liveins: $sgpr0, $sgpr1, $sgpr2_sgpr3, $sgpr4_sgpr5 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sreg_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: S_CMP_EQ_U32 [[COPY]], [[COPY1]], implicit-def $scc - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $scc - ; GCN-NEXT: $scc = COPY [[COPY4]] - ; GCN-NEXT: [[S_CSELECT_B64_:%[0-9]+]]:sreg_64 = S_CSELECT_B64 [[COPY2]], [[COPY3]], implicit $scc + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr2_sgpr3 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr4_sgpr5 + ; GCN-NEXT: S_CMP_EQ_U32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $scc + ; GCN-NEXT: $scc = PRED_COPY [[PRED_COPY4]] + ; GCN-NEXT: [[S_CSELECT_B64_:%[0-9]+]]:sreg_64 = S_CSELECT_B64 [[PRED_COPY2]], [[PRED_COPY3]], implicit $scc ; GCN-NEXT: S_ENDPGM 0, implicit [[S_CSELECT_B64_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 @@ -106,14 +106,14 @@ ; GCN-LABEL: name: select_p1_scc ; GCN: liveins: $sgpr0, $sgpr1, $sgpr2_sgpr3, $sgpr4_sgpr5 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - 
; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sreg_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: S_CMP_EQ_U32 [[COPY]], [[COPY1]], implicit-def $scc - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $scc - ; GCN-NEXT: $scc = COPY [[COPY4]] - ; GCN-NEXT: [[S_CSELECT_B64_:%[0-9]+]]:sreg_64 = S_CSELECT_B64 [[COPY2]], [[COPY3]], implicit $scc + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr2_sgpr3 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr4_sgpr5 + ; GCN-NEXT: S_CMP_EQ_U32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $scc + ; GCN-NEXT: $scc = PRED_COPY [[PRED_COPY4]] + ; GCN-NEXT: [[S_CSELECT_B64_:%[0-9]+]]:sreg_64 = S_CSELECT_B64 [[PRED_COPY2]], [[PRED_COPY3]], implicit $scc ; GCN-NEXT: S_ENDPGM 0, implicit [[S_CSELECT_B64_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 @@ -137,14 +137,14 @@ ; GCN-LABEL: name: select_p999_scc ; GCN: liveins: $sgpr0, $sgpr1, $sgpr2_sgpr3, $sgpr4_sgpr5 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sreg_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: S_CMP_EQ_U32 [[COPY]], [[COPY1]], implicit-def $scc - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $scc - ; GCN-NEXT: $scc = COPY [[COPY4]] - ; GCN-NEXT: [[S_CSELECT_B64_:%[0-9]+]]:sreg_64 = S_CSELECT_B64 [[COPY2]], [[COPY3]], implicit $scc + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr2_sgpr3 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr4_sgpr5 + ; GCN-NEXT: S_CMP_EQ_U32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $scc + ; GCN-NEXT: $scc = PRED_COPY [[PRED_COPY4]] + ; GCN-NEXT: [[S_CSELECT_B64_:%[0-9]+]]:sreg_64 = S_CSELECT_B64 [[PRED_COPY2]], [[PRED_COPY3]], implicit $scc ; GCN-NEXT: S_ENDPGM 0, implicit [[S_CSELECT_B64_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 @@ -168,14 +168,14 @@ ; GCN-LABEL: name: select_v4s16_scc ; GCN: liveins: $sgpr0, $sgpr1, $sgpr2_sgpr3, $sgpr4_sgpr5 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sreg_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: S_CMP_EQ_U32 [[COPY]], [[COPY1]], implicit-def $scc - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $scc - ; GCN-NEXT: $scc = COPY [[COPY4]] - ; GCN-NEXT: [[S_CSELECT_B64_:%[0-9]+]]:sreg_64 = S_CSELECT_B64 [[COPY2]], [[COPY3]], implicit $scc + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr2_sgpr3 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr4_sgpr5 + ; GCN-NEXT: S_CMP_EQ_U32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $scc + ; GCN-NEXT: $scc = PRED_COPY [[PRED_COPY4]] + ; GCN-NEXT: [[S_CSELECT_B64_:%[0-9]+]]:sreg_64 = S_CSELECT_B64 [[PRED_COPY2]], [[PRED_COPY3]], implicit 
$scc ; GCN-NEXT: S_ENDPGM 0, implicit [[S_CSELECT_B64_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 @@ -199,14 +199,14 @@ ; GCN-LABEL: name: select_s16_scc ; GCN: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GCN-NEXT: S_CMP_EQ_U32 [[COPY2]], [[COPY3]], implicit-def $scc - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $scc - ; GCN-NEXT: $scc = COPY [[COPY4]] - ; GCN-NEXT: [[S_CSELECT_B32_:%[0-9]+]]:sreg_32 = S_CSELECT_B32 [[COPY]], [[COPY1]], implicit $scc + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GCN-NEXT: S_CMP_EQ_U32 [[PRED_COPY2]], [[PRED_COPY3]], implicit-def $scc + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $scc + ; GCN-NEXT: $scc = PRED_COPY [[PRED_COPY4]] + ; GCN-NEXT: [[S_CSELECT_B32_:%[0-9]+]]:sreg_32 = S_CSELECT_B32 [[PRED_COPY]], [[PRED_COPY1]], implicit $scc ; GCN-NEXT: S_ENDPGM 0, implicit [[S_CSELECT_B32_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 @@ -232,14 +232,14 @@ ; GCN-LABEL: name: select_v2s16_scc ; GCN: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GCN-NEXT: S_CMP_EQ_U32 [[COPY]], [[COPY1]], implicit-def $scc - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $scc - ; GCN-NEXT: $scc = COPY [[COPY4]] - ; GCN-NEXT: [[S_CSELECT_B32_:%[0-9]+]]:sreg_32 = S_CSELECT_B32 [[COPY2]], [[COPY3]], implicit $scc + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GCN-NEXT: S_CMP_EQ_U32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $scc + ; GCN-NEXT: $scc = PRED_COPY [[PRED_COPY4]] + ; GCN-NEXT: [[S_CSELECT_B32_:%[0-9]+]]:sreg_32 = S_CSELECT_B32 [[PRED_COPY2]], [[PRED_COPY3]], implicit $scc ; GCN-NEXT: S_ENDPGM 0, implicit [[S_CSELECT_B32_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 @@ -263,12 +263,12 @@ ; GCN-LABEL: name: select_s32_vcc ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GCN-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[COPY]], [[COPY1]], implicit $exec - ; GCN-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY3]], 0, [[COPY2]], [[V_CMP_EQ_U32_e64_]], implicit $exec + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GCN-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[PRED_COPY]], 
[[PRED_COPY1]], implicit $exec + ; GCN-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[PRED_COPY3]], 0, [[PRED_COPY2]], [[V_CMP_EQ_U32_e64_]], implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_CNDMASK_B32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -292,12 +292,12 @@ ; GCN-LABEL: name: select_s16_vcc ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GCN-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[COPY2]], [[COPY3]], implicit $exec - ; GCN-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY1]], 0, [[COPY]], [[V_CMP_EQ_U32_e64_]], implicit $exec + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GCN-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[PRED_COPY2]], [[PRED_COPY3]], implicit $exec + ; GCN-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[PRED_COPY1]], 0, [[PRED_COPY]], [[V_CMP_EQ_U32_e64_]], implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_CNDMASK_B32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -323,12 +323,12 @@ ; GCN-LABEL: name: select_v2s16_vcc ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GCN-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[COPY]], [[COPY1]], implicit $exec - ; GCN-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY3]], 0, [[COPY2]], [[V_CMP_EQ_U32_e64_]], implicit $exec + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GCN-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec + ; GCN-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[PRED_COPY3]], 0, [[PRED_COPY2]], [[V_CMP_EQ_U32_e64_]], implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_CNDMASK_B32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -352,12 +352,12 @@ ; GCN-LABEL: name: select_p3_vcc ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GCN-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[COPY]], [[COPY1]], implicit $exec - ; GCN-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY3]], 0, [[COPY2]], [[V_CMP_EQ_U32_e64_]], implicit $exec + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GCN-NEXT: 
[[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GCN-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec + ; GCN-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[PRED_COPY3]], 0, [[PRED_COPY2]], [[V_CMP_EQ_U32_e64_]], implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_CNDMASK_B32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -382,12 +382,12 @@ ; GCN-LABEL: name: select_s32_vcc_fneg_lhs ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GCN-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[COPY]], [[COPY1]], implicit $exec - ; GCN-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY3]], 1, [[COPY2]], [[V_CMP_EQ_U32_e64_]], implicit $exec + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GCN-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec + ; GCN-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[PRED_COPY3]], 1, [[PRED_COPY2]], [[V_CMP_EQ_U32_e64_]], implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_CNDMASK_B32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -412,12 +412,12 @@ ; GCN-LABEL: name: select_s32_vcc_fneg_rhs ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GCN-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[COPY]], [[COPY1]], implicit $exec - ; GCN-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 1, [[COPY3]], 0, [[COPY2]], [[V_CMP_EQ_U32_e64_]], implicit $exec + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GCN-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec + ; GCN-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 1, [[PRED_COPY3]], 0, [[PRED_COPY2]], [[V_CMP_EQ_U32_e64_]], implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_CNDMASK_B32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -442,12 +442,12 @@ ; GCN-LABEL: name: select_s32_vcc_fneg_fabs_lhs ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GCN-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[COPY]], [[COPY1]], implicit $exec - ; GCN-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY2]], 3, [[COPY3]], [[V_CMP_EQ_U32_e64_]], implicit $exec + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = 
PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GCN-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec + ; GCN-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[PRED_COPY2]], 3, [[PRED_COPY3]], [[V_CMP_EQ_U32_e64_]], implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_CNDMASK_B32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -474,14 +474,14 @@ ; GCN-LABEL: name: select_s16_vcc_fneg_lhs ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768 - ; GCN-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec - ; GCN-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[COPY2]], [[COPY3]], implicit $exec - ; GCN-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY1]], 0, [[V_XOR_B32_e64_]], [[V_CMP_EQ_U32_e64_]], implicit $exec + ; GCN-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[PRED_COPY]], implicit $exec + ; GCN-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[PRED_COPY2]], [[PRED_COPY3]], implicit $exec + ; GCN-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[PRED_COPY1]], 0, [[V_XOR_B32_e64_]], [[V_CMP_EQ_U32_e64_]], implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_CNDMASK_B32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -510,13 +510,13 @@ ; GCN-LABEL: name: select_v2s16_vcc_fneg_lhs ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147516416 - ; GCN-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY2]], implicit $exec - ; GCN-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[COPY]], [[COPY1]], implicit $exec - ; GCN-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY2]], 0, [[V_XOR_B32_e64_]], [[V_CMP_EQ_U32_e64_]], implicit $exec + ; GCN-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[PRED_COPY2]], implicit $exec + ; GCN-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec + ; GCN-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[PRED_COPY2]], 0, [[V_XOR_B32_e64_]], [[V_CMP_EQ_U32_e64_]], implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_CNDMASK_B32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -543,16 +543,16 
@@ ; GCN-LABEL: name: select_s32_scc_fneg_lhs ; GCN: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 - ; GCN-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc - ; GCN-NEXT: S_CMP_EQ_U32 [[COPY]], [[COPY1]], implicit-def $scc - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $scc - ; GCN-NEXT: $scc = COPY [[COPY4]] - ; GCN-NEXT: [[S_CSELECT_B32_:%[0-9]+]]:sreg_32 = S_CSELECT_B32 [[S_XOR_B32_]], [[COPY3]], implicit $scc + ; GCN-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[PRED_COPY2]], [[S_MOV_B32_]], implicit-def $scc + ; GCN-NEXT: S_CMP_EQ_U32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $scc + ; GCN-NEXT: $scc = PRED_COPY [[PRED_COPY4]] + ; GCN-NEXT: [[S_CSELECT_B32_:%[0-9]+]]:sreg_32 = S_CSELECT_B32 [[S_XOR_B32_]], [[PRED_COPY3]], implicit $scc ; GCN-NEXT: S_ENDPGM 0, implicit [[S_CSELECT_B32_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 @@ -577,16 +577,16 @@ ; GCN-LABEL: name: select_s32_scc_fneg_rhs ; GCN: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 - ; GCN-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY3]], [[S_MOV_B32_]], implicit-def $scc - ; GCN-NEXT: S_CMP_EQ_U32 [[COPY]], [[COPY1]], implicit-def $scc - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $scc - ; GCN-NEXT: $scc = COPY [[COPY4]] - ; GCN-NEXT: [[S_CSELECT_B32_:%[0-9]+]]:sreg_32 = S_CSELECT_B32 [[COPY2]], [[S_XOR_B32_]], implicit $scc + ; GCN-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[PRED_COPY3]], [[S_MOV_B32_]], implicit-def $scc + ; GCN-NEXT: S_CMP_EQ_U32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $scc + ; GCN-NEXT: $scc = PRED_COPY [[PRED_COPY4]] + ; GCN-NEXT: [[S_CSELECT_B32_:%[0-9]+]]:sreg_32 = S_CSELECT_B32 [[PRED_COPY2]], [[S_XOR_B32_]], implicit $scc ; GCN-NEXT: S_ENDPGM 0, implicit [[S_CSELECT_B32_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sext-inreg.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sext-inreg.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sext-inreg.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sext-inreg.mir @@ -13,9 +13,9 @@ ; GCN-LABEL: name: sext_inreg_sgpr_s32_1 ; GCN: liveins: $sgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[S_BFE_I32_:%[0-9]+]]:sreg_32 = S_BFE_I32 [[COPY]], 
65536, implicit-def $scc - ; GCN-NEXT: $sgpr0 = COPY [[S_BFE_I32_]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[S_BFE_I32_:%[0-9]+]]:sreg_32 = S_BFE_I32 [[PRED_COPY]], 65536, implicit-def $scc + ; GCN-NEXT: $sgpr0 = PRED_COPY [[S_BFE_I32_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = G_SEXT_INREG %0, 1 $sgpr0 = COPY %1 @@ -33,9 +33,9 @@ ; GCN-LABEL: name: sext_inreg_sgpr_s32_2 ; GCN: liveins: $sgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[S_BFE_I32_:%[0-9]+]]:sreg_32 = S_BFE_I32 [[COPY]], 131072, implicit-def $scc - ; GCN-NEXT: $sgpr0 = COPY [[S_BFE_I32_]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[S_BFE_I32_:%[0-9]+]]:sreg_32 = S_BFE_I32 [[PRED_COPY]], 131072, implicit-def $scc + ; GCN-NEXT: $sgpr0 = PRED_COPY [[S_BFE_I32_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = G_SEXT_INREG %0, 2 $sgpr0 = COPY %1 @@ -53,9 +53,9 @@ ; GCN-LABEL: name: sext_inreg_sgpr_s32_8 ; GCN: liveins: $sgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[S_SEXT_I32_I8_:%[0-9]+]]:sreg_32 = S_SEXT_I32_I8 [[COPY]] - ; GCN-NEXT: $sgpr0 = COPY [[S_SEXT_I32_I8_]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[S_SEXT_I32_I8_:%[0-9]+]]:sreg_32 = S_SEXT_I32_I8 [[PRED_COPY]] + ; GCN-NEXT: $sgpr0 = PRED_COPY [[S_SEXT_I32_I8_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = G_SEXT_INREG %0, 8 $sgpr0 = COPY %1 @@ -73,9 +73,9 @@ ; GCN-LABEL: name: sext_inreg_sgpr_s32_16 ; GCN: liveins: $sgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[S_SEXT_I32_I16_:%[0-9]+]]:sreg_32 = S_SEXT_I32_I16 [[COPY]] - ; GCN-NEXT: $sgpr0 = COPY [[S_SEXT_I32_I16_]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[S_SEXT_I32_I16_:%[0-9]+]]:sreg_32 = S_SEXT_I32_I16 [[PRED_COPY]] + ; GCN-NEXT: $sgpr0 = PRED_COPY [[S_SEXT_I32_I16_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = G_SEXT_INREG %0, 16 $sgpr0 = COPY %1 @@ -93,9 +93,9 @@ ; GCN-LABEL: name: sext_inreg_sgpr_s32_31 ; GCN: liveins: $sgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[S_BFE_I32_:%[0-9]+]]:sreg_32 = S_BFE_I32 [[COPY]], 2031616, implicit-def $scc - ; GCN-NEXT: $sgpr0 = COPY [[S_BFE_I32_]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[S_BFE_I32_:%[0-9]+]]:sreg_32 = S_BFE_I32 [[PRED_COPY]], 2031616, implicit-def $scc + ; GCN-NEXT: $sgpr0 = PRED_COPY [[S_BFE_I32_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = G_SEXT_INREG %0, 31 $sgpr0 = COPY %1 @@ -113,11 +113,11 @@ ; GCN-LABEL: name: sext_inreg_sgpr_s64_1 ; GCN: liveins: $sgpr0_sgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 ; GCN-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]].sub0, %subreg.sub0, [[DEF]], %subreg.sub1 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY]].sub0, %subreg.sub0, [[DEF]], %subreg.sub1 ; GCN-NEXT: [[S_BFE_I64_:%[0-9]+]]:sreg_64 = S_BFE_I64 [[REG_SEQUENCE]], 65536, implicit-def $scc - ; GCN-NEXT: $sgpr0_sgpr1 = COPY [[S_BFE_I64_]] + ; GCN-NEXT: $sgpr0_sgpr1 = PRED_COPY [[S_BFE_I64_]] %0:sgpr(s64) = COPY $sgpr0_sgpr1 %1:sgpr(s64) = G_SEXT_INREG %0, 1 $sgpr0_sgpr1 = COPY %1 @@ -135,11 +135,11 @@ ; GCN-LABEL: name: sext_inreg_sgpr_s64_2 ; GCN: liveins: $sgpr0_sgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: 
[[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 ; GCN-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]].sub0, %subreg.sub0, [[DEF]], %subreg.sub1 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY]].sub0, %subreg.sub0, [[DEF]], %subreg.sub1 ; GCN-NEXT: [[S_BFE_I64_:%[0-9]+]]:sreg_64 = S_BFE_I64 [[REG_SEQUENCE]], 131072, implicit-def $scc - ; GCN-NEXT: $sgpr0_sgpr1 = COPY [[S_BFE_I64_]] + ; GCN-NEXT: $sgpr0_sgpr1 = PRED_COPY [[S_BFE_I64_]] %0:sgpr(s64) = COPY $sgpr0_sgpr1 %1:sgpr(s64) = G_SEXT_INREG %0, 2 $sgpr0_sgpr1 = COPY %1 @@ -157,11 +157,11 @@ ; GCN-LABEL: name: sext_inreg_sgpr_s64_8 ; GCN: liveins: $sgpr0_sgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 ; GCN-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]].sub0, %subreg.sub0, [[DEF]], %subreg.sub1 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY]].sub0, %subreg.sub0, [[DEF]], %subreg.sub1 ; GCN-NEXT: [[S_BFE_I64_:%[0-9]+]]:sreg_64 = S_BFE_I64 [[REG_SEQUENCE]], 524288, implicit-def $scc - ; GCN-NEXT: $sgpr0_sgpr1 = COPY [[S_BFE_I64_]] + ; GCN-NEXT: $sgpr0_sgpr1 = PRED_COPY [[S_BFE_I64_]] %0:sgpr(s64) = COPY $sgpr0_sgpr1 %1:sgpr(s64) = G_SEXT_INREG %0, 8 $sgpr0_sgpr1 = COPY %1 @@ -179,11 +179,11 @@ ; GCN-LABEL: name: sext_inreg_sgpr_s64_16 ; GCN: liveins: $sgpr0_sgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 ; GCN-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]].sub0, %subreg.sub0, [[DEF]], %subreg.sub1 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY]].sub0, %subreg.sub0, [[DEF]], %subreg.sub1 ; GCN-NEXT: [[S_BFE_I64_:%[0-9]+]]:sreg_64 = S_BFE_I64 [[REG_SEQUENCE]], 1048576, implicit-def $scc - ; GCN-NEXT: $sgpr0_sgpr1 = COPY [[S_BFE_I64_]] + ; GCN-NEXT: $sgpr0_sgpr1 = PRED_COPY [[S_BFE_I64_]] %0:sgpr(s64) = COPY $sgpr0_sgpr1 %1:sgpr(s64) = G_SEXT_INREG %0, 16 $sgpr0_sgpr1 = COPY %1 @@ -201,11 +201,11 @@ ; GCN-LABEL: name: sext_inreg_sgpr_s64_31 ; GCN: liveins: $sgpr0_sgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 ; GCN-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]].sub0, %subreg.sub0, [[DEF]], %subreg.sub1 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY]].sub0, %subreg.sub0, [[DEF]], %subreg.sub1 ; GCN-NEXT: [[S_BFE_I64_:%[0-9]+]]:sreg_64 = S_BFE_I64 [[REG_SEQUENCE]], 2031616, implicit-def $scc - ; GCN-NEXT: $sgpr0_sgpr1 = COPY [[S_BFE_I64_]] + ; GCN-NEXT: $sgpr0_sgpr1 = PRED_COPY [[S_BFE_I64_]] %0:sgpr(s64) = COPY $sgpr0_sgpr1 %1:sgpr(s64) = G_SEXT_INREG %0, 31 $sgpr0_sgpr1 = COPY %1 @@ -224,10 +224,10 @@ ; GCN-LABEL: name: sext_inreg_sgpr_s64_32 ; GCN: liveins: $sgpr0_sgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GCN-NEXT: [[S_ASHR_I32_:%[0-9]+]]:sreg_32 = S_ASHR_I32 [[COPY]].sub0, 31, implicit-def $scc - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]].sub0, %subreg.sub0, [[S_ASHR_I32_]], %subreg.sub1 - ; GCN-NEXT: $sgpr0_sgpr1 = COPY 
[[REG_SEQUENCE]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GCN-NEXT: [[S_ASHR_I32_:%[0-9]+]]:sreg_32 = S_ASHR_I32 [[PRED_COPY]].sub0, 31, implicit-def $scc + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY]].sub0, %subreg.sub0, [[S_ASHR_I32_]], %subreg.sub1 + ; GCN-NEXT: $sgpr0_sgpr1 = PRED_COPY [[REG_SEQUENCE]] %0:sgpr(s64) = COPY $sgpr0_sgpr1 %1:sgpr(s64) = G_SEXT_INREG %0, 32 $sgpr0_sgpr1 = COPY %1 @@ -245,11 +245,11 @@ ; GCN-LABEL: name: sext_inreg_sgpr_s64_63 ; GCN: liveins: $sgpr0_sgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 ; GCN-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]].sub0, %subreg.sub0, [[DEF]], %subreg.sub1 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY]].sub0, %subreg.sub0, [[DEF]], %subreg.sub1 ; GCN-NEXT: [[S_BFE_I64_:%[0-9]+]]:sreg_64 = S_BFE_I64 [[REG_SEQUENCE]], 4128768, implicit-def $scc - ; GCN-NEXT: $sgpr0_sgpr1 = COPY [[S_BFE_I64_]] + ; GCN-NEXT: $sgpr0_sgpr1 = PRED_COPY [[S_BFE_I64_]] %0:sgpr(s64) = COPY $sgpr0_sgpr1 %1:sgpr(s64) = G_SEXT_INREG %0, 63 $sgpr0_sgpr1 = COPY %1 @@ -267,9 +267,9 @@ ; GCN-LABEL: name: sext_inreg_vgpr_s32_1 ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[V_BFE_I32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_I32_e64 [[COPY]], 0, 1, implicit $exec - ; GCN-NEXT: $vgpr0 = COPY [[V_BFE_I32_e64_]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[V_BFE_I32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_I32_e64 [[PRED_COPY]], 0, 1, implicit $exec + ; GCN-NEXT: $vgpr0 = PRED_COPY [[V_BFE_I32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = G_SEXT_INREG %0, 1 $vgpr0 = COPY %1 @@ -287,9 +287,9 @@ ; GCN-LABEL: name: sext_inreg_vgpr_s32_2 ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[V_BFE_I32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_I32_e64 [[COPY]], 0, 2, implicit $exec - ; GCN-NEXT: $vgpr0 = COPY [[V_BFE_I32_e64_]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[V_BFE_I32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_I32_e64 [[PRED_COPY]], 0, 2, implicit $exec + ; GCN-NEXT: $vgpr0 = PRED_COPY [[V_BFE_I32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = G_SEXT_INREG %0, 2 $vgpr0 = COPY %1 @@ -307,9 +307,9 @@ ; GCN-LABEL: name: sext_inreg_vgpr_s32_8 ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[V_BFE_I32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_I32_e64 [[COPY]], 0, 8, implicit $exec - ; GCN-NEXT: $vgpr0 = COPY [[V_BFE_I32_e64_]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[V_BFE_I32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_I32_e64 [[PRED_COPY]], 0, 8, implicit $exec + ; GCN-NEXT: $vgpr0 = PRED_COPY [[V_BFE_I32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = G_SEXT_INREG %0, 8 $vgpr0 = COPY %1 @@ -327,9 +327,9 @@ ; GCN-LABEL: name: sext_inreg_vgpr_s32_16 ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[V_BFE_I32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_I32_e64 [[COPY]], 0, 16, implicit $exec - ; GCN-NEXT: $vgpr0 = COPY [[V_BFE_I32_e64_]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[V_BFE_I32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_I32_e64 [[PRED_COPY]], 0, 16, implicit $exec + ; GCN-NEXT: $vgpr0 = PRED_COPY 
[[V_BFE_I32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = G_SEXT_INREG %0, 16 $vgpr0 = COPY %1 @@ -347,9 +347,9 @@ ; GCN-LABEL: name: sext_inreg_vgpr_s32_31 ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[V_BFE_I32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_I32_e64 [[COPY]], 0, 31, implicit $exec - ; GCN-NEXT: $vgpr0 = COPY [[V_BFE_I32_e64_]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[V_BFE_I32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_I32_e64 [[PRED_COPY]], 0, 31, implicit $exec + ; GCN-NEXT: $vgpr0 = PRED_COPY [[V_BFE_I32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = G_SEXT_INREG %0, 31 $vgpr0 = COPY %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sext.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sext.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sext.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sext.mir @@ -13,10 +13,10 @@ ; GCN-LABEL: name: sext_sgpr_s1_to_sgpr_s16 ; GCN: liveins: $sgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[S_BFE_I32_:%[0-9]+]]:sreg_32 = S_BFE_I32 [[COPY]], 65536, implicit-def $scc + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[S_BFE_I32_:%[0-9]+]]:sreg_32 = S_BFE_I32 [[PRED_COPY]], 65536, implicit-def $scc ; GCN-NEXT: [[S_BFE_U32_:%[0-9]+]]:sreg_32 = S_BFE_U32 [[S_BFE_I32_]], 1048576, implicit-def $scc - ; GCN-NEXT: $sgpr0 = COPY [[S_BFE_U32_]] + ; GCN-NEXT: $sgpr0 = PRED_COPY [[S_BFE_U32_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s1) = G_TRUNC %0 %2:sgpr(s16) = G_SEXT %1 @@ -36,9 +36,9 @@ ; GCN-LABEL: name: sext_sgpr_s1_to_sgpr_s32 ; GCN: liveins: $sgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[S_BFE_I32_:%[0-9]+]]:sreg_32 = S_BFE_I32 [[COPY]], 65536, implicit-def $scc - ; GCN-NEXT: $sgpr0 = COPY [[S_BFE_I32_]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[S_BFE_I32_:%[0-9]+]]:sreg_32 = S_BFE_I32 [[PRED_COPY]], 65536, implicit-def $scc + ; GCN-NEXT: $sgpr0 = PRED_COPY [[S_BFE_I32_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s1) = G_TRUNC %0 %2:sgpr(s32) = G_SEXT %1 @@ -57,11 +57,11 @@ ; GCN-LABEL: name: sext_sgpr_s1_to_sgpr_s64 ; GCN: liveins: $sgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 ; GCN-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[DEF]], %subreg.sub1 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[DEF]], %subreg.sub1 ; GCN-NEXT: [[S_BFE_I64_:%[0-9]+]]:sreg_64 = S_BFE_I64 [[REG_SEQUENCE]], 65536, implicit-def $scc - ; GCN-NEXT: $sgpr0_sgpr1 = COPY [[S_BFE_I64_]] + ; GCN-NEXT: $sgpr0_sgpr1 = PRED_COPY [[S_BFE_I64_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s1) = G_TRUNC %0 %2:sgpr(s64) = G_SEXT %1 @@ -80,9 +80,9 @@ ; GCN-LABEL: name: sext_sgpr_s16_to_sgpr_s32 ; GCN: liveins: $sgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[S_SEXT_I32_I16_:%[0-9]+]]:sreg_32 = S_SEXT_I32_I16 [[COPY]] - ; GCN-NEXT: $sgpr0 = COPY [[S_SEXT_I32_I16_]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[S_SEXT_I32_I16_:%[0-9]+]]:sreg_32 = S_SEXT_I32_I16 [[PRED_COPY]] + ; GCN-NEXT: $sgpr0 = PRED_COPY [[S_SEXT_I32_I16_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s16) = G_TRUNC %0 %2:sgpr(s32) = G_SEXT %1 @@ -102,11 +102,11 @@ 
; GCN-LABEL: name: sext_sgpr_s16_to_sgpr_s64 ; GCN: liveins: $sgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 ; GCN-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[DEF]], %subreg.sub1 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[DEF]], %subreg.sub1 ; GCN-NEXT: [[S_BFE_I64_:%[0-9]+]]:sreg_64 = S_BFE_I64 [[REG_SEQUENCE]], 1048576, implicit-def $scc - ; GCN-NEXT: $sgpr0_sgpr1 = COPY [[S_BFE_I64_]] + ; GCN-NEXT: $sgpr0_sgpr1 = PRED_COPY [[S_BFE_I64_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s16) = G_TRUNC %0 %2:sgpr(s64) = G_SEXT %1 @@ -126,10 +126,10 @@ ; GCN-LABEL: name: sext_sgpr_s32_to_sgpr_s64 ; GCN: liveins: $sgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[S_ASHR_I32_:%[0-9]+]]:sreg_32 = S_ASHR_I32 [[COPY]], 31, implicit-def $scc - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[S_ASHR_I32_]], %subreg.sub1 - ; GCN-NEXT: $sgpr0_sgpr1 = COPY [[REG_SEQUENCE]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[S_ASHR_I32_:%[0-9]+]]:sreg_32 = S_ASHR_I32 [[PRED_COPY]], 31, implicit-def $scc + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[S_ASHR_I32_]], %subreg.sub1 + ; GCN-NEXT: $sgpr0_sgpr1 = PRED_COPY [[REG_SEQUENCE]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s64) = G_SEXT %0 $sgpr0_sgpr1 = COPY %1 @@ -163,10 +163,10 @@ ; GCN-LABEL: name: sext_vgpr_s1_to_vgpr_s16 ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[V_BFE_I32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_I32_e64 [[COPY]], 0, 1, implicit $exec + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[V_BFE_I32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_I32_e64 [[PRED_COPY]], 0, 1, implicit $exec ; GCN-NEXT: [[V_BFE_U32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_U32_e64 [[V_BFE_I32_e64_]], 0, 16, implicit $exec - ; GCN-NEXT: $vgpr0 = COPY [[V_BFE_U32_e64_]] + ; GCN-NEXT: $vgpr0 = PRED_COPY [[V_BFE_U32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s1) = G_TRUNC %0 %2:vgpr(s16) = G_SEXT %1 @@ -186,9 +186,9 @@ ; GCN-LABEL: name: sext_vgpr_s1_to_vgpr_s32 ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[V_BFE_I32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_I32_e64 [[COPY]], 0, 1, implicit $exec - ; GCN-NEXT: $vgpr0 = COPY [[V_BFE_I32_e64_]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[V_BFE_I32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_I32_e64 [[PRED_COPY]], 0, 1, implicit $exec + ; GCN-NEXT: $vgpr0 = PRED_COPY [[V_BFE_I32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s1) = G_TRUNC %0 %2:vgpr(s32) = G_SEXT %1 @@ -207,9 +207,9 @@ ; GCN-LABEL: name: sext_vgpr_s16_to_vgpr_s32 ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[V_BFE_I32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_I32_e64 [[COPY]], 0, 16, implicit $exec - ; GCN-NEXT: $vgpr0 = COPY [[V_BFE_I32_e64_]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[V_BFE_I32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_I32_e64 [[PRED_COPY]], 0, 16, implicit $exec + ; GCN-NEXT: $vgpr0 = PRED_COPY [[V_BFE_I32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s16) = G_TRUNC %0 %2:vgpr(s32) = G_SEXT %1 @@ -229,9 +229,9 @@ ; GCN-LABEL: name: 
sext_sgpr_reg_class_s1_to_sgpr_s32 ; GCN: liveins: $sgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[S_BFE_I32_:%[0-9]+]]:sreg_32 = S_BFE_I32 [[COPY]], 65536, implicit-def $scc - ; GCN-NEXT: $sgpr0 = COPY [[S_BFE_I32_]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[S_BFE_I32_:%[0-9]+]]:sreg_32 = S_BFE_I32 [[PRED_COPY]], 65536, implicit-def $scc + ; GCN-NEXT: $sgpr0 = PRED_COPY [[S_BFE_I32_]] %0:sgpr(s32) = COPY $sgpr0 %1:sreg_32(s1) = G_TRUNC %0 %2:sgpr(s32) = G_SEXT %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sextload-local.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sextload-local.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sextload-local.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sextload-local.mir @@ -18,21 +18,21 @@ ; GFX6-LABEL: name: sextload_local_s32_from_s8_align1 ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[DS_READ_I8_:%[0-9]+]]:vgpr_32 = DS_READ_I8 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (s8), addrspace 3) - ; GFX6-NEXT: $vgpr0 = COPY [[DS_READ_I8_]] + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[DS_READ_I8_:%[0-9]+]]:vgpr_32 = DS_READ_I8 [[PRED_COPY]], 0, 0, implicit $m0, implicit $exec :: (load (s8), addrspace 3) + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[DS_READ_I8_]] ; GFX7-LABEL: name: sextload_local_s32_from_s8_align1 ; GFX7: liveins: $vgpr0 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7-NEXT: [[DS_READ_I8_:%[0-9]+]]:vgpr_32 = DS_READ_I8 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (s8), addrspace 3) - ; GFX7-NEXT: $vgpr0 = COPY [[DS_READ_I8_]] + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX7-NEXT: [[DS_READ_I8_:%[0-9]+]]:vgpr_32 = DS_READ_I8 [[PRED_COPY]], 0, 0, implicit $m0, implicit $exec :: (load (s8), addrspace 3) + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[DS_READ_I8_]] ; GFX9-LABEL: name: sextload_local_s32_from_s8_align1 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[DS_READ_I8_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_I8_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (s8), addrspace 3) - ; GFX9-NEXT: $vgpr0 = COPY [[DS_READ_I8_gfx9_]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[DS_READ_I8_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_I8_gfx9 [[PRED_COPY]], 0, 0, implicit $exec :: (load (s8), addrspace 3) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[DS_READ_I8_gfx9_]] %0:vgpr(p3) = COPY $vgpr0 %1:vgpr(s32) = G_SEXTLOAD %0 :: (load (s8), align 1, addrspace 3) $vgpr0 = COPY %1 @@ -52,21 +52,21 @@ ; GFX6-LABEL: name: sextload_local_s32_from_s16_align2 ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[DS_READ_I16_:%[0-9]+]]:vgpr_32 = DS_READ_I16 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (s16), addrspace 3) - ; GFX6-NEXT: $vgpr0 = COPY [[DS_READ_I16_]] + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[DS_READ_I16_:%[0-9]+]]:vgpr_32 = DS_READ_I16 [[PRED_COPY]], 0, 0, implicit $m0, implicit $exec :: (load (s16), addrspace 3) + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[DS_READ_I16_]] ; GFX7-LABEL: name: sextload_local_s32_from_s16_align2 ; GFX7: liveins: $vgpr0 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7-NEXT: [[DS_READ_I16_:%[0-9]+]]:vgpr_32 = DS_READ_I16 
[[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (s16), addrspace 3) - ; GFX7-NEXT: $vgpr0 = COPY [[DS_READ_I16_]] + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX7-NEXT: [[DS_READ_I16_:%[0-9]+]]:vgpr_32 = DS_READ_I16 [[PRED_COPY]], 0, 0, implicit $m0, implicit $exec :: (load (s16), addrspace 3) + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[DS_READ_I16_]] ; GFX9-LABEL: name: sextload_local_s32_from_s16_align2 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[DS_READ_I16_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_I16_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (s16), addrspace 3) - ; GFX9-NEXT: $vgpr0 = COPY [[DS_READ_I16_gfx9_]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[DS_READ_I16_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_I16_gfx9 [[PRED_COPY]], 0, 0, implicit $exec :: (load (s16), addrspace 3) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[DS_READ_I16_gfx9_]] %0:vgpr(p3) = COPY $vgpr0 %1:vgpr(s32) = G_SEXTLOAD %0 :: (load (s16), align 2, addrspace 3) $vgpr0 = COPY %1 @@ -103,23 +103,23 @@ ; GFX6-LABEL: name: sextload_local_s32_from_s8_align1_offset4095 ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX6-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec - ; GFX6-NEXT: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec - ; GFX6-NEXT: [[DS_READ_I8_:%[0-9]+]]:vgpr_32 = DS_READ_I8 %2, 0, 0, implicit $m0, implicit $exec :: (load (s8), addrspace 3) - ; GFX6-NEXT: $vgpr0 = COPY [[DS_READ_I8_]] + ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX6-NEXT: [[DS_READ_I8_:%[0-9]+]]:vgpr_32 = DS_READ_I8 [[V_ADD_CO_U32_e64_]], 0, 0, implicit $m0, implicit $exec :: (load (s8), addrspace 3) + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[DS_READ_I8_]] ; GFX7-LABEL: name: sextload_local_s32_from_s8_align1_offset4095 ; GFX7: liveins: $vgpr0 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7-NEXT: [[DS_READ_I8_:%[0-9]+]]:vgpr_32 = DS_READ_I8 [[COPY]], 4095, 0, implicit $m0, implicit $exec :: (load (s8), addrspace 3) - ; GFX7-NEXT: $vgpr0 = COPY [[DS_READ_I8_]] + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX7-NEXT: [[DS_READ_I8_:%[0-9]+]]:vgpr_32 = DS_READ_I8 [[PRED_COPY]], 4095, 0, implicit $m0, implicit $exec :: (load (s8), addrspace 3) + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[DS_READ_I8_]] ; GFX9-LABEL: name: sextload_local_s32_from_s8_align1_offset4095 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[DS_READ_I8_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_I8_gfx9 [[COPY]], 4095, 0, implicit $exec :: (load (s8), addrspace 3) - ; GFX9-NEXT: $vgpr0 = COPY [[DS_READ_I8_gfx9_]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[DS_READ_I8_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_I8_gfx9 [[PRED_COPY]], 4095, 0, implicit $exec :: (load (s8), addrspace 3) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[DS_READ_I8_gfx9_]] %0:vgpr(p3) = COPY $vgpr0 %1:vgpr(s32) = G_CONSTANT i32 4095 %2:vgpr(p3) = G_PTR_ADD %0, %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-shl.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-shl.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-shl.mir 
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-shl.mir @@ -17,37 +17,37 @@ ; GFX6-LABEL: name: shl_s32_ss ; GFX6: liveins: $sgpr0, $sgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX6-NEXT: [[S_LSHL_B32_:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY]], [[COPY1]], implicit-def $scc + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX6-NEXT: [[S_LSHL_B32_:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc ; GFX6-NEXT: S_ENDPGM 0, implicit [[S_LSHL_B32_]] ; GFX7-LABEL: name: shl_s32_ss ; GFX7: liveins: $sgpr0, $sgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX7-NEXT: [[S_LSHL_B32_:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY]], [[COPY1]], implicit-def $scc + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX7-NEXT: [[S_LSHL_B32_:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc ; GFX7-NEXT: S_ENDPGM 0, implicit [[S_LSHL_B32_]] ; GFX8-LABEL: name: shl_s32_ss ; GFX8: liveins: $sgpr0, $sgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX8-NEXT: [[S_LSHL_B32_:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY]], [[COPY1]], implicit-def $scc + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX8-NEXT: [[S_LSHL_B32_:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc ; GFX8-NEXT: S_ENDPGM 0, implicit [[S_LSHL_B32_]] ; GFX9-LABEL: name: shl_s32_ss ; GFX9: liveins: $sgpr0, $sgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX9-NEXT: [[S_LSHL_B32_:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY]], [[COPY1]], implicit-def $scc + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX9-NEXT: [[S_LSHL_B32_:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc ; GFX9-NEXT: S_ENDPGM 0, implicit [[S_LSHL_B32_]] ; GFX10-LABEL: name: shl_s32_ss ; GFX10: liveins: $sgpr0, $sgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX10-NEXT: [[S_LSHL_B32_:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY]], [[COPY1]], implicit-def $scc + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX10-NEXT: [[S_LSHL_B32_:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc ; GFX10-NEXT: S_ENDPGM 0, implicit [[S_LSHL_B32_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 @@ -66,37 +66,37 @@ ; GFX6-LABEL: name: shl_s32_sv ; GFX6: liveins: $sgpr0, $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = 
V_LSHLREV_B32_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_LSHLREV_B32_e64_]] ; GFX7-LABEL: name: shl_s32_sv ; GFX7: liveins: $sgpr0, $vgpr0 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX7-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX7-NEXT: S_ENDPGM 0, implicit [[V_LSHLREV_B32_e64_]] ; GFX8-LABEL: name: shl_s32_sv ; GFX8: liveins: $sgpr0, $vgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_LSHLREV_B32_e64_]] ; GFX9-LABEL: name: shl_s32_sv ; GFX9: liveins: $sgpr0, $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_LSHLREV_B32_e64_]] ; GFX10-LABEL: name: shl_s32_sv ; GFX10: liveins: $sgpr0, $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_LSHLREV_B32_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = COPY $vgpr0 @@ -115,37 +115,37 @@ ; GFX6-LABEL: name: shl_s32_vs ; GFX6: liveins: $sgpr0, $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX6-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX6-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_LSHLREV_B32_e64_]] ; GFX7-LABEL: name: shl_s32_vs ; GFX7: liveins: $sgpr0, $vgpr0 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX7-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY1]], [[COPY]], implicit $exec + ; 
GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX7-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX7-NEXT: S_ENDPGM 0, implicit [[V_LSHLREV_B32_e64_]] ; GFX8-LABEL: name: shl_s32_vs ; GFX8: liveins: $sgpr0, $vgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX8-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX8-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_LSHLREV_B32_e64_]] ; GFX9-LABEL: name: shl_s32_vs ; GFX9: liveins: $sgpr0, $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX9-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX9-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_LSHLREV_B32_e64_]] ; GFX10-LABEL: name: shl_s32_vs ; GFX10: liveins: $sgpr0, $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX10-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX10-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_LSHLREV_B32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:sgpr(s32) = COPY $sgpr0 @@ -164,37 +164,37 @@ ; GFX6-LABEL: name: shl_s32_vv ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX6-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_LSHLREV_B32_e64_]] ; GFX7-LABEL: name: shl_s32_vv ; GFX7: liveins: $vgpr0, $vgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX7-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX7-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX7-NEXT: S_ENDPGM 0, implicit [[V_LSHLREV_B32_e64_]] ; GFX8-LABEL: name: shl_s32_vv ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = 
COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX8-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX8-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_LSHLREV_B32_e64_]] ; GFX9-LABEL: name: shl_s32_vv ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_LSHLREV_B32_e64_]] ; GFX10-LABEL: name: shl_s32_vv ; GFX10: liveins: $vgpr0, $vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX10-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_LSHLREV_B32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -213,37 +213,37 @@ ; GFX6-LABEL: name: shl_s64_ss ; GFX6: liveins: $sgpr0_sgpr1, $sgpr2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6-NEXT: [[S_LSHL_B64_:%[0-9]+]]:sreg_64 = S_LSHL_B64 [[COPY]], [[COPY1]], implicit-def $scc + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX6-NEXT: [[S_LSHL_B64_:%[0-9]+]]:sreg_64 = S_LSHL_B64 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc ; GFX6-NEXT: S_ENDPGM 0, implicit [[S_LSHL_B64_]] ; GFX7-LABEL: name: shl_s64_ss ; GFX7: liveins: $sgpr0_sgpr1, $sgpr2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7-NEXT: [[S_LSHL_B64_:%[0-9]+]]:sreg_64 = S_LSHL_B64 [[COPY]], [[COPY1]], implicit-def $scc + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX7-NEXT: [[S_LSHL_B64_:%[0-9]+]]:sreg_64 = S_LSHL_B64 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc ; GFX7-NEXT: S_ENDPGM 0, implicit [[S_LSHL_B64_]] ; GFX8-LABEL: name: shl_s64_ss ; GFX8: liveins: $sgpr0_sgpr1, $sgpr2 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8-NEXT: [[S_LSHL_B64_:%[0-9]+]]:sreg_64 = S_LSHL_B64 [[COPY]], [[COPY1]], implicit-def $scc + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX8-NEXT: [[S_LSHL_B64_:%[0-9]+]]:sreg_64 = S_LSHL_B64 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc ; GFX8-NEXT: S_ENDPGM 0, 
implicit [[S_LSHL_B64_]] ; GFX9-LABEL: name: shl_s64_ss ; GFX9: liveins: $sgpr0_sgpr1, $sgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX9-NEXT: [[S_LSHL_B64_:%[0-9]+]]:sreg_64 = S_LSHL_B64 [[COPY]], [[COPY1]], implicit-def $scc + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[S_LSHL_B64_:%[0-9]+]]:sreg_64 = S_LSHL_B64 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc ; GFX9-NEXT: S_ENDPGM 0, implicit [[S_LSHL_B64_]] ; GFX10-LABEL: name: shl_s64_ss ; GFX10: liveins: $sgpr0_sgpr1, $sgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX10-NEXT: [[S_LSHL_B64_:%[0-9]+]]:sreg_64 = S_LSHL_B64 [[COPY]], [[COPY1]], implicit-def $scc + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX10-NEXT: [[S_LSHL_B64_:%[0-9]+]]:sreg_64 = S_LSHL_B64 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc ; GFX10-NEXT: S_ENDPGM 0, implicit [[S_LSHL_B64_]] %0:sgpr(s64) = COPY $sgpr0_sgpr1 %1:sgpr(s32) = COPY $sgpr2 @@ -262,37 +262,37 @@ ; GFX6-LABEL: name: shl_s64_sv ; GFX6: liveins: $sgpr0_sgpr1, $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[V_LSHL_B64_e64_:%[0-9]+]]:vreg_64 = V_LSHL_B64_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[V_LSHL_B64_e64_:%[0-9]+]]:vreg_64 = V_LSHL_B64_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_LSHL_B64_e64_]] ; GFX7-LABEL: name: shl_s64_sv ; GFX7: liveins: $sgpr0_sgpr1, $vgpr0 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7-NEXT: [[V_LSHL_B64_e64_:%[0-9]+]]:vreg_64 = V_LSHL_B64_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX7-NEXT: [[V_LSHL_B64_e64_:%[0-9]+]]:vreg_64 = V_LSHL_B64_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; GFX7-NEXT: S_ENDPGM 0, implicit [[V_LSHL_B64_e64_]] ; GFX8-LABEL: name: shl_s64_sv ; GFX8: liveins: $sgpr0_sgpr1, $vgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: [[V_LSHLREV_B64_e64_:%[0-9]+]]:vreg_64 = V_LSHLREV_B64_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[V_LSHLREV_B64_e64_:%[0-9]+]]:vreg_64 = V_LSHLREV_B64_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_LSHLREV_B64_e64_]] ; GFX9-LABEL: name: shl_s64_sv ; GFX9: liveins: $sgpr0_sgpr1, $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[V_LSHLREV_B64_e64_:%[0-9]+]]:vreg_64 = V_LSHLREV_B64_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; 
GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[V_LSHLREV_B64_e64_:%[0-9]+]]:vreg_64 = V_LSHLREV_B64_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_LSHLREV_B64_e64_]] ; GFX10-LABEL: name: shl_s64_sv ; GFX10: liveins: $sgpr0_sgpr1, $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[V_LSHLREV_B64_e64_:%[0-9]+]]:vreg_64 = V_LSHLREV_B64_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[V_LSHLREV_B64_e64_:%[0-9]+]]:vreg_64 = V_LSHLREV_B64_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_LSHLREV_B64_e64_]] %0:sgpr(s64) = COPY $sgpr0_sgpr1 %1:vgpr(s32) = COPY $vgpr0 @@ -311,37 +311,37 @@ ; GFX6-LABEL: name: shl_s64_vs ; GFX6: liveins: $sgpr0, $vgpr0_vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX6-NEXT: [[V_LSHL_B64_e64_:%[0-9]+]]:vreg_64 = V_LSHL_B64_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX6-NEXT: [[V_LSHL_B64_e64_:%[0-9]+]]:vreg_64 = V_LSHL_B64_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_LSHL_B64_e64_]] ; GFX7-LABEL: name: shl_s64_vs ; GFX7: liveins: $sgpr0, $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX7-NEXT: [[V_LSHL_B64_e64_:%[0-9]+]]:vreg_64 = V_LSHL_B64_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX7-NEXT: [[V_LSHL_B64_e64_:%[0-9]+]]:vreg_64 = V_LSHL_B64_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; GFX7-NEXT: S_ENDPGM 0, implicit [[V_LSHL_B64_e64_]] ; GFX8-LABEL: name: shl_s64_vs ; GFX8: liveins: $sgpr0, $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX8-NEXT: [[V_LSHLREV_B64_e64_:%[0-9]+]]:vreg_64 = V_LSHLREV_B64_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX8-NEXT: [[V_LSHLREV_B64_e64_:%[0-9]+]]:vreg_64 = V_LSHLREV_B64_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_LSHLREV_B64_e64_]] ; GFX9-LABEL: name: shl_s64_vs ; GFX9: liveins: $sgpr0, $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX9-NEXT: [[V_LSHLREV_B64_e64_:%[0-9]+]]:vreg_64 = V_LSHLREV_B64_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX9-NEXT: [[V_LSHLREV_B64_e64_:%[0-9]+]]:vreg_64 = V_LSHLREV_B64_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_LSHLREV_B64_e64_]] ; GFX10-LABEL: name: shl_s64_vs ; GFX10: liveins: $sgpr0, $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: 
[[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX10-NEXT: [[V_LSHLREV_B64_e64_:%[0-9]+]]:vreg_64 = V_LSHLREV_B64_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX10-NEXT: [[V_LSHLREV_B64_e64_:%[0-9]+]]:vreg_64 = V_LSHLREV_B64_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_LSHLREV_B64_e64_]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:sgpr(s32) = COPY $sgpr0 @@ -360,37 +360,37 @@ ; GFX6-LABEL: name: shl_s64_vv ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX6-NEXT: [[V_LSHL_B64_e64_:%[0-9]+]]:vreg_64 = V_LSHL_B64_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX6-NEXT: [[V_LSHL_B64_e64_:%[0-9]+]]:vreg_64 = V_LSHL_B64_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_LSHL_B64_e64_]] ; GFX7-LABEL: name: shl_s64_vv ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7-NEXT: [[V_LSHL_B64_e64_:%[0-9]+]]:vreg_64 = V_LSHL_B64_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX7-NEXT: [[V_LSHL_B64_e64_:%[0-9]+]]:vreg_64 = V_LSHL_B64_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; GFX7-NEXT: S_ENDPGM 0, implicit [[V_LSHL_B64_e64_]] ; GFX8-LABEL: name: shl_s64_vv ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8-NEXT: [[V_LSHLREV_B64_e64_:%[0-9]+]]:vreg_64 = V_LSHLREV_B64_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX8-NEXT: [[V_LSHLREV_B64_e64_:%[0-9]+]]:vreg_64 = V_LSHLREV_B64_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_LSHLREV_B64_e64_]] ; GFX9-LABEL: name: shl_s64_vv ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: [[V_LSHLREV_B64_e64_:%[0-9]+]]:vreg_64 = V_LSHLREV_B64_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[V_LSHLREV_B64_e64_:%[0-9]+]]:vreg_64 = V_LSHLREV_B64_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_LSHLREV_B64_e64_]] ; GFX10-LABEL: name: shl_s64_vv ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10-NEXT: [[V_LSHLREV_B64_e64_:%[0-9]+]]:vreg_64 = V_LSHLREV_B64_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; 
GFX10-NEXT: [[V_LSHLREV_B64_e64_:%[0-9]+]]:vreg_64 = V_LSHLREV_B64_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_LSHLREV_B64_e64_]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-shl.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-shl.s16.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-shl.s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-shl.s16.mir @@ -83,30 +83,30 @@ ; GFX8-LABEL: name: shl_s16_s16_vs ; GFX8: liveins: $sgpr0, $vgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX8-NEXT: [[V_LSHLREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX8-NEXT: [[V_LSHLREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_LSHLREV_B16_e64_]] ; GFX9-LABEL: name: shl_s16_s16_vs ; GFX9: liveins: $sgpr0, $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX9-NEXT: [[V_LSHLREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX9-NEXT: [[V_LSHLREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_LSHLREV_B16_e64_]] ; GFX10-LABEL: name: shl_s16_s16_vs ; GFX10: liveins: $sgpr0, $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX10-NEXT: [[V_LSHLREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX10-NEXT: [[V_LSHLREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_LSHLREV_B16_e64_]] ; GFX11-LABEL: name: shl_s16_s16_vs ; GFX11: liveins: $sgpr0, $vgpr0 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX11-NEXT: [[V_LSHLREV_B16_t16_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_t16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX11-NEXT: [[V_LSHLREV_B16_t16_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_t16_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_LSHLREV_B16_t16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:sgpr(s32) = COPY $sgpr0 @@ -176,30 +176,30 @@ ; GFX8-LABEL: name: shl_s16_s16_vv ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX8-NEXT: [[V_LSHLREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX8-NEXT: 
[[V_LSHLREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_LSHLREV_B16_e64_]] ; GFX9-LABEL: name: shl_s16_s16_vv ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: [[V_LSHLREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[V_LSHLREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_LSHLREV_B16_e64_]] ; GFX10-LABEL: name: shl_s16_s16_vv ; GFX10: liveins: $vgpr0, $vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX10-NEXT: [[V_LSHLREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[V_LSHLREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_LSHLREV_B16_e64_]] ; GFX11-LABEL: name: shl_s16_s16_vv ; GFX11: liveins: $vgpr0, $vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: [[V_LSHLREV_B16_t16_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_t16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[V_LSHLREV_B16_t16_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_t16_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_LSHLREV_B16_t16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -221,31 +221,31 @@ ; GFX8-LABEL: name: shl_s16_s16_vv_zext_to_s32 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX8-NEXT: [[V_LSHLREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX8-NEXT: [[V_LSHLREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_LSHLREV_B16_e64_]] ; GFX9-LABEL: name: shl_s16_s16_vv_zext_to_s32 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: [[V_LSHLREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[V_LSHLREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_LSHLREV_B16_e64_]] ; GFX10-LABEL: name: shl_s16_s16_vv_zext_to_s32 ; GFX10: liveins: $vgpr0, $vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: 
[[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX10-NEXT: [[V_LSHLREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[V_LSHLREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX10-NEXT: [[V_BFE_U32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_U32_e64 [[V_LSHLREV_B16_e64_]], 0, 16, implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_BFE_U32_e64_]] ; GFX11-LABEL: name: shl_s16_s16_vv_zext_to_s32 ; GFX11: liveins: $vgpr0, $vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: [[V_LSHLREV_B16_t16_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_t16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[V_LSHLREV_B16_t16_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_t16_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX11-NEXT: [[V_BFE_U32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_U32_e64 [[V_LSHLREV_B16_t16_e64_]], 0, 16, implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_BFE_U32_e64_]] %0:vgpr(s32) = COPY $vgpr0 @@ -421,30 +421,30 @@ ; GFX8-LABEL: name: shl_s16_s16_sv ; GFX8: liveins: $sgpr0, $vgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: [[V_LSHLREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[V_LSHLREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_LSHLREV_B16_e64_]] ; GFX9-LABEL: name: shl_s16_s16_sv ; GFX9: liveins: $sgpr0, $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[V_LSHLREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[V_LSHLREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_LSHLREV_B16_e64_]] ; GFX10-LABEL: name: shl_s16_s16_sv ; GFX10: liveins: $sgpr0, $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[V_LSHLREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[V_LSHLREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_LSHLREV_B16_e64_]] ; GFX11-LABEL: name: shl_s16_s16_sv ; GFX11: liveins: $sgpr0, $vgpr0 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[V_LSHLREV_B16_t16_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_t16_e64 [[COPY1]], [[COPY]], implicit $exec + ; 
GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[V_LSHLREV_B16_t16_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_t16_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_LSHLREV_B16_t16_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = COPY $vgpr0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-shl.v2s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-shl.v2s16.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-shl.v2s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-shl.v2s16.mir @@ -79,16 +79,16 @@ ; GFX9-LABEL: name: shl_v2s16_sv ; GFX9: liveins: $sgpr0, $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[V_PK_LSHLREV_B16_:%[0-9]+]]:vgpr_32 = V_PK_LSHLREV_B16 8, [[COPY1]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $exec + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[V_PK_LSHLREV_B16_:%[0-9]+]]:vgpr_32 = V_PK_LSHLREV_B16 8, [[PRED_COPY1]], 8, [[PRED_COPY]], 0, 0, 0, 0, 0, implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_PK_LSHLREV_B16_]] ; GFX10-LABEL: name: shl_v2s16_sv ; GFX10: liveins: $sgpr0, $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[V_PK_LSHLREV_B16_:%[0-9]+]]:vgpr_32 = V_PK_LSHLREV_B16 8, [[COPY1]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $exec + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[V_PK_LSHLREV_B16_:%[0-9]+]]:vgpr_32 = V_PK_LSHLREV_B16 8, [[PRED_COPY1]], 8, [[PRED_COPY]], 0, 0, 0, 0, 0, implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_PK_LSHLREV_B16_]] %0:sgpr(<2 x s16>) = COPY $sgpr0 %1:vgpr(<2 x s16>) = COPY $vgpr0 @@ -122,16 +122,16 @@ ; GFX9-LABEL: name: shl_v2s16_vs ; GFX9: liveins: $sgpr0, $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX9-NEXT: [[V_PK_LSHLREV_B16_:%[0-9]+]]:vgpr_32 = V_PK_LSHLREV_B16 8, [[COPY1]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $exec + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX9-NEXT: [[V_PK_LSHLREV_B16_:%[0-9]+]]:vgpr_32 = V_PK_LSHLREV_B16 8, [[PRED_COPY1]], 8, [[PRED_COPY]], 0, 0, 0, 0, 0, implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_PK_LSHLREV_B16_]] ; GFX10-LABEL: name: shl_v2s16_vs ; GFX10: liveins: $sgpr0, $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX10-NEXT: [[V_PK_LSHLREV_B16_:%[0-9]+]]:vgpr_32 = V_PK_LSHLREV_B16 8, [[COPY1]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $exec + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX10-NEXT: [[V_PK_LSHLREV_B16_:%[0-9]+]]:vgpr_32 = V_PK_LSHLREV_B16 8, [[PRED_COPY1]], 8, [[PRED_COPY]], 0, 0, 0, 0, 0, implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_PK_LSHLREV_B16_]] %0:vgpr(<2 x s16>) = COPY $vgpr0 %1:sgpr(<2 x s16>) = COPY $sgpr0 @@ -165,16 +165,16 @@ ; GFX9-LABEL: name: shl_v2s16_vv ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: 
[[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: [[V_PK_LSHLREV_B16_:%[0-9]+]]:vgpr_32 = V_PK_LSHLREV_B16 8, [[COPY1]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $exec + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[V_PK_LSHLREV_B16_:%[0-9]+]]:vgpr_32 = V_PK_LSHLREV_B16 8, [[PRED_COPY1]], 8, [[PRED_COPY]], 0, 0, 0, 0, 0, implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_PK_LSHLREV_B16_]] ; GFX10-LABEL: name: shl_v2s16_vv ; GFX10: liveins: $vgpr0, $vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX10-NEXT: [[V_PK_LSHLREV_B16_:%[0-9]+]]:vgpr_32 = V_PK_LSHLREV_B16 8, [[COPY1]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $exec + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[V_PK_LSHLREV_B16_:%[0-9]+]]:vgpr_32 = V_PK_LSHLREV_B16 8, [[PRED_COPY1]], 8, [[PRED_COPY]], 0, 0, 0, 0, 0, implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_PK_LSHLREV_B16_]] %0:vgpr(<2 x s16>) = COPY $vgpr0 %1:vgpr(<2 x s16>) = COPY $vgpr1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sitofp.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sitofp.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sitofp.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sitofp.mir @@ -16,33 +16,33 @@ ; WAVE64-LABEL: name: sitofp ; WAVE64: liveins: $sgpr0, $vgpr0, $vgpr3_vgpr4 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE64-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4 - ; WAVE64-NEXT: [[V_CVT_F32_I32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_F32_I32_e64 [[COPY]], 0, 0, implicit $mode, implicit $exec - ; WAVE64-NEXT: [[V_CVT_F32_I32_e64_1:%[0-9]+]]:vgpr_32 = V_CVT_F32_I32_e64 [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; WAVE64-NEXT: FLAT_STORE_DWORD [[COPY2]], [[V_CVT_F32_I32_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) - ; WAVE64-NEXT: FLAT_STORE_DWORD [[COPY2]], [[V_CVT_F32_I32_e64_1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE64-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr3_vgpr4 + ; WAVE64-NEXT: [[V_CVT_F32_I32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_F32_I32_e64 [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; WAVE64-NEXT: [[V_CVT_F32_I32_e64_1:%[0-9]+]]:vgpr_32 = V_CVT_F32_I32_e64 [[PRED_COPY1]], 0, 0, implicit $mode, implicit $exec + ; WAVE64-NEXT: FLAT_STORE_DWORD [[PRED_COPY2]], [[V_CVT_F32_I32_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) + ; WAVE64-NEXT: FLAT_STORE_DWORD [[PRED_COPY2]], [[V_CVT_F32_I32_e64_1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) ; WAVE32-LABEL: name: sitofp ; WAVE32: liveins: $sgpr0, $vgpr0, $vgpr3_vgpr4 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE32-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4 - ; WAVE32-NEXT: [[V_CVT_F32_I32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_F32_I32_e64 [[COPY]], 0, 0, implicit $mode, implicit $exec - ; WAVE32-NEXT: 
[[V_CVT_F32_I32_e64_1:%[0-9]+]]:vgpr_32 = V_CVT_F32_I32_e64 [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; WAVE32-NEXT: GLOBAL_STORE_DWORD [[COPY2]], [[V_CVT_F32_I32_e64_]], 0, 0, implicit $exec :: (store (s32), addrspace 1) - ; WAVE32-NEXT: GLOBAL_STORE_DWORD [[COPY2]], [[V_CVT_F32_I32_e64_1]], 0, 0, implicit $exec :: (store (s32), addrspace 1) + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE32-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr3_vgpr4 + ; WAVE32-NEXT: [[V_CVT_F32_I32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_F32_I32_e64 [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; WAVE32-NEXT: [[V_CVT_F32_I32_e64_1:%[0-9]+]]:vgpr_32 = V_CVT_F32_I32_e64 [[PRED_COPY1]], 0, 0, implicit $mode, implicit $exec + ; WAVE32-NEXT: GLOBAL_STORE_DWORD [[PRED_COPY2]], [[V_CVT_F32_I32_e64_]], 0, 0, implicit $exec :: (store (s32), addrspace 1) + ; WAVE32-NEXT: GLOBAL_STORE_DWORD [[PRED_COPY2]], [[V_CVT_F32_I32_e64_1]], 0, 0, implicit $exec :: (store (s32), addrspace 1) ; GFX11-LABEL: name: sitofp ; GFX11: liveins: $sgpr0, $vgpr0, $vgpr3_vgpr4 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4 - ; GFX11-NEXT: [[V_CVT_F32_I32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_F32_I32_e64 [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GFX11-NEXT: [[V_CVT_F32_I32_e64_1:%[0-9]+]]:vgpr_32 = V_CVT_F32_I32_e64 [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; GFX11-NEXT: GLOBAL_STORE_DWORD [[COPY2]], [[V_CVT_F32_I32_e64_]], 0, 0, implicit $exec :: (store (s32), addrspace 1) - ; GFX11-NEXT: GLOBAL_STORE_DWORD [[COPY2]], [[V_CVT_F32_I32_e64_1]], 0, 0, implicit $exec :: (store (s32), addrspace 1) + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr3_vgpr4 + ; GFX11-NEXT: [[V_CVT_F32_I32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_F32_I32_e64 [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX11-NEXT: [[V_CVT_F32_I32_e64_1:%[0-9]+]]:vgpr_32 = V_CVT_F32_I32_e64 [[PRED_COPY1]], 0, 0, implicit $mode, implicit $exec + ; GFX11-NEXT: GLOBAL_STORE_DWORD [[PRED_COPY2]], [[V_CVT_F32_I32_e64_]], 0, 0, implicit $exec :: (store (s32), addrspace 1) + ; GFX11-NEXT: GLOBAL_STORE_DWORD [[PRED_COPY2]], [[V_CVT_F32_I32_e64_1]], 0, 0, implicit $exec :: (store (s32), addrspace 1) %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = COPY $vgpr0 @@ -72,24 +72,24 @@ ; WAVE64-LABEL: name: sitofp_s32_to_s16_vv ; WAVE64: liveins: $vgpr0 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE64-NEXT: [[V_CVT_F32_I32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_F32_I32_e32 [[COPY]], implicit $mode, implicit $exec - ; WAVE64-NEXT: %1:vgpr_32 = nofpexcept V_CVT_F16_F32_e64 0, [[V_CVT_F32_I32_e32_]], 0, 0, implicit $mode, implicit $exec - ; WAVE64-NEXT: $vgpr0 = COPY %1 + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE64-NEXT: [[V_CVT_F32_I32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_F32_I32_e32 [[PRED_COPY]], implicit $mode, implicit $exec + ; WAVE64-NEXT: [[V_CVT_F16_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_e64 0, [[V_CVT_F32_I32_e32_]], 0, 0, implicit $mode, implicit $exec + ; WAVE64-NEXT: $vgpr0 = PRED_COPY [[V_CVT_F16_F32_e64_]] ; WAVE32-LABEL: name: sitofp_s32_to_s16_vv ; WAVE32: liveins: $vgpr0 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: 
[[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE32-NEXT: [[V_CVT_F32_I32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_F32_I32_e32 [[COPY]], implicit $mode, implicit $exec - ; WAVE32-NEXT: %1:vgpr_32 = nofpexcept V_CVT_F16_F32_e64 0, [[V_CVT_F32_I32_e32_]], 0, 0, implicit $mode, implicit $exec - ; WAVE32-NEXT: $vgpr0 = COPY %1 + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE32-NEXT: [[V_CVT_F32_I32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_F32_I32_e32 [[PRED_COPY]], implicit $mode, implicit $exec + ; WAVE32-NEXT: [[V_CVT_F16_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_e64 0, [[V_CVT_F32_I32_e32_]], 0, 0, implicit $mode, implicit $exec + ; WAVE32-NEXT: $vgpr0 = PRED_COPY [[V_CVT_F16_F32_e64_]] ; GFX11-LABEL: name: sitofp_s32_to_s16_vv ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[V_CVT_F32_I32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_F32_I32_e32 [[COPY]], implicit $mode, implicit $exec - ; GFX11-NEXT: %1:vgpr_32 = nofpexcept V_CVT_F16_F32_t16_e64 0, [[V_CVT_F32_I32_e32_]], 0, 0, implicit $mode, implicit $exec - ; GFX11-NEXT: $vgpr0 = COPY %1 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[V_CVT_F32_I32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_F32_I32_e32 [[PRED_COPY]], implicit $mode, implicit $exec + ; GFX11-NEXT: [[V_CVT_F16_F32_t16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_t16_e64 0, [[V_CVT_F32_I32_e32_]], 0, 0, implicit $mode, implicit $exec + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[V_CVT_F16_F32_t16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s16) = G_SITOFP %0 %2:vgpr(s32) = G_ANYEXT %1 @@ -109,24 +109,24 @@ ; WAVE64-LABEL: name: sitofp_s32_to_s16_vs ; WAVE64: liveins: $sgpr0 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; WAVE64-NEXT: [[V_CVT_F32_I32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_F32_I32_e32 [[COPY]], implicit $mode, implicit $exec - ; WAVE64-NEXT: %1:vgpr_32 = nofpexcept V_CVT_F16_F32_e64 0, [[V_CVT_F32_I32_e32_]], 0, 0, implicit $mode, implicit $exec - ; WAVE64-NEXT: $vgpr0 = COPY %1 + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; WAVE64-NEXT: [[V_CVT_F32_I32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_F32_I32_e32 [[PRED_COPY]], implicit $mode, implicit $exec + ; WAVE64-NEXT: [[V_CVT_F16_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_e64 0, [[V_CVT_F32_I32_e32_]], 0, 0, implicit $mode, implicit $exec + ; WAVE64-NEXT: $vgpr0 = PRED_COPY [[V_CVT_F16_F32_e64_]] ; WAVE32-LABEL: name: sitofp_s32_to_s16_vs ; WAVE32: liveins: $sgpr0 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; WAVE32-NEXT: [[V_CVT_F32_I32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_F32_I32_e32 [[COPY]], implicit $mode, implicit $exec - ; WAVE32-NEXT: %1:vgpr_32 = nofpexcept V_CVT_F16_F32_e64 0, [[V_CVT_F32_I32_e32_]], 0, 0, implicit $mode, implicit $exec - ; WAVE32-NEXT: $vgpr0 = COPY %1 + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; WAVE32-NEXT: [[V_CVT_F32_I32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_F32_I32_e32 [[PRED_COPY]], implicit $mode, implicit $exec + ; WAVE32-NEXT: [[V_CVT_F16_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_e64 0, [[V_CVT_F32_I32_e32_]], 0, 0, implicit $mode, implicit $exec + ; WAVE32-NEXT: $vgpr0 = PRED_COPY [[V_CVT_F16_F32_e64_]] ; GFX11-LABEL: name: sitofp_s32_to_s16_vs ; GFX11: liveins: $sgpr0 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX11-NEXT: [[V_CVT_F32_I32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_F32_I32_e32 [[COPY]], implicit $mode, implicit $exec - ; GFX11-NEXT: 
%1:vgpr_32 = nofpexcept V_CVT_F16_F32_t16_e64 0, [[V_CVT_F32_I32_e32_]], 0, 0, implicit $mode, implicit $exec - ; GFX11-NEXT: $vgpr0 = COPY %1 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX11-NEXT: [[V_CVT_F32_I32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_F32_I32_e32 [[PRED_COPY]], implicit $mode, implicit $exec + ; GFX11-NEXT: [[V_CVT_F16_F32_t16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_t16_e64 0, [[V_CVT_F32_I32_e32_]], 0, 0, implicit $mode, implicit $exec + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[V_CVT_F16_F32_t16_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s16) = G_SITOFP %0 %2:vgpr(s32) = G_ANYEXT %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-smax.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-smax.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-smax.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-smax.mir @@ -14,9 +14,9 @@ ; GCN-LABEL: name: smax_s32_ss ; GCN: liveins: $sgpr0, $sgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GCN-NEXT: [[S_MAX_I32_:%[0-9]+]]:sreg_32 = S_MAX_I32 [[COPY]], [[COPY1]], implicit-def $scc + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GCN-NEXT: [[S_MAX_I32_:%[0-9]+]]:sreg_32 = S_MAX_I32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc ; GCN-NEXT: S_ENDPGM 0, implicit [[S_MAX_I32_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 @@ -35,9 +35,9 @@ ; GCN-LABEL: name: smax_s32_sv ; GCN: liveins: $sgpr0, $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[V_MAX_I32_e64_:%[0-9]+]]:vgpr_32 = V_MAX_I32_e64 [[COPY]], [[COPY1]], implicit $exec + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[V_MAX_I32_e64_:%[0-9]+]]:vgpr_32 = V_MAX_I32_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_MAX_I32_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = COPY $vgpr0 @@ -56,9 +56,9 @@ ; GCN-LABEL: name: smax_s32_vs ; GCN: liveins: $sgpr0, $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[V_MAX_I32_e64_:%[0-9]+]]:vgpr_32 = V_MAX_I32_e64 [[COPY]], [[COPY1]], implicit $exec + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[V_MAX_I32_e64_:%[0-9]+]]:vgpr_32 = V_MAX_I32_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_MAX_I32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:sgpr(s32) = COPY $sgpr0 @@ -77,9 +77,9 @@ ; GCN-LABEL: name: smax_s32_vv ; GCN: liveins: $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GCN-NEXT: [[V_MAX_I32_e64_:%[0-9]+]]:vgpr_32 = V_MAX_I32_e64 [[COPY]], [[COPY1]], implicit $exec + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GCN-NEXT: [[V_MAX_I32_e64_:%[0-9]+]]:vgpr_32 = V_MAX_I32_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_MAX_I32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-smin.mir 
b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-smin.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-smin.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-smin.mir @@ -14,9 +14,9 @@ ; GCN-LABEL: name: smin_s32_ss ; GCN: liveins: $sgpr0, $sgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GCN-NEXT: [[S_MIN_I32_:%[0-9]+]]:sreg_32 = S_MIN_I32 [[COPY]], [[COPY1]], implicit-def $scc + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GCN-NEXT: [[S_MIN_I32_:%[0-9]+]]:sreg_32 = S_MIN_I32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc ; GCN-NEXT: S_ENDPGM 0, implicit [[S_MIN_I32_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 @@ -35,9 +35,9 @@ ; GCN-LABEL: name: smin_s32_sv ; GCN: liveins: $sgpr0, $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[V_MIN_I32_e64_:%[0-9]+]]:vgpr_32 = V_MIN_I32_e64 [[COPY]], [[COPY1]], implicit $exec + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[V_MIN_I32_e64_:%[0-9]+]]:vgpr_32 = V_MIN_I32_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_MIN_I32_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = COPY $vgpr0 @@ -56,9 +56,9 @@ ; GCN-LABEL: name: smin_s32_vs ; GCN: liveins: $sgpr0, $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[V_MIN_I32_e64_:%[0-9]+]]:vgpr_32 = V_MIN_I32_e64 [[COPY]], [[COPY1]], implicit $exec + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[V_MIN_I32_e64_:%[0-9]+]]:vgpr_32 = V_MIN_I32_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_MIN_I32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:sgpr(s32) = COPY $sgpr0 @@ -77,9 +77,9 @@ ; GCN-LABEL: name: smin_s32_vv ; GCN: liveins: $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GCN-NEXT: [[V_MIN_I32_e64_:%[0-9]+]]:vgpr_32 = V_MIN_I32_e64 [[COPY]], [[COPY1]], implicit $exec + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GCN-NEXT: [[V_MIN_I32_e64_:%[0-9]+]]:vgpr_32 = V_MIN_I32_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_MIN_I32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-smulh.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-smulh.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-smulh.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-smulh.mir @@ -28,9 +28,9 @@ ; GFX9-LABEL: name: smulh_s32_ss ; GFX9: liveins: $sgpr0, $sgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX9-NEXT: [[S_MUL_HI_I32_:%[0-9]+]]:sreg_32 = S_MUL_HI_I32 [[COPY]], [[COPY1]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX9-NEXT: [[S_MUL_HI_I32_:%[0-9]+]]:sreg_32 = S_MUL_HI_I32 [[PRED_COPY]], [[PRED_COPY1]] ; 
GFX9-NEXT: S_ENDPGM 0, implicit [[S_MUL_HI_I32_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 @@ -50,16 +50,16 @@ ; SI-LABEL: name: smulh_s32_sv ; SI: liveins: $sgpr0, $vgpr0 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; SI-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; SI-NEXT: [[V_MUL_HI_I32_e64_:%[0-9]+]]:vgpr_32 = V_MUL_HI_I32_e64 [[COPY]], [[COPY1]], implicit $exec + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; SI-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; SI-NEXT: [[V_MUL_HI_I32_e64_:%[0-9]+]]:vgpr_32 = V_MUL_HI_I32_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; SI-NEXT: S_ENDPGM 0, implicit [[V_MUL_HI_I32_e64_]] ; GFX9-LABEL: name: smulh_s32_sv ; GFX9: liveins: $sgpr0, $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[V_MUL_HI_I32_e64_:%[0-9]+]]:vgpr_32 = V_MUL_HI_I32_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[V_MUL_HI_I32_e64_:%[0-9]+]]:vgpr_32 = V_MUL_HI_I32_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_MUL_HI_I32_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = COPY $vgpr0 @@ -79,16 +79,16 @@ ; SI-LABEL: name: smulh_s32_vs ; SI: liveins: $sgpr0, $vgpr0 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; SI-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; SI-NEXT: [[V_MUL_HI_I32_e64_:%[0-9]+]]:vgpr_32 = V_MUL_HI_I32_e64 [[COPY]], [[COPY1]], implicit $exec + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; SI-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; SI-NEXT: [[V_MUL_HI_I32_e64_:%[0-9]+]]:vgpr_32 = V_MUL_HI_I32_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; SI-NEXT: S_ENDPGM 0, implicit [[V_MUL_HI_I32_e64_]] ; GFX9-LABEL: name: smulh_s32_vs ; GFX9: liveins: $sgpr0, $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX9-NEXT: [[V_MUL_HI_I32_e64_:%[0-9]+]]:vgpr_32 = V_MUL_HI_I32_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX9-NEXT: [[V_MUL_HI_I32_e64_:%[0-9]+]]:vgpr_32 = V_MUL_HI_I32_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_MUL_HI_I32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:sgpr(s32) = COPY $sgpr0 @@ -108,16 +108,16 @@ ; SI-LABEL: name: smulh_s32_vv ; SI: liveins: $vgpr0, $vgpr1 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; SI-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; SI-NEXT: [[V_MUL_HI_I32_e64_:%[0-9]+]]:vgpr_32 = V_MUL_HI_I32_e64 [[COPY]], [[COPY1]], implicit $exec + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; SI-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; SI-NEXT: [[V_MUL_HI_I32_e64_:%[0-9]+]]:vgpr_32 = V_MUL_HI_I32_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; SI-NEXT: S_ENDPGM 0, implicit [[V_MUL_HI_I32_e64_]] ; GFX9-LABEL: name: smulh_s32_vv ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: [[V_MUL_HI_I32_e64_:%[0-9]+]]:vgpr_32 = V_MUL_HI_I32_e64 [[COPY]], [[COPY1]], implicit $exec + ; 
GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[V_MUL_HI_I32_e64_:%[0-9]+]]:vgpr_32 = V_MUL_HI_I32_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_MUL_HI_I32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-atomic-flat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-atomic-flat.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-atomic-flat.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-atomic-flat.mir @@ -18,15 +18,15 @@ ; GFX7-LABEL: name: atomic_store_flat_s32_seq_cst ; GFX7: liveins: $vgpr0, $vgpr1_vgpr2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr1_vgpr2 - ; GFX7-NEXT: FLAT_STORE_DWORD [[COPY1]], [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (store seq_cst (s32)) + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr1_vgpr2 + ; GFX7-NEXT: FLAT_STORE_DWORD [[PRED_COPY1]], [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (store seq_cst (s32)) ; GFX9-LABEL: name: atomic_store_flat_s32_seq_cst ; GFX9: liveins: $vgpr0, $vgpr1_vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr1_vgpr2 - ; GFX9-NEXT: FLAT_STORE_DWORD [[COPY1]], [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (store seq_cst (s32)) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr1_vgpr2 + ; GFX9-NEXT: FLAT_STORE_DWORD [[PRED_COPY1]], [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (store seq_cst (s32)) %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(p0) = COPY $vgpr1_vgpr2 G_STORE %0, %1 :: (store seq_cst (s32), align 4, addrspace 0) @@ -163,15 +163,15 @@ ; GFX7-LABEL: name: atomic_store_flat_s64_seq_cst ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX7-NEXT: FLAT_STORE_DWORDX2 [[COPY1]], [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (store seq_cst (s64)) + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX7-NEXT: FLAT_STORE_DWORDX2 [[PRED_COPY1]], [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (store seq_cst (s64)) ; GFX9-LABEL: name: atomic_store_flat_s64_seq_cst ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: FLAT_STORE_DWORDX2 [[COPY1]], [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (store seq_cst (s64)) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX9-NEXT: FLAT_STORE_DWORDX2 [[PRED_COPY1]], [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (store seq_cst (s64)) %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(p0) = COPY $vgpr2_vgpr3 G_STORE %0, %1 :: (store seq_cst (s64), align 8, addrspace 0) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-atomic-local.mir 
b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-atomic-local.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-atomic-local.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-atomic-local.mir @@ -19,23 +19,23 @@ ; GFX6-LABEL: name: atomic_store_local_s32_seq_cst ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 ; GFX6-NEXT: $m0 = S_MOV_B32 -1 - ; GFX6-NEXT: DS_WRITE_B32 [[COPY1]], [[COPY]], 0, 0, implicit $m0, implicit $exec :: (store seq_cst (s32), addrspace 3) + ; GFX6-NEXT: DS_WRITE_B32 [[PRED_COPY1]], [[PRED_COPY]], 0, 0, implicit $m0, implicit $exec :: (store seq_cst (s32), addrspace 3) ; GFX7-LABEL: name: atomic_store_local_s32_seq_cst ; GFX7: liveins: $vgpr0, $vgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 - ; GFX7-NEXT: DS_WRITE_B32 [[COPY1]], [[COPY]], 0, 0, implicit $m0, implicit $exec :: (store seq_cst (s32), addrspace 3) + ; GFX7-NEXT: DS_WRITE_B32 [[PRED_COPY1]], [[PRED_COPY]], 0, 0, implicit $m0, implicit $exec :: (store seq_cst (s32), addrspace 3) ; GFX9-LABEL: name: atomic_store_local_s32_seq_cst ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: DS_WRITE_B32_gfx9 [[COPY1]], [[COPY]], 0, 0, implicit $exec :: (store seq_cst (s32), addrspace 3) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX9-NEXT: DS_WRITE_B32_gfx9 [[PRED_COPY1]], [[PRED_COPY]], 0, 0, implicit $exec :: (store seq_cst (s32), addrspace 3) %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(p3) = COPY $vgpr1 G_STORE %0, %1 :: (store seq_cst (s32), align 4, addrspace 3) @@ -204,23 +204,23 @@ ; GFX6-LABEL: name: atomic_store_local_s64_seq_cst ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX6-NEXT: $m0 = S_MOV_B32 -1 - ; GFX6-NEXT: DS_WRITE_B64 [[COPY1]], [[COPY]], 0, 0, implicit $m0, implicit $exec :: (store seq_cst (s64), addrspace 3) + ; GFX6-NEXT: DS_WRITE_B64 [[PRED_COPY1]], [[PRED_COPY]], 0, 0, implicit $m0, implicit $exec :: (store seq_cst (s64), addrspace 3) ; GFX7-LABEL: name: atomic_store_local_s64_seq_cst ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 - ; GFX7-NEXT: DS_WRITE_B64 [[COPY1]], [[COPY]], 0, 0, implicit $m0, implicit $exec :: (store seq_cst (s64), addrspace 3) + ; GFX7-NEXT: DS_WRITE_B64 [[PRED_COPY1]], [[PRED_COPY]], 0, 0, implicit $m0, implicit $exec :: (store seq_cst (s64), addrspace 3) ; GFX9-LABEL: name: 
atomic_store_local_s64_seq_cst ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: DS_WRITE_B64_gfx9 [[COPY1]], [[COPY]], 0, 0, implicit $exec :: (store seq_cst (s64), addrspace 3) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX9-NEXT: DS_WRITE_B64_gfx9 [[PRED_COPY1]], [[PRED_COPY]], 0, 0, implicit $exec :: (store seq_cst (s64), addrspace 3) %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(p3) = COPY $vgpr2 G_STORE %0, %1 :: (store seq_cst (s64), align 8, addrspace 3) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-flat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-flat.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-flat.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-flat.mir @@ -19,33 +19,33 @@ ; GFX7-LABEL: name: store_flat_s32_to_4 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7-NEXT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32)) + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX7-NEXT: FLAT_STORE_DWORD [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32)) ; GFX8-LABEL: name: store_flat_s32_to_4 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8-NEXT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32)) + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX8-NEXT: FLAT_STORE_DWORD [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32)) ; GFX9-LABEL: name: store_flat_s32_to_4 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32)) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX9-NEXT: FLAT_STORE_DWORD [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32)) ; GFX10-LABEL: name: store_flat_s32_to_4 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10-NEXT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32)) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX10-NEXT: FLAT_STORE_DWORD [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32)) ; GFX11-LABEL: name: store_flat_s32_to_4 ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; 
GFX11-NEXT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32)) + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX11-NEXT: FLAT_STORE_DWORD [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32)) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 G_STORE %1, %0 :: (store (s32), align 4, addrspace 0) @@ -65,33 +65,33 @@ ; GFX7-LABEL: name: store_flat_s32_to_2 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7-NEXT: FLAT_STORE_SHORT [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s16)) + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX7-NEXT: FLAT_STORE_SHORT [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s16)) ; GFX8-LABEL: name: store_flat_s32_to_2 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8-NEXT: FLAT_STORE_SHORT [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s16)) + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX8-NEXT: FLAT_STORE_SHORT [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s16)) ; GFX9-LABEL: name: store_flat_s32_to_2 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: FLAT_STORE_SHORT [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s16)) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX9-NEXT: FLAT_STORE_SHORT [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s16)) ; GFX10-LABEL: name: store_flat_s32_to_2 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10-NEXT: FLAT_STORE_SHORT [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s16)) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX10-NEXT: FLAT_STORE_SHORT [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s16)) ; GFX11-LABEL: name: store_flat_s32_to_2 ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX11-NEXT: FLAT_STORE_SHORT [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s16)) + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX11-NEXT: FLAT_STORE_SHORT [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s16)) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 G_STORE %1, %0 :: (store (s16), align 2, addrspace 0) @@ -111,33 +111,33 @@ ; 
GFX7-LABEL: name: store_flat_s32_to_1 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7-NEXT: FLAT_STORE_BYTE [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s8)) + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX7-NEXT: FLAT_STORE_BYTE [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s8)) ; GFX8-LABEL: name: store_flat_s32_to_1 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8-NEXT: FLAT_STORE_BYTE [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s8)) + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX8-NEXT: FLAT_STORE_BYTE [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s8)) ; GFX9-LABEL: name: store_flat_s32_to_1 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: FLAT_STORE_BYTE [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s8)) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX9-NEXT: FLAT_STORE_BYTE [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s8)) ; GFX10-LABEL: name: store_flat_s32_to_1 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10-NEXT: FLAT_STORE_BYTE [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s8)) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX10-NEXT: FLAT_STORE_BYTE [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s8)) ; GFX11-LABEL: name: store_flat_s32_to_1 ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX11-NEXT: FLAT_STORE_BYTE [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s8)) + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX11-NEXT: FLAT_STORE_BYTE [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s8)) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 G_STORE %1, %0 :: (store (s8), align 1, addrspace 0) @@ -158,33 +158,33 @@ ; GFX7-LABEL: name: store_flat_s64 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX7-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s64)) + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX7-NEXT: FLAT_STORE_DWORDX2 [[PRED_COPY]], 
[[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s64)) ; GFX8-LABEL: name: store_flat_s64 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX8-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s64)) + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX8-NEXT: FLAT_STORE_DWORDX2 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s64)) ; GFX9-LABEL: name: store_flat_s64 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s64)) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX9-NEXT: FLAT_STORE_DWORDX2 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s64)) ; GFX10-LABEL: name: store_flat_s64 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX10-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s64)) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX10-NEXT: FLAT_STORE_DWORDX2 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s64)) ; GFX11-LABEL: name: store_flat_s64 ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX11-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s64)) + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX11-NEXT: FLAT_STORE_DWORDX2 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s64)) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = COPY $vgpr2_vgpr3 G_STORE %1, %0 :: (store (s64), align 8, addrspace 0) @@ -297,33 +297,33 @@ ; GFX7-LABEL: name: store_flat_v2s32 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX7-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s32>)) + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX7-NEXT: FLAT_STORE_DWORDX2 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s32>)) ; GFX8-LABEL: name: store_flat_v2s32 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX8-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: 
(store (<2 x s32>)) + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX8-NEXT: FLAT_STORE_DWORDX2 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s32>)) ; GFX9-LABEL: name: store_flat_v2s32 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s32>)) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX9-NEXT: FLAT_STORE_DWORDX2 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s32>)) ; GFX10-LABEL: name: store_flat_v2s32 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX10-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s32>)) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX10-NEXT: FLAT_STORE_DWORDX2 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s32>)) ; GFX11-LABEL: name: store_flat_v2s32 ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX11-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s32>)) + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX11-NEXT: FLAT_STORE_DWORDX2 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s32>)) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(<2 x s32>) = COPY $vgpr2_vgpr3 G_STORE %1, %0 :: (store (<2 x s32>), align 8, addrspace 0) @@ -343,33 +343,33 @@ ; GFX7-LABEL: name: store_flat_v3s32 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vreg_96 = COPY $vgpr2_vgpr3_vgpr4 - ; GFX7-NEXT: FLAT_STORE_DWORDX3 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<3 x s32>), align 16) + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_96 = PRED_COPY $vgpr2_vgpr3_vgpr4 + ; GFX7-NEXT: FLAT_STORE_DWORDX3 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<3 x s32>), align 16) ; GFX8-LABEL: name: store_flat_v3s32 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vreg_96 = COPY $vgpr2_vgpr3_vgpr4 - ; GFX8-NEXT: FLAT_STORE_DWORDX3 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<3 x s32>), align 16) + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_96 = PRED_COPY $vgpr2_vgpr3_vgpr4 + ; GFX8-NEXT: FLAT_STORE_DWORDX3 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit 
$flat_scr :: (store (<3 x s32>), align 16) ; GFX9-LABEL: name: store_flat_v3s32 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vreg_96 = COPY $vgpr2_vgpr3_vgpr4 - ; GFX9-NEXT: FLAT_STORE_DWORDX3 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<3 x s32>), align 16) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_96 = PRED_COPY $vgpr2_vgpr3_vgpr4 + ; GFX9-NEXT: FLAT_STORE_DWORDX3 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<3 x s32>), align 16) ; GFX10-LABEL: name: store_flat_v3s32 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_96 = COPY $vgpr2_vgpr3_vgpr4 - ; GFX10-NEXT: FLAT_STORE_DWORDX3 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<3 x s32>), align 16) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_96 = PRED_COPY $vgpr2_vgpr3_vgpr4 + ; GFX10-NEXT: FLAT_STORE_DWORDX3 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<3 x s32>), align 16) ; GFX11-LABEL: name: store_flat_v3s32 ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vreg_96 = COPY $vgpr2_vgpr3_vgpr4 - ; GFX11-NEXT: FLAT_STORE_DWORDX3 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<3 x s32>), align 16) + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_96 = PRED_COPY $vgpr2_vgpr3_vgpr4 + ; GFX11-NEXT: FLAT_STORE_DWORDX3 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<3 x s32>), align 16) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 G_STORE %1, %0 :: (store (<3 x s32>), align 16, addrspace 0) @@ -389,33 +389,33 @@ ; GFX7-LABEL: name: store_flat_v4s32 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX7-NEXT: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<4 x s32>)) + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_128 = PRED_COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX7-NEXT: FLAT_STORE_DWORDX4 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<4 x s32>)) ; GFX8-LABEL: name: store_flat_v4s32 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX8-NEXT: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<4 x s32>)) + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_128 = PRED_COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX8-NEXT: FLAT_STORE_DWORDX4 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<4 x s32>)) ; GFX9-LABEL: name: store_flat_v4s32 ; GFX9: liveins: $vgpr0_vgpr1, 
$vgpr2_vgpr3_vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX9-NEXT: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<4 x s32>)) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_128 = PRED_COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX9-NEXT: FLAT_STORE_DWORDX4 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<4 x s32>)) ; GFX10-LABEL: name: store_flat_v4s32 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX10-NEXT: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<4 x s32>)) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_128 = PRED_COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX10-NEXT: FLAT_STORE_DWORDX4 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<4 x s32>)) ; GFX11-LABEL: name: store_flat_v4s32 ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX11-NEXT: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<4 x s32>)) + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_128 = PRED_COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX11-NEXT: FLAT_STORE_DWORDX4 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<4 x s32>)) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 G_STORE %1, %0 :: (store (<4 x s32>), align 16, addrspace 0) @@ -436,33 +436,33 @@ ; GFX7-LABEL: name: store_flat_v2s16 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7-NEXT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s16>)) + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX7-NEXT: FLAT_STORE_DWORD [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s16>)) ; GFX8-LABEL: name: store_flat_v2s16 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8-NEXT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s16>)) + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX8-NEXT: FLAT_STORE_DWORD [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s16>)) ; GFX9-LABEL: name: store_flat_v2s16 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 
(<2 x s16>)) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX9-NEXT: FLAT_STORE_DWORD [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s16>)) ; GFX10-LABEL: name: store_flat_v2s16 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10-NEXT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s16>)) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX10-NEXT: FLAT_STORE_DWORD [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s16>)) ; GFX11-LABEL: name: store_flat_v2s16 ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX11-NEXT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s16>)) + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX11-NEXT: FLAT_STORE_DWORD [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s16>)) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(<2 x s16>) = COPY $vgpr2 G_STORE %1, %0 :: (store (<2 x s16>), align 4, addrspace 0) @@ -483,33 +483,33 @@ ; GFX7-LABEL: name: store_flat_v4s16 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX7-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<4 x s16>)) + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX7-NEXT: FLAT_STORE_DWORDX2 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<4 x s16>)) ; GFX8-LABEL: name: store_flat_v4s16 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX8-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<4 x s16>)) + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX8-NEXT: FLAT_STORE_DWORDX2 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<4 x s16>)) ; GFX9-LABEL: name: store_flat_v4s16 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<4 x s16>)) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX9-NEXT: FLAT_STORE_DWORDX2 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<4 x s16>)) ; GFX10-LABEL: name: store_flat_v4s16 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX10-NEXT: {{ $}} 
- ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX10-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<4 x s16>)) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX10-NEXT: FLAT_STORE_DWORDX2 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<4 x s16>)) ; GFX11-LABEL: name: store_flat_v4s16 ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX11-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<4 x s16>)) + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX11-NEXT: FLAT_STORE_DWORDX2 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<4 x s16>)) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(<4 x s16>) = COPY $vgpr2_vgpr3 G_STORE %1, %0 :: (store (<4 x s16>), align 8, addrspace 0) @@ -576,33 +576,33 @@ ; GFX7-LABEL: name: store_flat_v8s16 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX7-NEXT: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<8 x s16>)) + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_128 = PRED_COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX7-NEXT: FLAT_STORE_DWORDX4 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<8 x s16>)) ; GFX8-LABEL: name: store_flat_v8s16 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX8-NEXT: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<8 x s16>)) + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_128 = PRED_COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX8-NEXT: FLAT_STORE_DWORDX4 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<8 x s16>)) ; GFX9-LABEL: name: store_flat_v8s16 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX9-NEXT: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<8 x s16>)) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_128 = PRED_COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX9-NEXT: FLAT_STORE_DWORDX4 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<8 x s16>)) ; GFX10-LABEL: name: store_flat_v8s16 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX10-NEXT: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, 
implicit $exec, implicit $flat_scr :: (store (<8 x s16>)) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_128 = PRED_COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX10-NEXT: FLAT_STORE_DWORDX4 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<8 x s16>)) ; GFX11-LABEL: name: store_flat_v8s16 ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX11-NEXT: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<8 x s16>)) + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_128 = PRED_COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX11-NEXT: FLAT_STORE_DWORDX4 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<8 x s16>)) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(<8 x s16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 G_STORE %1, %0 :: (store (<8 x s16>), align 16, addrspace 0) @@ -623,33 +623,33 @@ ; GFX7-LABEL: name: store_flat_v2s64 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX7-NEXT: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s64>)) + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_128 = PRED_COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX7-NEXT: FLAT_STORE_DWORDX4 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s64>)) ; GFX8-LABEL: name: store_flat_v2s64 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX8-NEXT: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s64>)) + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_128 = PRED_COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX8-NEXT: FLAT_STORE_DWORDX4 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s64>)) ; GFX9-LABEL: name: store_flat_v2s64 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX9-NEXT: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s64>)) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_128 = PRED_COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX9-NEXT: FLAT_STORE_DWORDX4 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s64>)) ; GFX10-LABEL: name: store_flat_v2s64 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX10-NEXT: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s64>)) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY 
$vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_128 = PRED_COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX10-NEXT: FLAT_STORE_DWORDX4 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s64>)) ; GFX11-LABEL: name: store_flat_v2s64 ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX11-NEXT: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s64>)) + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_128 = PRED_COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX11-NEXT: FLAT_STORE_DWORDX4 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s64>)) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(<2 x s64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 G_STORE %1, %0 :: (store (<2 x s64>), align 16, addrspace 0) @@ -670,33 +670,33 @@ ; GFX7-LABEL: name: store_flat_p1 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX7-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (p1)) + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX7-NEXT: FLAT_STORE_DWORDX2 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (p1)) ; GFX8-LABEL: name: store_flat_p1 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX8-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (p1)) + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX8-NEXT: FLAT_STORE_DWORDX2 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (p1)) ; GFX9-LABEL: name: store_flat_p1 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (p1)) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX9-NEXT: FLAT_STORE_DWORDX2 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (p1)) ; GFX10-LABEL: name: store_flat_p1 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX10-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (p1)) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX10-NEXT: FLAT_STORE_DWORDX2 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (p1)) ; GFX11-LABEL: name: store_flat_p1 ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: 
[[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX11-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (p1)) + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX11-NEXT: FLAT_STORE_DWORDX2 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (p1)) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(p1) = COPY $vgpr2_vgpr3 G_STORE %1, %0 :: (store (p1), align 8, addrspace 0) @@ -764,33 +764,33 @@ ; GFX7-LABEL: name: store_flat_p3 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7-NEXT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (p3)) + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX7-NEXT: FLAT_STORE_DWORD [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (p3)) ; GFX8-LABEL: name: store_flat_p3 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8-NEXT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (p3)) + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX8-NEXT: FLAT_STORE_DWORD [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (p3)) ; GFX9-LABEL: name: store_flat_p3 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (p3)) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX9-NEXT: FLAT_STORE_DWORD [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (p3)) ; GFX10-LABEL: name: store_flat_p3 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10-NEXT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (p3)) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX10-NEXT: FLAT_STORE_DWORD [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (p3)) ; GFX11-LABEL: name: store_flat_p3 ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX11-NEXT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (p3)) + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX11-NEXT: FLAT_STORE_DWORD [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (p3)) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(p3) = COPY 
$vgpr2 G_STORE %1, %0 :: (store (p3), align 4, addrspace 0) @@ -857,33 +857,33 @@ ; GFX7-LABEL: name: store_atomic_flat_s32 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7-NEXT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic (s32)) + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX7-NEXT: FLAT_STORE_DWORD [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic (s32)) ; GFX8-LABEL: name: store_atomic_flat_s32 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8-NEXT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic (s32)) + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX8-NEXT: FLAT_STORE_DWORD [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic (s32)) ; GFX9-LABEL: name: store_atomic_flat_s32 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic (s32)) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX9-NEXT: FLAT_STORE_DWORD [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic (s32)) ; GFX10-LABEL: name: store_atomic_flat_s32 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10-NEXT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic (s32)) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX10-NEXT: FLAT_STORE_DWORD [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic (s32)) ; GFX11-LABEL: name: store_atomic_flat_s32 ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX11-NEXT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic (s32)) + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX11-NEXT: FLAT_STORE_DWORD [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic (s32)) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 G_STORE %1, %0 :: (store monotonic (s32), align 4, addrspace 0) @@ -904,33 +904,33 @@ ; GFX7-LABEL: name: store_atomic_flat_s64 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX7-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit 
$exec, implicit $flat_scr :: (store monotonic (s64)) + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX7-NEXT: FLAT_STORE_DWORDX2 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic (s64)) ; GFX8-LABEL: name: store_atomic_flat_s64 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX8-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic (s64)) + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX8-NEXT: FLAT_STORE_DWORDX2 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic (s64)) ; GFX9-LABEL: name: store_atomic_flat_s64 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic (s64)) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX9-NEXT: FLAT_STORE_DWORDX2 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic (s64)) ; GFX10-LABEL: name: store_atomic_flat_s64 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX10-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic (s64)) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX10-NEXT: FLAT_STORE_DWORDX2 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic (s64)) ; GFX11-LABEL: name: store_atomic_flat_s64 ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX11-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic (s64)) + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX11-NEXT: FLAT_STORE_DWORDX2 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic (s64)) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = COPY $vgpr2_vgpr3 G_STORE %1, %0 :: (store monotonic (s64), align 8, addrspace 0) @@ -951,63 +951,63 @@ ; GFX7-LABEL: name: store_flat_s32_gep_2047 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX7-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2047, implicit $exec ; GFX7-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX7-NEXT: 
[[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX7-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 - ; GFX7-NEXT: FLAT_STORE_DWORD [[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32)) + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX7-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY2]], [[PRED_COPY3]], 0, implicit $exec + ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY4]], [[PRED_COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX7-NEXT: FLAT_STORE_DWORD [[REG_SEQUENCE1]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32)) ; GFX8-LABEL: name: store_flat_s32_gep_2047 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX8-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2047, implicit $exec ; GFX8-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX8-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 - ; GFX8-NEXT: FLAT_STORE_DWORD [[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32)) + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX8-NEXT: 
[[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY2]], [[PRED_COPY3]], 0, implicit $exec + ; GFX8-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY4]], [[PRED_COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX8-NEXT: FLAT_STORE_DWORD [[REG_SEQUENCE1]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32)) ; GFX9-LABEL: name: store_flat_s32_gep_2047 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 2047, 0, implicit $exec, implicit $flat_scr :: (store (s32)) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX9-NEXT: FLAT_STORE_DWORD [[PRED_COPY]], [[PRED_COPY1]], 2047, 0, implicit $exec, implicit $flat_scr :: (store (s32)) ; GFX10-LABEL: name: store_flat_s32_gep_2047 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2047, implicit $exec ; GFX10-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX10-NEXT: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 - ; GFX10-NEXT: FLAT_STORE_DWORD [[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32)) + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[PRED_COPY2]], [[PRED_COPY3]], 0, implicit $exec + ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[PRED_COPY4]], [[PRED_COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, 
[[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX10-NEXT: FLAT_STORE_DWORD [[REG_SEQUENCE1]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32)) ; GFX11-LABEL: name: store_flat_s32_gep_2047 ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX11-NEXT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 2047, 0, implicit $exec, implicit $flat_scr :: (store (s32)) + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX11-NEXT: FLAT_STORE_DWORD [[PRED_COPY]], [[PRED_COPY1]], 2047, 0, implicit $exec, implicit $flat_scr :: (store (s32)) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 %2:vgpr(s64) = G_CONSTANT i64 2047 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-global.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-global.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-global.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-global.mir @@ -21,49 +21,49 @@ ; GFX6-LABEL: name: store_global_s32_to_4 ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX6-NEXT: BUFFER_STORE_DWORD_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (store (s32), addrspace 1) + ; GFX6-NEXT: BUFFER_STORE_DWORD_ADDR64 [[PRED_COPY1]], [[PRED_COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (store (s32), addrspace 1) ; GFX7-LABEL: name: store_global_s32_to_4 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX7-NEXT: BUFFER_STORE_DWORD_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (store (s32), addrspace 1) + ; GFX7-NEXT: BUFFER_STORE_DWORD_ADDR64 [[PRED_COPY1]], [[PRED_COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (store (s32), addrspace 1) ; GFX7-FLAT-LABEL: name: store_global_s32_to_4 ; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7-FLAT-NEXT: {{ $}} - ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-FLAT-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; 
GFX7-FLAT-NEXT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) + ; GFX7-FLAT-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-FLAT-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX7-FLAT-NEXT: FLAT_STORE_DWORD [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) ; GFX8-LABEL: name: store_global_s32_to_4 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8-NEXT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX8-NEXT: FLAT_STORE_DWORD [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) ; GFX9-LABEL: name: store_global_s32_to_4 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: GLOBAL_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store (s32), addrspace 1) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX9-NEXT: GLOBAL_STORE_DWORD [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec :: (store (s32), addrspace 1) ; GFX10-LABEL: name: store_global_s32_to_4 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10-NEXT: GLOBAL_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store (s32), addrspace 1) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX10-NEXT: GLOBAL_STORE_DWORD [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec :: (store (s32), addrspace 1) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 G_STORE %1, %0 :: (store (s32), align 4, addrspace 1) @@ -83,49 +83,49 @@ ; GFX6-LABEL: name: store_global_s32_to_2 ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX6-NEXT: BUFFER_STORE_SHORT_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (store (s16), addrspace 1) + ; GFX6-NEXT: BUFFER_STORE_SHORT_ADDR64 [[PRED_COPY1]], [[PRED_COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (store (s16), addrspace 1) ; GFX7-LABEL: name: store_global_s32_to_2 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: 
[[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX7-NEXT: BUFFER_STORE_SHORT_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (store (s16), addrspace 1) + ; GFX7-NEXT: BUFFER_STORE_SHORT_ADDR64 [[PRED_COPY1]], [[PRED_COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (store (s16), addrspace 1) ; GFX7-FLAT-LABEL: name: store_global_s32_to_2 ; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7-FLAT-NEXT: {{ $}} - ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-FLAT-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7-FLAT-NEXT: FLAT_STORE_SHORT [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s16), addrspace 1) + ; GFX7-FLAT-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-FLAT-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX7-FLAT-NEXT: FLAT_STORE_SHORT [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s16), addrspace 1) ; GFX8-LABEL: name: store_global_s32_to_2 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8-NEXT: FLAT_STORE_SHORT [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s16), addrspace 1) + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX8-NEXT: FLAT_STORE_SHORT [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s16), addrspace 1) ; GFX9-LABEL: name: store_global_s32_to_2 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: GLOBAL_STORE_SHORT [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store (s16), addrspace 1) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX9-NEXT: GLOBAL_STORE_SHORT [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec :: (store (s16), addrspace 1) ; GFX10-LABEL: name: store_global_s32_to_2 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10-NEXT: GLOBAL_STORE_SHORT [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store (s16), addrspace 1) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX10-NEXT: GLOBAL_STORE_SHORT [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec :: (store (s16), addrspace 1) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 G_STORE %1, %0 :: (store (s16), align 2, addrspace 1) @@ -145,49 +145,49 @@ ; GFX6-LABEL: name: store_global_s32_to_1 ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX6-NEXT: {{ $}} - ; 
GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX6-NEXT: BUFFER_STORE_BYTE_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (store (s8), addrspace 1) + ; GFX6-NEXT: BUFFER_STORE_BYTE_ADDR64 [[PRED_COPY1]], [[PRED_COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (store (s8), addrspace 1) ; GFX7-LABEL: name: store_global_s32_to_1 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX7-NEXT: BUFFER_STORE_BYTE_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (store (s8), addrspace 1) + ; GFX7-NEXT: BUFFER_STORE_BYTE_ADDR64 [[PRED_COPY1]], [[PRED_COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (store (s8), addrspace 1) ; GFX7-FLAT-LABEL: name: store_global_s32_to_1 ; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7-FLAT-NEXT: {{ $}} - ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-FLAT-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7-FLAT-NEXT: FLAT_STORE_BYTE [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s8), addrspace 1) + ; GFX7-FLAT-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-FLAT-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX7-FLAT-NEXT: FLAT_STORE_BYTE [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s8), addrspace 1) ; GFX8-LABEL: name: store_global_s32_to_1 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8-NEXT: FLAT_STORE_BYTE [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s8), addrspace 1) + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX8-NEXT: FLAT_STORE_BYTE [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s8), addrspace 1) ; GFX9-LABEL: name: store_global_s32_to_1 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: GLOBAL_STORE_BYTE [[COPY]], [[COPY1]], 0, 
0, implicit $exec :: (store (s8), addrspace 1) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX9-NEXT: GLOBAL_STORE_BYTE [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec :: (store (s8), addrspace 1) ; GFX10-LABEL: name: store_global_s32_to_1 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10-NEXT: GLOBAL_STORE_BYTE [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store (s8), addrspace 1) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX10-NEXT: GLOBAL_STORE_BYTE [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec :: (store (s8), addrspace 1) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 G_STORE %1, %0 :: (store (s8), align 1, addrspace 1) @@ -214,33 +214,33 @@ ; GFX7-LABEL: name: store_global_s64 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX7-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s64), addrspace 1) + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX7-NEXT: FLAT_STORE_DWORDX2 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s64), addrspace 1) ; GFX7-FLAT-LABEL: name: store_global_s64 ; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX7-FLAT-NEXT: {{ $}} - ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-FLAT-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX7-FLAT-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s64), addrspace 1) + ; GFX7-FLAT-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-FLAT-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX7-FLAT-NEXT: FLAT_STORE_DWORDX2 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s64), addrspace 1) ; GFX8-LABEL: name: store_global_s64 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX8-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s64), addrspace 1) + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX8-NEXT: FLAT_STORE_DWORDX2 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s64), addrspace 1) ; GFX9-LABEL: name: store_global_s64 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: GLOBAL_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store (s64), addrspace 1) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX9-NEXT: GLOBAL_STORE_DWORDX2 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec :: (store (s64), addrspace 1) ; GFX10-LABEL: name: store_global_s64 
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX10-NEXT: GLOBAL_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store (s64), addrspace 1) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX10-NEXT: GLOBAL_STORE_DWORDX2 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec :: (store (s64), addrspace 1) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = COPY $vgpr2_vgpr3 G_STORE %1, %0 :: (store (s64), align 8, addrspace 1) @@ -313,49 +313,49 @@ ; GFX6-LABEL: name: store_global_v2s32 ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX6-NEXT: BUFFER_STORE_DWORDX2_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (store (<2 x s32>), addrspace 1) + ; GFX6-NEXT: BUFFER_STORE_DWORDX2_ADDR64 [[PRED_COPY1]], [[PRED_COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (store (<2 x s32>), addrspace 1) ; GFX7-LABEL: name: store_global_v2s32 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX7-NEXT: BUFFER_STORE_DWORDX2_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (store (<2 x s32>), addrspace 1) + ; GFX7-NEXT: BUFFER_STORE_DWORDX2_ADDR64 [[PRED_COPY1]], [[PRED_COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (store (<2 x s32>), addrspace 1) ; GFX7-FLAT-LABEL: name: store_global_v2s32 ; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX7-FLAT-NEXT: {{ $}} - ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-FLAT-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX7-FLAT-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s32>), addrspace 1) + ; GFX7-FLAT-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-FLAT-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX7-FLAT-NEXT: FLAT_STORE_DWORDX2 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: 
(store (<2 x s32>), addrspace 1) ; GFX8-LABEL: name: store_global_v2s32 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX8-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s32>), addrspace 1) + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX8-NEXT: FLAT_STORE_DWORDX2 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s32>), addrspace 1) ; GFX9-LABEL: name: store_global_v2s32 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: GLOBAL_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store (<2 x s32>), addrspace 1) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX9-NEXT: GLOBAL_STORE_DWORDX2 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec :: (store (<2 x s32>), addrspace 1) ; GFX10-LABEL: name: store_global_v2s32 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX10-NEXT: GLOBAL_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store (<2 x s32>), addrspace 1) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX10-NEXT: GLOBAL_STORE_DWORDX2 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec :: (store (<2 x s32>), addrspace 1) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(<2 x s32>) = COPY $vgpr2_vgpr3 G_STORE %1, %0 :: (store (<2 x s32>), align 8, addrspace 1) @@ -375,49 +375,49 @@ ; GFX6-LABEL: name: store_global_v4s32 ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_128 = PRED_COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX6-NEXT: BUFFER_STORE_DWORDX4_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (store (<4 x s32>), addrspace 1) + ; GFX6-NEXT: BUFFER_STORE_DWORDX4_ADDR64 [[PRED_COPY1]], [[PRED_COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (store (<4 x s32>), addrspace 1) ; GFX7-LABEL: name: store_global_v4s32 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_128 = 
PRED_COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX7-NEXT: BUFFER_STORE_DWORDX4_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (store (<4 x s32>), addrspace 1) + ; GFX7-NEXT: BUFFER_STORE_DWORDX4_ADDR64 [[PRED_COPY1]], [[PRED_COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (store (<4 x s32>), addrspace 1) ; GFX7-FLAT-LABEL: name: store_global_v4s32 ; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX7-FLAT-NEXT: {{ $}} - ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-FLAT-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX7-FLAT-NEXT: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<4 x s32>), addrspace 1) + ; GFX7-FLAT-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-FLAT-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_128 = PRED_COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX7-FLAT-NEXT: FLAT_STORE_DWORDX4 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<4 x s32>), addrspace 1) ; GFX8-LABEL: name: store_global_v4s32 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX8-NEXT: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<4 x s32>), addrspace 1) + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_128 = PRED_COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX8-NEXT: FLAT_STORE_DWORDX4 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<4 x s32>), addrspace 1) ; GFX9-LABEL: name: store_global_v4s32 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX9-NEXT: GLOBAL_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store (<4 x s32>), addrspace 1) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_128 = PRED_COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX9-NEXT: GLOBAL_STORE_DWORDX4 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec :: (store (<4 x s32>), addrspace 1) ; GFX10-LABEL: name: store_global_v4s32 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX10-NEXT: GLOBAL_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store (<4 x s32>), addrspace 1) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_128 = PRED_COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX10-NEXT: GLOBAL_STORE_DWORDX4 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec :: (store (<4 x s32>), addrspace 1) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 G_STORE 
%1, %0 :: (store (<4 x s32>), align 16, addrspace 1) @@ -444,33 +444,33 @@ ; GFX7-LABEL: name: store_global_v2s16 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7-NEXT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s16>), addrspace 1) + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX7-NEXT: FLAT_STORE_DWORD [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s16>), addrspace 1) ; GFX7-FLAT-LABEL: name: store_global_v2s16 ; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7-FLAT-NEXT: {{ $}} - ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-FLAT-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7-FLAT-NEXT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s16>), addrspace 1) + ; GFX7-FLAT-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-FLAT-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX7-FLAT-NEXT: FLAT_STORE_DWORD [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s16>), addrspace 1) ; GFX8-LABEL: name: store_global_v2s16 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8-NEXT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s16>), addrspace 1) + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX8-NEXT: FLAT_STORE_DWORD [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s16>), addrspace 1) ; GFX9-LABEL: name: store_global_v2s16 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: GLOBAL_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store (<2 x s16>), addrspace 1) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX9-NEXT: GLOBAL_STORE_DWORD [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec :: (store (<2 x s16>), addrspace 1) ; GFX10-LABEL: name: store_global_v2s16 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10-NEXT: GLOBAL_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store (<2 x s16>), addrspace 1) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX10-NEXT: GLOBAL_STORE_DWORD [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec :: (store (<2 x s16>), addrspace 1) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(<2 x s16>) = COPY $vgpr2 G_STORE %1, %0 :: (store (<2 x s16>), align 4, addrspace 1) @@ -497,33 +497,33 @@ ; GFX7-LABEL: name: store_global_v4s16 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX7-NEXT: FLAT_STORE_DWORDX2 
[[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<4 x s16>), addrspace 1) + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX7-NEXT: FLAT_STORE_DWORDX2 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<4 x s16>), addrspace 1) ; GFX7-FLAT-LABEL: name: store_global_v4s16 ; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX7-FLAT-NEXT: {{ $}} - ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-FLAT-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX7-FLAT-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<4 x s16>), addrspace 1) + ; GFX7-FLAT-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-FLAT-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX7-FLAT-NEXT: FLAT_STORE_DWORDX2 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<4 x s16>), addrspace 1) ; GFX8-LABEL: name: store_global_v4s16 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX8-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<4 x s16>), addrspace 1) + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX8-NEXT: FLAT_STORE_DWORDX2 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<4 x s16>), addrspace 1) ; GFX9-LABEL: name: store_global_v4s16 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: GLOBAL_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store (<4 x s16>), addrspace 1) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX9-NEXT: GLOBAL_STORE_DWORDX2 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec :: (store (<4 x s16>), addrspace 1) ; GFX10-LABEL: name: store_global_v4s16 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX10-NEXT: GLOBAL_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store (<4 x s16>), addrspace 1) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX10-NEXT: GLOBAL_STORE_DWORDX2 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec :: (store (<4 x s16>), addrspace 1) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(<4 x s16>) = COPY $vgpr2_vgpr3 G_STORE %1, %0 :: (store (<4 x s16>), align 8, addrspace 1) @@ -550,33 +550,33 @@ ; GFX7-LABEL: name: store_global_v8s16 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX7-NEXT: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<8 x s16>), addrspace 1) + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: 
[[PRED_COPY1:%[0-9]+]]:vreg_128 = PRED_COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX7-NEXT: FLAT_STORE_DWORDX4 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<8 x s16>), addrspace 1) ; GFX7-FLAT-LABEL: name: store_global_v8s16 ; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX7-FLAT-NEXT: {{ $}} - ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-FLAT-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX7-FLAT-NEXT: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<8 x s16>), addrspace 1) + ; GFX7-FLAT-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-FLAT-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_128 = PRED_COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX7-FLAT-NEXT: FLAT_STORE_DWORDX4 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<8 x s16>), addrspace 1) ; GFX8-LABEL: name: store_global_v8s16 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX8-NEXT: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<8 x s16>), addrspace 1) + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_128 = PRED_COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX8-NEXT: FLAT_STORE_DWORDX4 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<8 x s16>), addrspace 1) ; GFX9-LABEL: name: store_global_v8s16 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX9-NEXT: GLOBAL_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store (<8 x s16>), addrspace 1) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_128 = PRED_COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX9-NEXT: GLOBAL_STORE_DWORDX4 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec :: (store (<8 x s16>), addrspace 1) ; GFX10-LABEL: name: store_global_v8s16 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX10-NEXT: GLOBAL_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store (<8 x s16>), addrspace 1) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_128 = PRED_COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX10-NEXT: GLOBAL_STORE_DWORDX4 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec :: (store (<8 x s16>), addrspace 1) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(<8 x s16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 G_STORE %1, %0 :: (store (<8 x s16>), align 16, addrspace 1) @@ -603,33 +603,33 @@ ; GFX7-LABEL: name: store_global_v2s64 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX7-NEXT: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s64>), addrspace 1) + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: 
[[PRED_COPY1:%[0-9]+]]:vreg_128 = PRED_COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX7-NEXT: FLAT_STORE_DWORDX4 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s64>), addrspace 1) ; GFX7-FLAT-LABEL: name: store_global_v2s64 ; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX7-FLAT-NEXT: {{ $}} - ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-FLAT-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX7-FLAT-NEXT: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s64>), addrspace 1) + ; GFX7-FLAT-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-FLAT-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_128 = PRED_COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX7-FLAT-NEXT: FLAT_STORE_DWORDX4 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s64>), addrspace 1) ; GFX8-LABEL: name: store_global_v2s64 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX8-NEXT: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s64>), addrspace 1) + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_128 = PRED_COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX8-NEXT: FLAT_STORE_DWORDX4 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s64>), addrspace 1) ; GFX9-LABEL: name: store_global_v2s64 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX9-NEXT: GLOBAL_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store (<2 x s64>), addrspace 1) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_128 = PRED_COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX9-NEXT: GLOBAL_STORE_DWORDX4 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec :: (store (<2 x s64>), addrspace 1) ; GFX10-LABEL: name: store_global_v2s64 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX10-NEXT: GLOBAL_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store (<2 x s64>), addrspace 1) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_128 = PRED_COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX10-NEXT: GLOBAL_STORE_DWORDX4 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec :: (store (<2 x s64>), addrspace 1) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(<2 x s64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 G_STORE %1, %0 :: (store (<2 x s64>), align 16, addrspace 1) @@ -656,33 +656,33 @@ ; GFX7-LABEL: name: store_global_p1 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX7-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (p1), addrspace 1) + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 
+ ; GFX7-NEXT: FLAT_STORE_DWORDX2 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (p1), addrspace 1) ; GFX7-FLAT-LABEL: name: store_global_p1 ; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX7-FLAT-NEXT: {{ $}} - ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-FLAT-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX7-FLAT-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (p1), addrspace 1) + ; GFX7-FLAT-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-FLAT-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX7-FLAT-NEXT: FLAT_STORE_DWORDX2 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (p1), addrspace 1) ; GFX8-LABEL: name: store_global_p1 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX8-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (p1), addrspace 1) + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX8-NEXT: FLAT_STORE_DWORDX2 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (p1), addrspace 1) ; GFX9-LABEL: name: store_global_p1 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: GLOBAL_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store (p1), addrspace 1) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX9-NEXT: GLOBAL_STORE_DWORDX2 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec :: (store (p1), addrspace 1) ; GFX10-LABEL: name: store_global_p1 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX10-NEXT: GLOBAL_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store (p1), addrspace 1) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX10-NEXT: GLOBAL_STORE_DWORDX2 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec :: (store (p1), addrspace 1) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(p1) = COPY $vgpr2_vgpr3 G_STORE %1, %0 :: (store (p1), align 8, addrspace 1) @@ -762,33 +762,33 @@ ; GFX7-LABEL: name: store_global_p3 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7-NEXT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (p3), addrspace 1) + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX7-NEXT: FLAT_STORE_DWORD [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (p3), addrspace 1) ; GFX7-FLAT-LABEL: name: store_global_p3 ; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7-FLAT-NEXT: {{ $}} - ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-FLAT-NEXT: 
[[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7-FLAT-NEXT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (p3), addrspace 1) + ; GFX7-FLAT-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-FLAT-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX7-FLAT-NEXT: FLAT_STORE_DWORD [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (p3), addrspace 1) ; GFX8-LABEL: name: store_global_p3 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8-NEXT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (p3), addrspace 1) + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX8-NEXT: FLAT_STORE_DWORD [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (p3), addrspace 1) ; GFX9-LABEL: name: store_global_p3 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: GLOBAL_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store (p3), addrspace 1) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX9-NEXT: GLOBAL_STORE_DWORD [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec :: (store (p3), addrspace 1) ; GFX10-LABEL: name: store_global_p3 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10-NEXT: GLOBAL_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store (p3), addrspace 1) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX10-NEXT: GLOBAL_STORE_DWORD [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec :: (store (p3), addrspace 1) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(p3) = COPY $vgpr2 G_STORE %1, %0 :: (store (p3), align 4, addrspace 1) @@ -867,33 +867,33 @@ ; GFX7-LABEL: name: store_atomic_global_s32 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7-NEXT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic (s32), addrspace 1) + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX7-NEXT: FLAT_STORE_DWORD [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic (s32), addrspace 1) ; GFX7-FLAT-LABEL: name: store_atomic_global_s32 ; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7-FLAT-NEXT: {{ $}} - ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-FLAT-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7-FLAT-NEXT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic (s32), addrspace 1) + ; GFX7-FLAT-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-FLAT-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX7-FLAT-NEXT: FLAT_STORE_DWORD [[PRED_COPY]], [[PRED_COPY1]], 0, 0, 
implicit $exec, implicit $flat_scr :: (store monotonic (s32), addrspace 1) ; GFX8-LABEL: name: store_atomic_global_s32 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8-NEXT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic (s32), addrspace 1) + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX8-NEXT: FLAT_STORE_DWORD [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic (s32), addrspace 1) ; GFX9-LABEL: name: store_atomic_global_s32 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: GLOBAL_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store monotonic (s32), addrspace 1) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX9-NEXT: GLOBAL_STORE_DWORD [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec :: (store monotonic (s32), addrspace 1) ; GFX10-LABEL: name: store_atomic_global_s32 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10-NEXT: GLOBAL_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store monotonic (s32), addrspace 1) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX10-NEXT: GLOBAL_STORE_DWORD [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec :: (store monotonic (s32), addrspace 1) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 G_STORE %1, %0 :: (store monotonic (s32), align 4, addrspace 1) @@ -920,33 +920,33 @@ ; GFX7-LABEL: name: store_atomic_global_s64 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX7-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic (s64), addrspace 1) + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX7-NEXT: FLAT_STORE_DWORDX2 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic (s64), addrspace 1) ; GFX7-FLAT-LABEL: name: store_atomic_global_s64 ; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX7-FLAT-NEXT: {{ $}} - ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-FLAT-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX7-FLAT-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic (s64), addrspace 1) + ; GFX7-FLAT-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-FLAT-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX7-FLAT-NEXT: FLAT_STORE_DWORDX2 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic (s64), addrspace 1) ; GFX8-LABEL: name: store_atomic_global_s64 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY 
$vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX8-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic (s64), addrspace 1) + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX8-NEXT: FLAT_STORE_DWORDX2 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic (s64), addrspace 1) ; GFX9-LABEL: name: store_atomic_global_s64 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: GLOBAL_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store monotonic (s64), addrspace 1) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX9-NEXT: GLOBAL_STORE_DWORDX2 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec :: (store monotonic (s64), addrspace 1) ; GFX10-LABEL: name: store_atomic_global_s64 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX10-NEXT: GLOBAL_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store monotonic (s64), addrspace 1) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX10-NEXT: GLOBAL_STORE_DWORDX2 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec :: (store monotonic (s64), addrspace 1) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = COPY $vgpr2_vgpr3 G_STORE %1, %0 :: (store monotonic (s64), align 8, addrspace 1) @@ -967,69 +967,69 @@ ; GFX6-LABEL: name: store_global_s32_gep_2047 ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX6-NEXT: BUFFER_STORE_DWORD_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 2047, 0, 0, implicit $exec :: (store (s32), addrspace 1) + ; GFX6-NEXT: BUFFER_STORE_DWORD_ADDR64 [[PRED_COPY1]], [[PRED_COPY]], [[REG_SEQUENCE1]], 0, 2047, 0, 0, implicit $exec :: (store (s32), addrspace 1) ; GFX7-LABEL: name: store_global_s32_gep_2047 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], 
%subreg.sub1 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX7-NEXT: BUFFER_STORE_DWORD_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 2047, 0, 0, implicit $exec :: (store (s32), addrspace 1) + ; GFX7-NEXT: BUFFER_STORE_DWORD_ADDR64 [[PRED_COPY1]], [[PRED_COPY]], [[REG_SEQUENCE1]], 0, 2047, 0, 0, implicit $exec :: (store (s32), addrspace 1) ; GFX7-FLAT-LABEL: name: store_global_s32_gep_2047 ; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7-FLAT-NEXT: {{ $}} - ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-FLAT-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX7-FLAT-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-FLAT-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX7-FLAT-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2047, implicit $exec ; GFX7-FLAT-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX7-FLAT-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-FLAT-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX7-FLAT-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-FLAT-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7-FLAT-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX7-FLAT-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX7-FLAT-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 - ; GFX7-FLAT-NEXT: FLAT_STORE_DWORD [[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) + ; GFX7-FLAT-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX7-FLAT-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX7-FLAT-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX7-FLAT-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX7-FLAT-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY2]], [[PRED_COPY3]], 0, implicit $exec + ; GFX7-FLAT-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY4]], [[PRED_COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7-FLAT-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX7-FLAT-NEXT: FLAT_STORE_DWORD [[REG_SEQUENCE1]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) ; GFX8-LABEL: name: store_global_s32_gep_2047 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX8-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2047, implicit $exec ; GFX8-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, 
implicit $exec ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX8-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 - ; GFX8-NEXT: FLAT_STORE_DWORD [[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX8-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY2]], [[PRED_COPY3]], 0, implicit $exec + ; GFX8-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY4]], [[PRED_COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX8-NEXT: FLAT_STORE_DWORD [[REG_SEQUENCE1]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) ; GFX9-LABEL: name: store_global_s32_gep_2047 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: GLOBAL_STORE_DWORD [[COPY]], [[COPY1]], 2047, 0, implicit $exec :: (store (s32), addrspace 1) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX9-NEXT: GLOBAL_STORE_DWORD [[PRED_COPY]], [[PRED_COPY1]], 2047, 0, implicit $exec :: (store (s32), addrspace 1) ; GFX10-LABEL: name: store_global_s32_gep_2047 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10-NEXT: GLOBAL_STORE_DWORD [[COPY]], [[COPY1]], 2047, 0, implicit $exec :: (store (s32), addrspace 1) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX10-NEXT: GLOBAL_STORE_DWORD [[PRED_COPY]], [[PRED_COPY1]], 2047, 0, implicit $exec :: (store (s32), addrspace 1) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 %2:vgpr(s64) = G_CONSTANT i64 2047 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-global.s96.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-global.s96.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-global.s96.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-global.s96.mir @@ -22,38 +22,38 @@ ; GFX7-LABEL: name: store_global_v3s32 ; GFX7: liveins: 
$vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vreg_96 = COPY $vgpr2_vgpr3_vgpr4 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_96 = PRED_COPY $vgpr2_vgpr3_vgpr4 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX7-NEXT: BUFFER_STORE_DWORDX3_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (store (<3 x s32>), align 16, addrspace 1) + ; GFX7-NEXT: BUFFER_STORE_DWORDX3_ADDR64 [[PRED_COPY1]], [[PRED_COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (store (<3 x s32>), align 16, addrspace 1) ; GFX7-FLAT-LABEL: name: store_global_v3s32 ; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; GFX7-FLAT-NEXT: {{ $}} - ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-FLAT-NEXT: [[COPY1:%[0-9]+]]:vreg_96 = COPY $vgpr2_vgpr3_vgpr4 - ; GFX7-FLAT-NEXT: FLAT_STORE_DWORDX3 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<3 x s32>), align 16, addrspace 1) + ; GFX7-FLAT-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-FLAT-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_96 = PRED_COPY $vgpr2_vgpr3_vgpr4 + ; GFX7-FLAT-NEXT: FLAT_STORE_DWORDX3 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<3 x s32>), align 16, addrspace 1) ; GFX8-LABEL: name: store_global_v3s32 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vreg_96 = COPY $vgpr2_vgpr3_vgpr4 - ; GFX8-NEXT: FLAT_STORE_DWORDX3 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<3 x s32>), align 16, addrspace 1) + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_96 = PRED_COPY $vgpr2_vgpr3_vgpr4 + ; GFX8-NEXT: FLAT_STORE_DWORDX3 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<3 x s32>), align 16, addrspace 1) ; GFX9-LABEL: name: store_global_v3s32 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vreg_96 = COPY $vgpr2_vgpr3_vgpr4 - ; GFX9-NEXT: GLOBAL_STORE_DWORDX3 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store (<3 x s32>), align 16, addrspace 1) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_96 = PRED_COPY $vgpr2_vgpr3_vgpr4 + ; GFX9-NEXT: GLOBAL_STORE_DWORDX3 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec :: (store (<3 x s32>), align 16, addrspace 1) ; GFX10-LABEL: name: store_global_v3s32 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_96 = COPY $vgpr2_vgpr3_vgpr4 - ; GFX10-NEXT: GLOBAL_STORE_DWORDX3 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store (<3 x s32>), align 16, addrspace 1) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY 
$vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_96 = PRED_COPY $vgpr2_vgpr3_vgpr4 + ; GFX10-NEXT: GLOBAL_STORE_DWORDX3 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec :: (store (<3 x s32>), align 16, addrspace 1) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 G_STORE %1, %0 :: (store (<3 x s32>), align 16, addrspace 1) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-local.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-local.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-local.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-local.mir @@ -23,29 +23,29 @@ ; GFX6-LABEL: name: store_local_s32_to_4 ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 ; GFX6-NEXT: $m0 = S_MOV_B32 -1 - ; GFX6-NEXT: DS_WRITE_B32 [[COPY1]], [[COPY]], 0, 0, implicit $m0, implicit $exec :: (store (s32), addrspace 3) + ; GFX6-NEXT: DS_WRITE_B32 [[PRED_COPY1]], [[PRED_COPY]], 0, 0, implicit $m0, implicit $exec :: (store (s32), addrspace 3) ; GFX7-LABEL: name: store_local_s32_to_4 ; GFX7: liveins: $vgpr0, $vgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 - ; GFX7-NEXT: DS_WRITE_B32 [[COPY1]], [[COPY]], 0, 0, implicit $m0, implicit $exec :: (store (s32), addrspace 3) + ; GFX7-NEXT: DS_WRITE_B32 [[PRED_COPY1]], [[PRED_COPY]], 0, 0, implicit $m0, implicit $exec :: (store (s32), addrspace 3) ; GFX9-LABEL: name: store_local_s32_to_4 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: DS_WRITE_B32_gfx9 [[COPY1]], [[COPY]], 0, 0, implicit $exec :: (store (s32), addrspace 3) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX9-NEXT: DS_WRITE_B32_gfx9 [[PRED_COPY1]], [[PRED_COPY]], 0, 0, implicit $exec :: (store (s32), addrspace 3) ; GFX10-LABEL: name: store_local_s32_to_4 ; GFX10: liveins: $vgpr0, $vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX10-NEXT: DS_WRITE_B32_gfx9 [[COPY1]], [[COPY]], 0, 0, implicit $exec :: (store (s32), addrspace 3) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX10-NEXT: DS_WRITE_B32_gfx9 [[PRED_COPY1]], [[PRED_COPY]], 0, 0, implicit $exec :: (store (s32), addrspace 3) %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(p3) = COPY $vgpr1 G_STORE %0, %1 :: (store (s32), align 4, addrspace 3) @@ -69,29 +69,29 @@ ; GFX6-LABEL: name: store_local_s32_to_2 ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 ; GFX6-NEXT: $m0 = S_MOV_B32 -1 - ; GFX6-NEXT: DS_WRITE_B16 [[COPY1]], [[COPY]], 0, 0, implicit $m0, implicit $exec 
:: (store (s16), addrspace 3) + ; GFX6-NEXT: DS_WRITE_B16 [[PRED_COPY1]], [[PRED_COPY]], 0, 0, implicit $m0, implicit $exec :: (store (s16), addrspace 3) ; GFX7-LABEL: name: store_local_s32_to_2 ; GFX7: liveins: $vgpr0, $vgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 - ; GFX7-NEXT: DS_WRITE_B16 [[COPY1]], [[COPY]], 0, 0, implicit $m0, implicit $exec :: (store (s16), addrspace 3) + ; GFX7-NEXT: DS_WRITE_B16 [[PRED_COPY1]], [[PRED_COPY]], 0, 0, implicit $m0, implicit $exec :: (store (s16), addrspace 3) ; GFX9-LABEL: name: store_local_s32_to_2 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: DS_WRITE_B16_gfx9 [[COPY1]], [[COPY]], 0, 0, implicit $exec :: (store (s16), addrspace 3) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX9-NEXT: DS_WRITE_B16_gfx9 [[PRED_COPY1]], [[PRED_COPY]], 0, 0, implicit $exec :: (store (s16), addrspace 3) ; GFX10-LABEL: name: store_local_s32_to_2 ; GFX10: liveins: $vgpr0, $vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX10-NEXT: DS_WRITE_B16_gfx9 [[COPY1]], [[COPY]], 0, 0, implicit $exec :: (store (s16), addrspace 3) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX10-NEXT: DS_WRITE_B16_gfx9 [[PRED_COPY1]], [[PRED_COPY]], 0, 0, implicit $exec :: (store (s16), addrspace 3) %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(p3) = COPY $vgpr1 G_STORE %0, %1 :: (store (s16), align 2, addrspace 3) @@ -115,29 +115,29 @@ ; GFX6-LABEL: name: store_local_s32_to_1 ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 ; GFX6-NEXT: $m0 = S_MOV_B32 -1 - ; GFX6-NEXT: DS_WRITE_B8 [[COPY1]], [[COPY]], 0, 0, implicit $m0, implicit $exec :: (store (s8), addrspace 3) + ; GFX6-NEXT: DS_WRITE_B8 [[PRED_COPY1]], [[PRED_COPY]], 0, 0, implicit $m0, implicit $exec :: (store (s8), addrspace 3) ; GFX7-LABEL: name: store_local_s32_to_1 ; GFX7: liveins: $vgpr0, $vgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 - ; GFX7-NEXT: DS_WRITE_B8 [[COPY1]], [[COPY]], 0, 0, implicit $m0, implicit $exec :: (store (s8), addrspace 3) + ; GFX7-NEXT: DS_WRITE_B8 [[PRED_COPY1]], [[PRED_COPY]], 0, 0, implicit $m0, implicit $exec :: (store (s8), addrspace 3) ; GFX9-LABEL: name: store_local_s32_to_1 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: DS_WRITE_B8_gfx9 [[COPY1]], [[COPY]], 0, 0, implicit $exec :: (store (s8), addrspace 3) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY 
$vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX9-NEXT: DS_WRITE_B8_gfx9 [[PRED_COPY1]], [[PRED_COPY]], 0, 0, implicit $exec :: (store (s8), addrspace 3) ; GFX10-LABEL: name: store_local_s32_to_1 ; GFX10: liveins: $vgpr0, $vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX10-NEXT: DS_WRITE_B8_gfx9 [[COPY1]], [[COPY]], 0, 0, implicit $exec :: (store (s8), addrspace 3) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX10-NEXT: DS_WRITE_B8_gfx9 [[PRED_COPY1]], [[PRED_COPY]], 0, 0, implicit $exec :: (store (s8), addrspace 3) %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(p3) = COPY $vgpr1 G_STORE %0, %1 :: (store (s8), align 1, addrspace 3) @@ -161,29 +161,29 @@ ; GFX6-LABEL: name: store_local_v2s16 ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 ; GFX6-NEXT: $m0 = S_MOV_B32 -1 - ; GFX6-NEXT: DS_WRITE_B32 [[COPY1]], [[COPY]], 0, 0, implicit $m0, implicit $exec :: (store (<2 x s16>), addrspace 3) + ; GFX6-NEXT: DS_WRITE_B32 [[PRED_COPY1]], [[PRED_COPY]], 0, 0, implicit $m0, implicit $exec :: (store (<2 x s16>), addrspace 3) ; GFX7-LABEL: name: store_local_v2s16 ; GFX7: liveins: $vgpr0, $vgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 - ; GFX7-NEXT: DS_WRITE_B32 [[COPY1]], [[COPY]], 0, 0, implicit $m0, implicit $exec :: (store (<2 x s16>), addrspace 3) + ; GFX7-NEXT: DS_WRITE_B32 [[PRED_COPY1]], [[PRED_COPY]], 0, 0, implicit $m0, implicit $exec :: (store (<2 x s16>), addrspace 3) ; GFX9-LABEL: name: store_local_v2s16 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: DS_WRITE_B32_gfx9 [[COPY1]], [[COPY]], 0, 0, implicit $exec :: (store (<2 x s16>), addrspace 3) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX9-NEXT: DS_WRITE_B32_gfx9 [[PRED_COPY1]], [[PRED_COPY]], 0, 0, implicit $exec :: (store (<2 x s16>), addrspace 3) ; GFX10-LABEL: name: store_local_v2s16 ; GFX10: liveins: $vgpr0, $vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX10-NEXT: DS_WRITE_B32_gfx9 [[COPY1]], [[COPY]], 0, 0, implicit $exec :: (store (<2 x s16>), addrspace 3) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX10-NEXT: DS_WRITE_B32_gfx9 [[PRED_COPY1]], [[PRED_COPY]], 0, 0, implicit $exec :: (store (<2 x s16>), addrspace 3) %0:vgpr(<2 x s16>) = COPY $vgpr0 %1:vgpr(p3) = COPY $vgpr1 G_STORE %0, %1 :: (store (<2 x s16>), align 4, addrspace 3) @@ -207,29 +207,29 @@ ; GFX6-LABEL: name: store_local_p3 ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 
+ ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 ; GFX6-NEXT: $m0 = S_MOV_B32 -1 - ; GFX6-NEXT: DS_WRITE_B32 [[COPY1]], [[COPY]], 0, 0, implicit $m0, implicit $exec :: (store (p3), addrspace 3) + ; GFX6-NEXT: DS_WRITE_B32 [[PRED_COPY1]], [[PRED_COPY]], 0, 0, implicit $m0, implicit $exec :: (store (p3), addrspace 3) ; GFX7-LABEL: name: store_local_p3 ; GFX7: liveins: $vgpr0, $vgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 - ; GFX7-NEXT: DS_WRITE_B32 [[COPY1]], [[COPY]], 0, 0, implicit $m0, implicit $exec :: (store (p3), addrspace 3) + ; GFX7-NEXT: DS_WRITE_B32 [[PRED_COPY1]], [[PRED_COPY]], 0, 0, implicit $m0, implicit $exec :: (store (p3), addrspace 3) ; GFX9-LABEL: name: store_local_p3 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: DS_WRITE_B32_gfx9 [[COPY1]], [[COPY]], 0, 0, implicit $exec :: (store (p3), addrspace 3) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX9-NEXT: DS_WRITE_B32_gfx9 [[PRED_COPY1]], [[PRED_COPY]], 0, 0, implicit $exec :: (store (p3), addrspace 3) ; GFX10-LABEL: name: store_local_p3 ; GFX10: liveins: $vgpr0, $vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX10-NEXT: DS_WRITE_B32_gfx9 [[COPY1]], [[COPY]], 0, 0, implicit $exec :: (store (p3), addrspace 3) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX10-NEXT: DS_WRITE_B32_gfx9 [[PRED_COPY1]], [[PRED_COPY]], 0, 0, implicit $exec :: (store (p3), addrspace 3) %0:vgpr(p3) = COPY $vgpr0 %1:vgpr(p3) = COPY $vgpr1 G_STORE %0, %1 :: (store (p3), align 4, addrspace 3) @@ -333,28 +333,28 @@ ; GFX7-LABEL: name: store_local_s64_align4 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-NEXT: DS_WRITE2_B32 [[COPY1]], [[COPY3]], [[COPY2]], 0, 1, 0, implicit $m0, implicit $exec :: (store (s64), align 4, addrspace 3) + ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[PRED_COPY]].sub1 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[PRED_COPY]].sub0 + ; GFX7-NEXT: DS_WRITE2_B32 [[PRED_COPY1]], [[COPY1]], [[COPY]], 0, 1, 0, implicit $m0, implicit $exec :: (store (s64), align 4, addrspace 3) ; GFX9-LABEL: name: store_local_s64_align4 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX9-NEXT: DS_WRITE2_B32_gfx9 [[COPY1]], [[COPY3]], [[COPY2]], 0, 1, 0, 
implicit $exec :: (store (s64), align 4, addrspace 3) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[PRED_COPY]].sub1 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[PRED_COPY]].sub0 + ; GFX9-NEXT: DS_WRITE2_B32_gfx9 [[PRED_COPY1]], [[COPY1]], [[COPY]], 0, 1, 0, implicit $exec :: (store (s64), align 4, addrspace 3) ; GFX10-LABEL: name: store_local_s64_align4 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX10-NEXT: DS_WRITE2_B32_gfx9 [[COPY1]], [[COPY3]], [[COPY2]], 0, 1, 0, implicit $exec :: (store (s64), align 4, addrspace 3) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[PRED_COPY]].sub1 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[PRED_COPY]].sub0 + ; GFX10-NEXT: DS_WRITE2_B32_gfx9 [[PRED_COPY1]], [[COPY1]], [[COPY]], 0, 1, 0, implicit $exec :: (store (s64), align 4, addrspace 3) %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(p3) = COPY $vgpr2 G_STORE %0, %1 :: (store (s64), align 4, addrspace 3) @@ -385,28 +385,28 @@ ; GFX7-LABEL: name: store_local_p1_align4 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-NEXT: DS_WRITE2_B32 [[COPY1]], [[COPY3]], [[COPY2]], 0, 1, 0, implicit $m0, implicit $exec :: (store (p1), align 4, addrspace 3) + ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[PRED_COPY]].sub1 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[PRED_COPY]].sub0 + ; GFX7-NEXT: DS_WRITE2_B32 [[PRED_COPY1]], [[COPY1]], [[COPY]], 0, 1, 0, implicit $m0, implicit $exec :: (store (p1), align 4, addrspace 3) ; GFX9-LABEL: name: store_local_p1_align4 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX9-NEXT: DS_WRITE2_B32_gfx9 [[COPY1]], [[COPY3]], [[COPY2]], 0, 1, 0, implicit $exec :: (store (p1), align 4, addrspace 3) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[PRED_COPY]].sub1 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[PRED_COPY]].sub0 + ; GFX9-NEXT: DS_WRITE2_B32_gfx9 [[PRED_COPY1]], [[COPY1]], [[COPY]], 0, 1, 0, implicit $exec :: (store (p1), align 4, addrspace 3) ; GFX10-LABEL: name: store_local_p1_align4 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; 
GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX10-NEXT: DS_WRITE2_B32_gfx9 [[COPY1]], [[COPY3]], [[COPY2]], 0, 1, 0, implicit $exec :: (store (p1), align 4, addrspace 3) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[PRED_COPY]].sub1 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[PRED_COPY]].sub0 + ; GFX10-NEXT: DS_WRITE2_B32_gfx9 [[PRED_COPY1]], [[COPY1]], [[COPY]], 0, 1, 0, implicit $exec :: (store (p1), align 4, addrspace 3) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(p3) = COPY $vgpr2 G_STORE %0, %1 :: (store (p1), align 4, addrspace 3) @@ -437,28 +437,28 @@ ; GFX7-LABEL: name: store_local_v2s32_align4 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-NEXT: DS_WRITE2_B32 [[COPY1]], [[COPY3]], [[COPY2]], 0, 1, 0, implicit $m0, implicit $exec :: (store (<2 x s32>), align 4, addrspace 3) + ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[PRED_COPY]].sub1 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[PRED_COPY]].sub0 + ; GFX7-NEXT: DS_WRITE2_B32 [[PRED_COPY1]], [[COPY1]], [[COPY]], 0, 1, 0, implicit $m0, implicit $exec :: (store (<2 x s32>), align 4, addrspace 3) ; GFX9-LABEL: name: store_local_v2s32_align4 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX9-NEXT: DS_WRITE2_B32_gfx9 [[COPY1]], [[COPY3]], [[COPY2]], 0, 1, 0, implicit $exec :: (store (<2 x s32>), align 4, addrspace 3) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[PRED_COPY]].sub1 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[PRED_COPY]].sub0 + ; GFX9-NEXT: DS_WRITE2_B32_gfx9 [[PRED_COPY1]], [[COPY1]], [[COPY]], 0, 1, 0, implicit $exec :: (store (<2 x s32>), align 4, addrspace 3) ; GFX10-LABEL: name: store_local_v2s32_align4 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX10-NEXT: DS_WRITE2_B32_gfx9 [[COPY1]], [[COPY3]], [[COPY2]], 0, 1, 0, implicit $exec :: (store (<2 x s32>), align 4, addrspace 3) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[PRED_COPY]].sub1 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[PRED_COPY]].sub0 + ; GFX10-NEXT: DS_WRITE2_B32_gfx9 [[PRED_COPY1]], [[COPY1]], [[COPY]], 0, 1, 0, implicit $exec :: (store (<2 x s32>), align 4, addrspace 3) %0:vgpr(<2 x s32>) = COPY $vgpr0_vgpr1 %1:vgpr(p3) = COPY $vgpr2 G_STORE %0, %1 :: (store (<2 x s32>), 
align 4, addrspace 3) @@ -489,28 +489,28 @@ ; GFX7-LABEL: name: store_local_v4s16_align4 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-NEXT: DS_WRITE2_B32 [[COPY1]], [[COPY3]], [[COPY2]], 0, 1, 0, implicit $m0, implicit $exec :: (store (<4 x s16>), align 4, addrspace 3) + ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[PRED_COPY]].sub1 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[PRED_COPY]].sub0 + ; GFX7-NEXT: DS_WRITE2_B32 [[PRED_COPY1]], [[COPY1]], [[COPY]], 0, 1, 0, implicit $m0, implicit $exec :: (store (<4 x s16>), align 4, addrspace 3) ; GFX9-LABEL: name: store_local_v4s16_align4 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX9-NEXT: DS_WRITE2_B32_gfx9 [[COPY1]], [[COPY3]], [[COPY2]], 0, 1, 0, implicit $exec :: (store (<4 x s16>), align 4, addrspace 3) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[PRED_COPY]].sub1 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[PRED_COPY]].sub0 + ; GFX9-NEXT: DS_WRITE2_B32_gfx9 [[PRED_COPY1]], [[COPY1]], [[COPY]], 0, 1, 0, implicit $exec :: (store (<4 x s16>), align 4, addrspace 3) ; GFX10-LABEL: name: store_local_v4s16_align4 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX10-NEXT: DS_WRITE2_B32_gfx9 [[COPY1]], [[COPY3]], [[COPY2]], 0, 1, 0, implicit $exec :: (store (<4 x s16>), align 4, addrspace 3) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[PRED_COPY]].sub1 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[PRED_COPY]].sub0 + ; GFX10-NEXT: DS_WRITE2_B32_gfx9 [[PRED_COPY1]], [[COPY1]], [[COPY]], 0, 1, 0, implicit $exec :: (store (<4 x s16>), align 4, addrspace 3) %0:vgpr(<4 x s16>) = COPY $vgpr0_vgpr1 %1:vgpr(p3) = COPY $vgpr2 G_STORE %0, %1 :: (store (<4 x s16>), align 4, addrspace 3) @@ -534,29 +534,29 @@ ; GFX6-LABEL: name: store_local_s64_align8 ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX6-NEXT: $m0 = S_MOV_B32 -1 - ; GFX6-NEXT: DS_WRITE_B64 [[COPY1]], [[COPY]], 0, 0, implicit $m0, implicit $exec :: (store (s64), addrspace 3) + ; GFX6-NEXT: DS_WRITE_B64 [[PRED_COPY1]], [[PRED_COPY]], 0, 0, implicit $m0, implicit $exec :: (store (s64), addrspace 3) ; GFX7-LABEL: name: 
store_local_s64_align8 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 - ; GFX7-NEXT: DS_WRITE_B64 [[COPY1]], [[COPY]], 0, 0, implicit $m0, implicit $exec :: (store (s64), addrspace 3) + ; GFX7-NEXT: DS_WRITE_B64 [[PRED_COPY1]], [[PRED_COPY]], 0, 0, implicit $m0, implicit $exec :: (store (s64), addrspace 3) ; GFX9-LABEL: name: store_local_s64_align8 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: DS_WRITE_B64_gfx9 [[COPY1]], [[COPY]], 0, 0, implicit $exec :: (store (s64), addrspace 3) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX9-NEXT: DS_WRITE_B64_gfx9 [[PRED_COPY1]], [[PRED_COPY]], 0, 0, implicit $exec :: (store (s64), addrspace 3) ; GFX10-LABEL: name: store_local_s64_align8 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10-NEXT: DS_WRITE_B64_gfx9 [[COPY1]], [[COPY]], 0, 0, implicit $exec :: (store (s64), addrspace 3) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX10-NEXT: DS_WRITE_B64_gfx9 [[PRED_COPY1]], [[PRED_COPY]], 0, 0, implicit $exec :: (store (s64), addrspace 3) %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(p3) = COPY $vgpr2 G_STORE %0, %1 :: (store (s64), align 8, addrspace 3) @@ -580,29 +580,29 @@ ; GFX6-LABEL: name: store_local_p1_align8 ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX6-NEXT: $m0 = S_MOV_B32 -1 - ; GFX6-NEXT: DS_WRITE_B64 [[COPY1]], [[COPY]], 0, 0, implicit $m0, implicit $exec :: (store (p1), addrspace 3) + ; GFX6-NEXT: DS_WRITE_B64 [[PRED_COPY1]], [[PRED_COPY]], 0, 0, implicit $m0, implicit $exec :: (store (p1), addrspace 3) ; GFX7-LABEL: name: store_local_p1_align8 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 - ; GFX7-NEXT: DS_WRITE_B64 [[COPY1]], [[COPY]], 0, 0, implicit $m0, implicit $exec :: (store (p1), addrspace 3) + ; GFX7-NEXT: DS_WRITE_B64 [[PRED_COPY1]], [[PRED_COPY]], 0, 0, implicit $m0, implicit $exec :: (store (p1), addrspace 3) ; GFX9-LABEL: name: store_local_p1_align8 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: DS_WRITE_B64_gfx9 [[COPY1]], [[COPY]], 0, 0, implicit $exec :: (store (p1), addrspace 3) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: 
[[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX9-NEXT: DS_WRITE_B64_gfx9 [[PRED_COPY1]], [[PRED_COPY]], 0, 0, implicit $exec :: (store (p1), addrspace 3) ; GFX10-LABEL: name: store_local_p1_align8 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10-NEXT: DS_WRITE_B64_gfx9 [[COPY1]], [[COPY]], 0, 0, implicit $exec :: (store (p1), addrspace 3) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX10-NEXT: DS_WRITE_B64_gfx9 [[PRED_COPY1]], [[PRED_COPY]], 0, 0, implicit $exec :: (store (p1), addrspace 3) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(p3) = COPY $vgpr2 G_STORE %0, %1 :: (store (p1), align 8, addrspace 3) @@ -626,29 +626,29 @@ ; GFX6-LABEL: name: store_local_v2s32_align8 ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX6-NEXT: $m0 = S_MOV_B32 -1 - ; GFX6-NEXT: DS_WRITE_B64 [[COPY1]], [[COPY]], 0, 0, implicit $m0, implicit $exec :: (store (<2 x s32>), addrspace 3) + ; GFX6-NEXT: DS_WRITE_B64 [[PRED_COPY1]], [[PRED_COPY]], 0, 0, implicit $m0, implicit $exec :: (store (<2 x s32>), addrspace 3) ; GFX7-LABEL: name: store_local_v2s32_align8 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 - ; GFX7-NEXT: DS_WRITE_B64 [[COPY1]], [[COPY]], 0, 0, implicit $m0, implicit $exec :: (store (<2 x s32>), addrspace 3) + ; GFX7-NEXT: DS_WRITE_B64 [[PRED_COPY1]], [[PRED_COPY]], 0, 0, implicit $m0, implicit $exec :: (store (<2 x s32>), addrspace 3) ; GFX9-LABEL: name: store_local_v2s32_align8 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: DS_WRITE_B64_gfx9 [[COPY1]], [[COPY]], 0, 0, implicit $exec :: (store (<2 x s32>), addrspace 3) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX9-NEXT: DS_WRITE_B64_gfx9 [[PRED_COPY1]], [[PRED_COPY]], 0, 0, implicit $exec :: (store (<2 x s32>), addrspace 3) ; GFX10-LABEL: name: store_local_v2s32_align8 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10-NEXT: DS_WRITE_B64_gfx9 [[COPY1]], [[COPY]], 0, 0, implicit $exec :: (store (<2 x s32>), addrspace 3) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX10-NEXT: DS_WRITE_B64_gfx9 [[PRED_COPY1]], [[PRED_COPY]], 0, 0, implicit $exec :: (store (<2 x s32>), addrspace 3) %0:vgpr(<2 x s32>) = COPY $vgpr0_vgpr1 %1:vgpr(p3) = COPY $vgpr2 G_STORE %0, %1 :: (store (<2 x s32>), align 8, addrspace 3) @@ -672,29 +672,29 @@ ; GFX6-LABEL: name: store_local_v4s16_align8 ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2 ; 
GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX6-NEXT: $m0 = S_MOV_B32 -1 - ; GFX6-NEXT: DS_WRITE_B64 [[COPY1]], [[COPY]], 0, 0, implicit $m0, implicit $exec :: (store (<4 x s16>), addrspace 3) + ; GFX6-NEXT: DS_WRITE_B64 [[PRED_COPY1]], [[PRED_COPY]], 0, 0, implicit $m0, implicit $exec :: (store (<4 x s16>), addrspace 3) ; GFX7-LABEL: name: store_local_v4s16_align8 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 - ; GFX7-NEXT: DS_WRITE_B64 [[COPY1]], [[COPY]], 0, 0, implicit $m0, implicit $exec :: (store (<4 x s16>), addrspace 3) + ; GFX7-NEXT: DS_WRITE_B64 [[PRED_COPY1]], [[PRED_COPY]], 0, 0, implicit $m0, implicit $exec :: (store (<4 x s16>), addrspace 3) ; GFX9-LABEL: name: store_local_v4s16_align8 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: DS_WRITE_B64_gfx9 [[COPY1]], [[COPY]], 0, 0, implicit $exec :: (store (<4 x s16>), addrspace 3) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX9-NEXT: DS_WRITE_B64_gfx9 [[PRED_COPY1]], [[PRED_COPY]], 0, 0, implicit $exec :: (store (<4 x s16>), addrspace 3) ; GFX10-LABEL: name: store_local_v4s16_align8 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10-NEXT: DS_WRITE_B64_gfx9 [[COPY1]], [[COPY]], 0, 0, implicit $exec :: (store (<4 x s16>), addrspace 3) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX10-NEXT: DS_WRITE_B64_gfx9 [[PRED_COPY1]], [[PRED_COPY]], 0, 0, implicit $exec :: (store (<4 x s16>), addrspace 3) %0:vgpr(<4 x s16>) = COPY $vgpr0_vgpr1 %1:vgpr(p3) = COPY $vgpr2 G_STORE %0, %1 :: (store (<4 x s16>), align 8, addrspace 3) @@ -727,28 +727,28 @@ ; GFX7-LABEL: name: store_local_s64_align4_from_1_gep_1016 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-NEXT: DS_WRITE2_B32 [[COPY1]], [[COPY3]], [[COPY2]], 254, 255, 0, implicit $m0, implicit $exec :: (store (s64), align 4, addrspace 3) + ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[PRED_COPY]].sub1 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[PRED_COPY]].sub0 + ; GFX7-NEXT: DS_WRITE2_B32 [[PRED_COPY1]], [[COPY1]], [[COPY]], 254, 255, 0, implicit $m0, implicit $exec :: (store (s64), align 4, addrspace 3) ; GFX9-LABEL: name: store_local_s64_align4_from_1_gep_1016 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 
; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX9-NEXT: DS_WRITE2_B32_gfx9 [[COPY1]], [[COPY3]], [[COPY2]], 254, 255, 0, implicit $exec :: (store (s64), align 4, addrspace 3) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[PRED_COPY]].sub1 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[PRED_COPY]].sub0 + ; GFX9-NEXT: DS_WRITE2_B32_gfx9 [[PRED_COPY1]], [[COPY1]], [[COPY]], 254, 255, 0, implicit $exec :: (store (s64), align 4, addrspace 3) ; GFX10-LABEL: name: store_local_s64_align4_from_1_gep_1016 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX10-NEXT: DS_WRITE2_B32_gfx9 [[COPY1]], [[COPY3]], [[COPY2]], 254, 255, 0, implicit $exec :: (store (s64), align 4, addrspace 3) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[PRED_COPY]].sub1 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[PRED_COPY]].sub0 + ; GFX10-NEXT: DS_WRITE2_B32_gfx9 [[PRED_COPY1]], [[COPY1]], [[COPY]], 254, 255, 0, implicit $exec :: (store (s64), align 4, addrspace 3) %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(p3) = COPY $vgpr2 %2:vgpr(s32) = G_CONSTANT i32 1016 @@ -783,34 +783,34 @@ ; GFX7-LABEL: name: store_local_s64_align4_from_1_gep_1020 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX7-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1020, implicit $exec - ; GFX7-NEXT: %3:vgpr_32, dead %6:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX7-NEXT: $m0 = S_MOV_B32 -1 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-NEXT: DS_WRITE2_B32 %3, [[COPY3]], [[COPY2]], 0, 1, 0, implicit $m0, implicit $exec :: (store (s64), align 4, addrspace 3) + ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[PRED_COPY]].sub1 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[PRED_COPY]].sub0 + ; GFX7-NEXT: DS_WRITE2_B32 [[V_ADD_CO_U32_e64_]], [[COPY1]], [[COPY]], 0, 1, 0, implicit $m0, implicit $exec :: (store (s64), align 4, addrspace 3) ; GFX9-LABEL: name: store_local_s64_align4_from_1_gep_1020 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX9-NEXT: 
[[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1020, implicit $exec - ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY1]], [[V_MOV_B32_e32_]], 0, implicit $exec - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX9-NEXT: DS_WRITE2_B32_gfx9 [[V_ADD_U32_e64_]], [[COPY3]], [[COPY2]], 0, 1, 0, implicit $exec :: (store (s64), align 4, addrspace 3) + ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[PRED_COPY1]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[PRED_COPY]].sub1 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[PRED_COPY]].sub0 + ; GFX9-NEXT: DS_WRITE2_B32_gfx9 [[V_ADD_U32_e64_]], [[COPY1]], [[COPY]], 0, 1, 0, implicit $exec :: (store (s64), align 4, addrspace 3) ; GFX10-LABEL: name: store_local_s64_align4_from_1_gep_1020 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1020, implicit $exec - ; GFX10-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY1]], [[V_MOV_B32_e32_]], 0, implicit $exec - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX10-NEXT: DS_WRITE2_B32_gfx9 [[V_ADD_U32_e64_]], [[COPY3]], [[COPY2]], 0, 1, 0, implicit $exec :: (store (s64), align 4, addrspace 3) + ; GFX10-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[PRED_COPY1]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[PRED_COPY]].sub1 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[PRED_COPY]].sub0 + ; GFX10-NEXT: DS_WRITE2_B32_gfx9 [[V_ADD_U32_e64_]], [[COPY1]], [[COPY]], 0, 1, 0, implicit $exec :: (store (s64), align 4, addrspace 3) %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(p3) = COPY $vgpr2 %2:vgpr(s32) = G_CONSTANT i32 1020 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-private.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-private.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-private.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-private.mir @@ -23,21 +23,21 @@ ; GFX6-LABEL: name: function_store_private_s32_to_4 ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6-NEXT: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (s32), addrspace 5) + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX6-NEXT: BUFFER_STORE_DWORD_OFFEN [[PRED_COPY]], [[PRED_COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (s32), addrspace 5) ; GFX9-LABEL: name: function_store_private_s32_to_4 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (s32), addrspace 5) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; 
GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFEN [[PRED_COPY]], [[PRED_COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (s32), addrspace 5) ; GFX11-LABEL: name: function_store_private_s32_to_4 ; GFX11: liveins: $vgpr0, $vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: SCRATCH_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 5) + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX11-NEXT: SCRATCH_STORE_DWORD [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 5) %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(p5) = COPY $vgpr1 G_STORE %0, %1 :: (store (s32), align 4, addrspace 5) @@ -62,21 +62,21 @@ ; GFX6-LABEL: name: function_store_private_s32_to_2 ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6-NEXT: BUFFER_STORE_SHORT_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (s16), addrspace 5) + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX6-NEXT: BUFFER_STORE_SHORT_OFFEN [[PRED_COPY]], [[PRED_COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (s16), addrspace 5) ; GFX9-LABEL: name: function_store_private_s32_to_2 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: BUFFER_STORE_SHORT_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (s16), addrspace 5) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX9-NEXT: BUFFER_STORE_SHORT_OFFEN [[PRED_COPY]], [[PRED_COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (s16), addrspace 5) ; GFX11-LABEL: name: function_store_private_s32_to_2 ; GFX11: liveins: $vgpr0, $vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: SCRATCH_STORE_SHORT [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s16), addrspace 5) + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX11-NEXT: SCRATCH_STORE_SHORT [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s16), addrspace 5) %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(p5) = COPY $vgpr1 G_STORE %0, %1 :: (store (s16), align 2, addrspace 5) @@ -101,21 +101,21 @@ ; GFX6-LABEL: name: function_store_private_s32_to_1 ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6-NEXT: BUFFER_STORE_BYTE_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (s8), addrspace 5) + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX6-NEXT: BUFFER_STORE_BYTE_OFFEN [[PRED_COPY]], [[PRED_COPY1]], 
$sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (s8), addrspace 5) ; GFX9-LABEL: name: function_store_private_s32_to_1 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: BUFFER_STORE_BYTE_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (s8), addrspace 5) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX9-NEXT: BUFFER_STORE_BYTE_OFFEN [[PRED_COPY]], [[PRED_COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (s8), addrspace 5) ; GFX11-LABEL: name: function_store_private_s32_to_1 ; GFX11: liveins: $vgpr0, $vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: SCRATCH_STORE_BYTE [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s8), addrspace 5) + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX11-NEXT: SCRATCH_STORE_BYTE [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s8), addrspace 5) %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(p5) = COPY $vgpr1 G_STORE %0, %1 :: (store (s8), align 1, addrspace 5) @@ -140,21 +140,21 @@ ; GFX6-LABEL: name: function_store_private_v2s16 ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6-NEXT: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (<2 x s16>), addrspace 5) + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX6-NEXT: BUFFER_STORE_DWORD_OFFEN [[PRED_COPY]], [[PRED_COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (<2 x s16>), addrspace 5) ; GFX9-LABEL: name: function_store_private_v2s16 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (<2 x s16>), addrspace 5) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFEN [[PRED_COPY]], [[PRED_COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (<2 x s16>), addrspace 5) ; GFX11-LABEL: name: function_store_private_v2s16 ; GFX11: liveins: $vgpr0, $vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: SCRATCH_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s16>), addrspace 5) + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX11-NEXT: SCRATCH_STORE_DWORD [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s16>), addrspace 5) %0:vgpr(<2 x s16>) = COPY $vgpr0 %1:vgpr(p5) = COPY $vgpr1 G_STORE %0, %1 :: (store (<2 x s16>), align 4, addrspace 5) @@ -179,21 +179,21 @@ ; GFX6-LABEL: name: 
function_store_private_p3 ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6-NEXT: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (p3), addrspace 5) + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX6-NEXT: BUFFER_STORE_DWORD_OFFEN [[PRED_COPY]], [[PRED_COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (p3), addrspace 5) ; GFX9-LABEL: name: function_store_private_p3 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (p3), addrspace 5) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFEN [[PRED_COPY]], [[PRED_COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (p3), addrspace 5) ; GFX11-LABEL: name: function_store_private_p3 ; GFX11: liveins: $vgpr0, $vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: SCRATCH_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (p3), addrspace 5) + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX11-NEXT: SCRATCH_STORE_DWORD [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (p3), addrspace 5) %0:vgpr(p3) = COPY $vgpr0 %1:vgpr(p5) = COPY $vgpr1 G_STORE %0, %1 :: (store (p3), align 4, addrspace 5) @@ -218,21 +218,21 @@ ; GFX6-LABEL: name: function_store_private_p5 ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6-NEXT: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (p5), addrspace 5) + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX6-NEXT: BUFFER_STORE_DWORD_OFFEN [[PRED_COPY]], [[PRED_COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (p5), addrspace 5) ; GFX9-LABEL: name: function_store_private_p5 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (p5), addrspace 5) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFEN [[PRED_COPY]], [[PRED_COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (p5), addrspace 5) ; GFX11-LABEL: name: function_store_private_p5 ; GFX11: liveins: $vgpr0, $vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: SCRATCH_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: 
(store (p5), addrspace 5) + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX11-NEXT: SCRATCH_STORE_DWORD [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (p5), addrspace 5) %0:vgpr(p5) = COPY $vgpr0 %1:vgpr(p5) = COPY $vgpr1 G_STORE %0, %1 :: (store (p5), align 4, addrspace 5) @@ -355,21 +355,21 @@ ; GFX6-LABEL: name: kernel_store_private_s32_to_4 ; GFX6: liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6-NEXT: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (s32), addrspace 5) + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX6-NEXT: BUFFER_STORE_DWORD_OFFEN [[PRED_COPY]], [[PRED_COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (s32), addrspace 5) ; GFX9-LABEL: name: kernel_store_private_s32_to_4 ; GFX9: liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (s32), addrspace 5) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFEN [[PRED_COPY]], [[PRED_COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (s32), addrspace 5) ; GFX11-LABEL: name: kernel_store_private_s32_to_4 ; GFX11: liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: SCRATCH_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 5) + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX11-NEXT: SCRATCH_STORE_DWORD [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 5) %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(p5) = COPY $vgpr1 G_STORE %0, %1 :: (store (s32), align 4, addrspace 5) @@ -393,21 +393,21 @@ ; GFX6-LABEL: name: kernel_store_private_s32_to_2 ; GFX6: liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6-NEXT: BUFFER_STORE_SHORT_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (s16), addrspace 5) + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX6-NEXT: BUFFER_STORE_SHORT_OFFEN [[PRED_COPY]], [[PRED_COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (s16), addrspace 5) ; GFX9-LABEL: name: kernel_store_private_s32_to_2 ; GFX9: liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: BUFFER_STORE_SHORT_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (s16), 
addrspace 5) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX9-NEXT: BUFFER_STORE_SHORT_OFFEN [[PRED_COPY]], [[PRED_COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (s16), addrspace 5) ; GFX11-LABEL: name: kernel_store_private_s32_to_2 ; GFX11: liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: SCRATCH_STORE_SHORT [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s16), addrspace 5) + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX11-NEXT: SCRATCH_STORE_SHORT [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s16), addrspace 5) %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(p5) = COPY $vgpr1 G_STORE %0, %1 :: (store (s16), align 2, addrspace 5) @@ -431,21 +431,21 @@ ; GFX6-LABEL: name: kernel_store_private_s32_to_1 ; GFX6: liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6-NEXT: BUFFER_STORE_BYTE_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (s8), addrspace 5) + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX6-NEXT: BUFFER_STORE_BYTE_OFFEN [[PRED_COPY]], [[PRED_COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (s8), addrspace 5) ; GFX9-LABEL: name: kernel_store_private_s32_to_1 ; GFX9: liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: BUFFER_STORE_BYTE_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (s8), addrspace 5) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX9-NEXT: BUFFER_STORE_BYTE_OFFEN [[PRED_COPY]], [[PRED_COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (s8), addrspace 5) ; GFX11-LABEL: name: kernel_store_private_s32_to_1 ; GFX11: liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: SCRATCH_STORE_BYTE [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s8), addrspace 5) + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX11-NEXT: SCRATCH_STORE_BYTE [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s8), addrspace 5) %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(p5) = COPY $vgpr1 G_STORE %0, %1 :: (store (s8), align 1, addrspace 5) @@ -470,21 +470,21 @@ ; GFX6-LABEL: name: kernel_store_private_v2s16 ; GFX6: liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6-NEXT: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (<2 x s16>), addrspace 5) + ; GFX6-NEXT: 
[[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX6-NEXT: BUFFER_STORE_DWORD_OFFEN [[PRED_COPY]], [[PRED_COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (<2 x s16>), addrspace 5) ; GFX9-LABEL: name: kernel_store_private_v2s16 ; GFX9: liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (<2 x s16>), addrspace 5) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFEN [[PRED_COPY]], [[PRED_COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (<2 x s16>), addrspace 5) ; GFX11-LABEL: name: kernel_store_private_v2s16 ; GFX11: liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: SCRATCH_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s16>), addrspace 5) + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX11-NEXT: SCRATCH_STORE_DWORD [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s16>), addrspace 5) %0:vgpr(<2 x s16>) = COPY $vgpr0 %1:vgpr(p5) = COPY $vgpr1 G_STORE %0, %1 :: (store (<2 x s16>), align 4, addrspace 5) @@ -508,21 +508,21 @@ ; GFX6-LABEL: name: kernel_store_private_p3 ; GFX6: liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6-NEXT: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (p3), addrspace 5) + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX6-NEXT: BUFFER_STORE_DWORD_OFFEN [[PRED_COPY]], [[PRED_COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (p3), addrspace 5) ; GFX9-LABEL: name: kernel_store_private_p3 ; GFX9: liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (p3), addrspace 5) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFEN [[PRED_COPY]], [[PRED_COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (p3), addrspace 5) ; GFX11-LABEL: name: kernel_store_private_p3 ; GFX11: liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: SCRATCH_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (p3), addrspace 5) + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; 
GFX11-NEXT: SCRATCH_STORE_DWORD [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (p3), addrspace 5) %0:vgpr(p3) = COPY $vgpr0 %1:vgpr(p5) = COPY $vgpr1 G_STORE %0, %1 :: (store (p3), align 4, addrspace 5) @@ -546,21 +546,21 @@ ; GFX6-LABEL: name: kernel_store_private_p5 ; GFX6: liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6-NEXT: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (p5), addrspace 5) + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX6-NEXT: BUFFER_STORE_DWORD_OFFEN [[PRED_COPY]], [[PRED_COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (p5), addrspace 5) ; GFX9-LABEL: name: kernel_store_private_p5 ; GFX9: liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (p5), addrspace 5) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFEN [[PRED_COPY]], [[PRED_COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (p5), addrspace 5) ; GFX11-LABEL: name: kernel_store_private_p5 ; GFX11: liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: SCRATCH_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (p5), addrspace 5) + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX11-NEXT: SCRATCH_STORE_DWORD [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (p5), addrspace 5) %0:vgpr(p5) = COPY $vgpr0 %1:vgpr(p5) = COPY $vgpr1 G_STORE %0, %1 :: (store (p5), align 4, addrspace 5) @@ -701,19 +701,19 @@ ; GFX6-LABEL: name: function_store_private_s32_to_4_wave_address ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: BUFFER_STORE_DWORD_OFFSET [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32), addrspace 5) + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: BUFFER_STORE_DWORD_OFFSET [[PRED_COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32), addrspace 5) ; GFX9-LABEL: name: function_store_private_s32_to_4_wave_address ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFSET [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32), addrspace 5) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFSET [[PRED_COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32), addrspace 5) ; GFX11-LABEL: name: function_store_private_s32_to_4_wave_address ; GFX11: liveins: $vgpr0, $vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; 
GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX11-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 5, $sgpr32, implicit $exec - ; GFX11-NEXT: SCRATCH_STORE_DWORD [[COPY]], [[V_LSHRREV_B32_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 5) + ; GFX11-NEXT: SCRATCH_STORE_DWORD [[PRED_COPY]], [[V_LSHRREV_B32_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 5) %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(p5) = G_AMDGPU_WAVE_ADDRESS $sgpr32 G_STORE %0, %1 :: (store (s32), align 4, addrspace 5) @@ -738,19 +738,19 @@ ; GFX6-LABEL: name: function_store_private_s32_to_4_wave_address_offset_copy_constant_4095 ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: BUFFER_STORE_DWORD_OFFSET [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4095, 0, 0, implicit $exec :: (store (s32), addrspace 5) + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: BUFFER_STORE_DWORD_OFFSET [[PRED_COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4095, 0, 0, implicit $exec :: (store (s32), addrspace 5) ; GFX9-LABEL: name: function_store_private_s32_to_4_wave_address_offset_copy_constant_4095 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFSET [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4095, 0, 0, implicit $exec :: (store (s32), addrspace 5) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFSET [[PRED_COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4095, 0, 0, implicit $exec :: (store (s32), addrspace 5) ; GFX11-LABEL: name: function_store_private_s32_to_4_wave_address_offset_copy_constant_4095 ; GFX11: liveins: $vgpr0, $vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX11-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 5, $sgpr32, implicit $exec - ; GFX11-NEXT: SCRATCH_STORE_DWORD [[COPY]], [[V_LSHRREV_B32_e64_]], 4095, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 5) + ; GFX11-NEXT: SCRATCH_STORE_DWORD [[PRED_COPY]], [[V_LSHRREV_B32_e64_]], 4095, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 5) %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(p5) = G_AMDGPU_WAVE_ADDRESS $sgpr32 %2:sgpr(s32) = G_CONSTANT i32 4095 @@ -777,23 +777,23 @@ ; GFX6-LABEL: name: function_store_private_s32_to_4_wave_address_offset_4095 ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX6-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec ; GFX6-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec - ; GFX6-NEXT: %3:vgpr_32, dead %4:sreg_64_xexec = V_ADD_CO_U32_e64 [[V_LSHRREV_B32_e64_]], [[V_MOV_B32_e32_]], 0, implicit $exec - ; GFX6-NEXT: BUFFER_STORE_DWORD_OFFEN [[COPY]], %3, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (s32), addrspace 5) + ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[V_LSHRREV_B32_e64_]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX6-NEXT: BUFFER_STORE_DWORD_OFFEN [[PRED_COPY]], [[V_ADD_CO_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (s32), addrspace 5) ; GFX9-LABEL: name: 
function_store_private_s32_to_4_wave_address_offset_4095 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX9-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec - ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[V_LSHRREV_B32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, implicit $exec :: (store (s32), addrspace 5) + ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFEN [[PRED_COPY]], [[V_LSHRREV_B32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, implicit $exec :: (store (s32), addrspace 5) ; GFX11-LABEL: name: function_store_private_s32_to_4_wave_address_offset_4095 ; GFX11: liveins: $vgpr0, $vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX11-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 5, $sgpr32, implicit $exec - ; GFX11-NEXT: SCRATCH_STORE_DWORD [[COPY]], [[V_LSHRREV_B32_e64_]], 4095, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 5) + ; GFX11-NEXT: SCRATCH_STORE_DWORD [[PRED_COPY]], [[V_LSHRREV_B32_e64_]], 4095, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 5) %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(p5) = G_AMDGPU_WAVE_ADDRESS $sgpr32 %2:vgpr(s32) = G_CONSTANT i32 4095 @@ -819,30 +819,30 @@ ; GFX6-LABEL: name: function_store_private_s32_to_4_wave_address_offset_copy_constant_4096 ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX6-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GFX6-NEXT: %4:vgpr_32, dead %5:sreg_64_xexec = V_ADD_CO_U32_e64 [[V_LSHRREV_B32_e64_]], [[COPY1]], 0, implicit $exec - ; GFX6-NEXT: BUFFER_STORE_DWORD_OFFEN [[COPY]], %4, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (s32), addrspace 5) + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[V_LSHRREV_B32_e64_]], [[PRED_COPY1]], 0, implicit $exec + ; GFX6-NEXT: BUFFER_STORE_DWORD_OFFEN [[PRED_COPY]], [[V_ADD_CO_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (s32), addrspace 5) ; GFX9-LABEL: name: function_store_private_s32_to_4_wave_address_offset_copy_constant_4096 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX9-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[V_LSHRREV_B32_e64_]], [[COPY1]], 0, implicit $exec - ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (s32), addrspace 5) + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[V_LSHRREV_B32_e64_]], [[PRED_COPY1]], 0, implicit $exec + ; GFX9-NEXT: 
BUFFER_STORE_DWORD_OFFEN [[PRED_COPY]], [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (s32), addrspace 5) ; GFX11-LABEL: name: function_store_private_s32_to_4_wave_address_offset_copy_constant_4096 ; GFX11: liveins: $vgpr0, $vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX11-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 5, $sgpr32, implicit $exec ; GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GFX11-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[V_LSHRREV_B32_e64_]], [[COPY1]], 0, implicit $exec - ; GFX11-NEXT: SCRATCH_STORE_DWORD [[COPY]], [[V_ADD_U32_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 5) + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; GFX11-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[V_LSHRREV_B32_e64_]], [[PRED_COPY1]], 0, implicit $exec + ; GFX11-NEXT: SCRATCH_STORE_DWORD [[PRED_COPY]], [[V_ADD_U32_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 5) %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(p5) = G_AMDGPU_WAVE_ADDRESS $sgpr32 %2:sgpr(s32) = G_CONSTANT i32 4096 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sub.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sub.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sub.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sub.mir @@ -19,24 +19,24 @@ ; GFX6-LABEL: name: sub_s32 ; GFX6: liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr3_vgpr4 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[S_SUB_I32_:%[0-9]+]]:sreg_32 = S_SUB_I32 [[COPY]], [[COPY1]], implicit-def $scc - ; GFX6-NEXT: %7:vgpr_32, dead %12:sreg_64_xexec = V_SUB_CO_U32_e64 [[COPY2]], [[S_SUB_I32_]], 0, implicit $exec - ; GFX6-NEXT: %8:vgpr_32, dead %11:sreg_64_xexec = V_SUB_CO_U32_e64 [[S_SUB_I32_]], %7, 0, implicit $exec - ; GFX6-NEXT: %9:vgpr_32, dead %10:sreg_64_xexec = V_SUB_CO_U32_e64 %8, [[COPY2]], 0, implicit $exec - ; GFX6-NEXT: S_ENDPGM 0, implicit %9 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[S_SUB_I32_:%[0-9]+]]:sreg_32 = S_SUB_I32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc + ; GFX6-NEXT: [[V_SUB_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_SUB_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_SUB_CO_U32_e64 [[PRED_COPY2]], [[S_SUB_I32_]], 0, implicit $exec + ; GFX6-NEXT: [[V_SUB_CO_U32_e64_2:%[0-9]+]]:vgpr_32, dead [[V_SUB_CO_U32_e64_3:%[0-9]+]]:sreg_64_xexec = V_SUB_CO_U32_e64 [[S_SUB_I32_]], [[V_SUB_CO_U32_e64_]], 0, implicit $exec + ; GFX6-NEXT: [[V_SUB_CO_U32_e64_4:%[0-9]+]]:vgpr_32, dead [[V_SUB_CO_U32_e64_5:%[0-9]+]]:sreg_64_xexec = V_SUB_CO_U32_e64 [[V_SUB_CO_U32_e64_2]], [[PRED_COPY2]], 0, implicit $exec + ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_SUB_CO_U32_e64_4]] ; GFX9-LABEL: name: sub_s32 ; GFX9: liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr3_vgpr4 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[S_SUB_I32_:%[0-9]+]]:sreg_32 = S_SUB_I32 
[[COPY]], [[COPY1]], implicit-def $scc - ; GFX9-NEXT: [[V_SUB_U32_e64_:%[0-9]+]]:vgpr_32 = V_SUB_U32_e64 [[COPY2]], [[S_SUB_I32_]], 0, implicit $exec + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[S_SUB_I32_:%[0-9]+]]:sreg_32 = S_SUB_I32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc + ; GFX9-NEXT: [[V_SUB_U32_e64_:%[0-9]+]]:vgpr_32 = V_SUB_U32_e64 [[PRED_COPY2]], [[S_SUB_I32_]], 0, implicit $exec ; GFX9-NEXT: [[V_SUB_U32_e64_1:%[0-9]+]]:vgpr_32 = V_SUB_U32_e64 [[S_SUB_I32_]], [[V_SUB_U32_e64_]], 0, implicit $exec - ; GFX9-NEXT: [[V_SUB_U32_e64_2:%[0-9]+]]:vgpr_32 = V_SUB_U32_e64 [[V_SUB_U32_e64_1]], [[COPY2]], 0, implicit $exec + ; GFX9-NEXT: [[V_SUB_U32_e64_2:%[0-9]+]]:vgpr_32 = V_SUB_U32_e64 [[V_SUB_U32_e64_1]], [[PRED_COPY2]], 0, implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_SUB_U32_e64_2]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-trunc.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-trunc.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-trunc.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-trunc.mir @@ -13,8 +13,8 @@ ; GCN-LABEL: name: trunc_sgpr_s32_to_s1 ; GCN: liveins: $sgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: S_ENDPGM 0, implicit [[PRED_COPY]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s1) = G_TRUNC %0 S_ENDPGM 0, implicit %1 @@ -31,8 +31,8 @@ ; GCN-LABEL: name: trunc_sgpr_s32_to_s16 ; GCN: liveins: $sgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: S_ENDPGM 0, implicit [[PRED_COPY]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s16) = G_TRUNC %0 S_ENDPGM 0, implicit %1 @@ -49,9 +49,9 @@ ; GCN-LABEL: name: trunc_sgpr_s64_to_s32 ; GCN: liveins: $sgpr0_sgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 - ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY1]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GCN-NEXT: S_ENDPGM 0, implicit [[PRED_COPY1]] %0:sgpr(s64) = COPY $sgpr0_sgpr1 %1:sgpr(s32) = G_TRUNC %0 S_ENDPGM 0, implicit %1 @@ -68,9 +68,9 @@ ; GCN-LABEL: name: trunc_sgpr_s64_to_s16 ; GCN: liveins: $sgpr0_sgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 - ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY1]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GCN-NEXT: S_ENDPGM 0, implicit [[PRED_COPY1]] %0:sgpr(s64) = COPY $sgpr0_sgpr1 %1:sgpr(s16) = G_TRUNC %0 S_ENDPGM 0, implicit %1 @@ -87,9 +87,9 @@ ; GCN-LABEL: name: trunc_sgpr_s64_to_s1 ; GCN: liveins: $sgpr0_sgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 - ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY1]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = 
PRED_COPY [[PRED_COPY]].sub0 + ; GCN-NEXT: S_ENDPGM 0, implicit [[PRED_COPY1]] %0:sgpr(s64) = COPY $sgpr0_sgpr1 %1:sgpr(s1) = G_TRUNC %0 S_ENDPGM 0, implicit %1 @@ -106,9 +106,9 @@ ; GCN-LABEL: name: trunc_sgpr_s96_to_s16 ; GCN: liveins: $sgpr0_sgpr1_sgpr2 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr_96 = COPY $sgpr0_sgpr1_sgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 - ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY1]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_96 = PRED_COPY $sgpr0_sgpr1_sgpr2 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GCN-NEXT: S_ENDPGM 0, implicit [[PRED_COPY1]] %0:sgpr(s96) = COPY $sgpr0_sgpr1_sgpr2 %1:sgpr(s16) = G_TRUNC %0 S_ENDPGM 0, implicit %1 @@ -125,9 +125,9 @@ ; GCN-LABEL: name: trunc_sgpr_s96_to_s64 ; GCN: liveins: $sgpr0_sgpr1_sgpr2 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr_96_with_sub0_sub1 = COPY $sgpr0_sgpr1_sgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY [[COPY]].sub0_sub1 - ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY1]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_96_with_sub0_sub1 = PRED_COPY $sgpr0_sgpr1_sgpr2 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64 = PRED_COPY [[PRED_COPY]].sub0_sub1 + ; GCN-NEXT: S_ENDPGM 0, implicit [[PRED_COPY1]] %0:sgpr(s96) = COPY $sgpr0_sgpr1_sgpr2 %1:sgpr(s64) = G_TRUNC %0 S_ENDPGM 0, implicit %1 @@ -144,9 +144,9 @@ ; GCN-LABEL: name: trunc_sgpr_s128_to_s16 ; GCN: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 - ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY1]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_128 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GCN-NEXT: S_ENDPGM 0, implicit [[PRED_COPY1]] %0:sgpr(s128) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 %1:sgpr(s16) = G_TRUNC %0 S_ENDPGM 0, implicit %1 @@ -163,9 +163,9 @@ ; GCN-LABEL: name: trunc_sgpr_s128_to_s96 ; GCN: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr_128_with_sub0_sub1_sub2 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr_96 = COPY [[COPY]].sub0_sub1_sub2 - ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY1]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_128_with_sub0_sub1_sub2 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_96 = PRED_COPY [[PRED_COPY]].sub0_sub1_sub2 + ; GCN-NEXT: S_ENDPGM 0, implicit [[PRED_COPY1]] %0:sgpr(s128) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 %1:sgpr(s96) = G_TRUNC %0 S_ENDPGM 0, implicit %1 @@ -182,9 +182,9 @@ ; GCN-LABEL: name: trunc_sgpr_s256_to_s128 ; GCN: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr_128 = COPY [[COPY]].sub0_sub1_sub2_sub3 - ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY1]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_256 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_128 = PRED_COPY [[PRED_COPY]].sub0_sub1_sub2_sub3 + ; GCN-NEXT: S_ENDPGM 0, implicit [[PRED_COPY1]] %0:sgpr(s256) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 %1:sgpr(s128) = G_TRUNC %0 S_ENDPGM 0, implicit %1 @@ -201,9 +201,9 @@ ; GCN-LABEL: name: trunc_sgpr_s512_to_s256 ; GCN: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 ; GCN-NEXT: {{ 
$}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr_512 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr_256 = COPY [[COPY]].sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7 - ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY1]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_512 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_256 = PRED_COPY [[PRED_COPY]].sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7 + ; GCN-NEXT: S_ENDPGM 0, implicit [[PRED_COPY1]] %0:sgpr(s512) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 %1:sgpr(s256) = G_TRUNC %0 S_ENDPGM 0, implicit %1 @@ -220,8 +220,8 @@ ; GCN-LABEL: name: trunc_vgpr_s32_to_s1 ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: S_ENDPGM 0, implicit [[PRED_COPY]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s1) = G_TRUNC %0 S_ENDPGM 0, implicit %1 @@ -238,8 +238,8 @@ ; GCN-LABEL: name: trunc_vgpr_s32_to_s16 ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: S_ENDPGM 0, implicit [[PRED_COPY]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s16) = G_TRUNC %0 S_ENDPGM 0, implicit %1 @@ -256,9 +256,9 @@ ; GCN-LABEL: name: trunc_vgpr_s64_to_s32 ; GCN: liveins: $vgpr0_vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY1]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GCN-NEXT: S_ENDPGM 0, implicit [[PRED_COPY1]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = G_TRUNC %0 S_ENDPGM 0, implicit %1 @@ -275,9 +275,9 @@ ; GCN-LABEL: name: trunc_vgpr_s64_to_s16 ; GCN: liveins: $vgpr0_vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY1]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GCN-NEXT: S_ENDPGM 0, implicit [[PRED_COPY1]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(s16) = G_TRUNC %0 S_ENDPGM 0, implicit %1 @@ -294,9 +294,9 @@ ; GCN-LABEL: name: trunc_vgpr_s64_to_s1 ; GCN: liveins: $vgpr0_vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY1]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GCN-NEXT: S_ENDPGM 0, implicit [[PRED_COPY1]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(s1) = G_TRUNC %0 S_ENDPGM 0, implicit %1 @@ -313,9 +313,9 @@ ; GCN-LABEL: name: trunc_vgpr_s96_to_s16 ; GCN: liveins: $vgpr0_vgpr1_vgpr2 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_96 = COPY $vgpr0_vgpr1_vgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY1]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_96 = 
PRED_COPY $vgpr0_vgpr1_vgpr2 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GCN-NEXT: S_ENDPGM 0, implicit [[PRED_COPY1]] %0:vgpr(s96) = COPY $vgpr0_vgpr1_vgpr2 %1:vgpr(s16) = G_TRUNC %0 S_ENDPGM 0, implicit %1 @@ -332,9 +332,9 @@ ; GCN-LABEL: name: trunc_vgpr_s96_to_s64 ; GCN: liveins: $vgpr0_vgpr1_vgpr2 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_96 = COPY $vgpr0_vgpr1_vgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY [[COPY]].sub0_sub1 - ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY1]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_96 = PRED_COPY $vgpr0_vgpr1_vgpr2 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY [[PRED_COPY]].sub0_sub1 + ; GCN-NEXT: S_ENDPGM 0, implicit [[PRED_COPY1]] %0:vgpr(s96) = COPY $vgpr0_vgpr1_vgpr2 %1:vgpr(s64) = G_TRUNC %0 S_ENDPGM 0, implicit %1 @@ -351,9 +351,9 @@ ; GCN-LABEL: name: trunc_vgpr_s128_to_s16 ; GCN: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY1]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_128 = PRED_COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GCN-NEXT: S_ENDPGM 0, implicit [[PRED_COPY1]] %0:vgpr(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:vgpr(s16) = G_TRUNC %0 S_ENDPGM 0, implicit %1 @@ -370,9 +370,9 @@ ; GCN-LABEL: name: trunc_vgpr_s128_to_s96 ; GCN: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vreg_96 = COPY [[COPY]].sub0_sub1_sub2 - ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY1]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_128 = PRED_COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_96 = PRED_COPY [[PRED_COPY]].sub0_sub1_sub2 + ; GCN-NEXT: S_ENDPGM 0, implicit [[PRED_COPY1]] %0:vgpr(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:vgpr(s96) = G_TRUNC %0 S_ENDPGM 0, implicit %1 @@ -389,9 +389,9 @@ ; GCN-LABEL: name: trunc_vgpr_s256_to_s128 ; GCN: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY [[COPY]].sub0_sub1_sub2_sub3 - ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY1]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_256 = PRED_COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_128 = PRED_COPY [[PRED_COPY]].sub0_sub1_sub2_sub3 + ; GCN-NEXT: S_ENDPGM 0, implicit [[PRED_COPY1]] %0:vgpr(s256) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 %1:vgpr(s128) = G_TRUNC %0 S_ENDPGM 0, implicit %1 @@ -408,9 +408,9 @@ ; GCN-LABEL: name: trunc_vgpr_s512_to_s256 ; GCN: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_512 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vreg_256 = COPY [[COPY]].sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7 - ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY1]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_512 = PRED_COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_256 = PRED_COPY 
[[PRED_COPY]].sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7 + ; GCN-NEXT: S_ENDPGM 0, implicit [[PRED_COPY1]] %0:vgpr(s512) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 %1:vgpr(s256) = G_TRUNC %0 S_ENDPGM 0, implicit %1 @@ -428,10 +428,10 @@ ; GCN-LABEL: name: trunc_sgpr_s32_to_s1_use ; GCN: liveins: $sgpr0, $sgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GCN-NEXT: $scc = COPY [[COPY]] - ; GCN-NEXT: [[S_CSELECT_B32_:%[0-9]+]]:sreg_32 = S_CSELECT_B32 [[COPY]], [[COPY1]], implicit $scc + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GCN-NEXT: $scc = PRED_COPY [[PRED_COPY]] + ; GCN-NEXT: [[S_CSELECT_B32_:%[0-9]+]]:sreg_32 = S_CSELECT_B32 [[PRED_COPY]], [[PRED_COPY1]], implicit $scc ; GCN-NEXT: S_ENDPGM 0, implicit [[S_CSELECT_B32_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-trunc.v2s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-trunc.v2s16.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-trunc.v2s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-trunc.v2s16.mir @@ -17,34 +17,34 @@ ; GFX6-LABEL: name: trunc_sgpr_v2s32_to_v2s16 ; GFX6: liveins: $sgpr0_sgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 - ; GFX6-NEXT: [[S_LSHL_B32_:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY2]], 16, implicit-def $scc + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX6-NEXT: [[S_LSHL_B32_:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[PRED_COPY2]], 16, implicit-def $scc ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65535 - ; GFX6-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc + ; GFX6-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[PRED_COPY1]], [[S_MOV_B32_]], implicit-def $scc ; GFX6-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[S_LSHL_B32_]], [[S_AND_B32_]], implicit-def $scc ; GFX6-NEXT: S_ENDPGM 0, implicit [[S_OR_B32_]] ; GFX8-LABEL: name: trunc_sgpr_v2s32_to_v2s16 ; GFX8: liveins: $sgpr0_sgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 - ; GFX8-NEXT: [[S_LSHL_B32_:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY2]], 16, implicit-def $scc + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX8-NEXT: [[S_LSHL_B32_:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[PRED_COPY2]], 16, implicit-def $scc ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65535 - ; GFX8-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc + ; GFX8-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[PRED_COPY1]], [[S_MOV_B32_]], implicit-def $scc ; GFX8-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[S_LSHL_B32_]], [[S_AND_B32_]], implicit-def $scc ; GFX8-NEXT: S_ENDPGM 0, 
implicit [[S_OR_B32_]] ; GFX11-LABEL: name: trunc_sgpr_v2s32_to_v2s16 ; GFX11: liveins: $sgpr0_sgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 - ; GFX11-NEXT: [[S_LSHL_B32_:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY2]], 16, implicit-def $scc + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX11-NEXT: [[S_LSHL_B32_:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[PRED_COPY2]], 16, implicit-def $scc ; GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65535 - ; GFX11-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc + ; GFX11-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[PRED_COPY1]], [[S_MOV_B32_]], implicit-def $scc ; GFX11-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[S_LSHL_B32_]], [[S_AND_B32_]], implicit-def $scc ; GFX11-NEXT: S_ENDPGM 0, implicit [[S_OR_B32_]] %0:sgpr(<2 x s32>) = COPY $sgpr0_sgpr1 @@ -64,31 +64,31 @@ ; GFX6-LABEL: name: trunc_vgpr_v2s32_to_v2s16 ; GFX6: liveins: $vgpr0_vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX6-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 16, [[COPY2]], implicit $exec + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX6-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 16, [[PRED_COPY2]], implicit $exec ; GFX6-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 65535, implicit $exec - ; GFX6-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY1]], [[V_MOV_B32_e32_]], implicit $exec + ; GFX6-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PRED_COPY1]], [[V_MOV_B32_e32_]], implicit $exec ; GFX6-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_LSHLREV_B32_e64_]], [[V_AND_B32_e64_]], implicit $exec ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_OR_B32_e64_]] ; GFX8-LABEL: name: trunc_vgpr_v2s32_to_v2s16 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX8-NEXT: [[V_MOV_B32_sdwa:%[0-9]+]]:vgpr_32 = V_MOV_B32_sdwa 0, [[COPY2]], 0, 5, 2, 4, implicit $exec, implicit [[COPY1]](tied-def 0) + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX8-NEXT: [[V_MOV_B32_sdwa:%[0-9]+]]:vgpr_32 = V_MOV_B32_sdwa 0, [[PRED_COPY2]], 0, 5, 2, 4, implicit $exec, implicit [[PRED_COPY1]](tied-def 0) ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_MOV_B32_sdwa]] ; GFX11-LABEL: name: trunc_vgpr_v2s32_to_v2s16 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY 
[[COPY]].sub1 - ; GFX11-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 16, [[COPY2]], implicit $exec + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX11-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 16, [[PRED_COPY2]], implicit $exec ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 65535, implicit $exec - ; GFX11-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY1]], [[V_MOV_B32_e32_]], implicit $exec + ; GFX11-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PRED_COPY1]], [[V_MOV_B32_e32_]], implicit $exec ; GFX11-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_LSHLREV_B32_e64_]], [[V_AND_B32_e64_]], implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_OR_B32_e64_]] %0:vgpr(<2 x s32>) = COPY $vgpr0_vgpr1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-uadde.gfx10.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-uadde.gfx10.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-uadde.gfx10.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-uadde.gfx10.mir @@ -16,12 +16,12 @@ ; GFX10-LABEL: name: uadde_s32_s1_vsv ; GFX10: liveins: $sgpr0, $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX10-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 [[COPY2]], [[V_MOV_B32_e32_]], implicit $exec - ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY]], [[COPY1]], [[V_CMP_EQ_U32_e64_]], 0, implicit $exec + ; GFX10-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 [[PRED_COPY2]], [[V_MOV_B32_e32_]], implicit $exec + ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], [[V_CMP_EQ_U32_e64_]], 0, implicit $exec ; GFX10-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX10-NEXT: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec ; GFX10-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_MOV_B32_e32_2]], 0, [[V_MOV_B32_e32_1]], [[V_ADDC_U32_e64_1]], implicit $exec @@ -50,12 +50,12 @@ ; GFX10-LABEL: name: uadde_s32_s1_vvs ; GFX10: liveins: $sgpr0, $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX10-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 [[COPY2]], [[V_MOV_B32_e32_]], implicit $exec - ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, 
[[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY]], [[COPY1]], [[V_CMP_EQ_U32_e64_]], 0, implicit $exec + ; GFX10-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 [[PRED_COPY2]], [[V_MOV_B32_e32_]], implicit $exec + ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], [[V_CMP_EQ_U32_e64_]], 0, implicit $exec ; GFX10-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX10-NEXT: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec ; GFX10-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_MOV_B32_e32_2]], 0, [[V_MOV_B32_e32_1]], [[V_ADDC_U32_e64_1]], implicit $exec diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-uadde.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-uadde.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-uadde.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-uadde.mir @@ -17,32 +17,32 @@ ; WAVE64-LABEL: name: uadde_s32_s1_sss ; WAVE64: liveins: $sgpr0, $sgpr1, $sgpr2 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; WAVE64-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; WAVE64-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 ; WAVE64-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; WAVE64-NEXT: S_CMP_EQ_U32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc - ; WAVE64-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $scc - ; WAVE64-NEXT: $scc = COPY [[COPY3]] - ; WAVE64-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY]], [[COPY1]], implicit-def $scc, implicit $scc - ; WAVE64-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $scc - ; WAVE64-NEXT: $scc = COPY [[COPY4]] - ; WAVE64-NEXT: [[S_CSELECT_B32_:%[0-9]+]]:sreg_32 = S_CSELECT_B32 [[COPY]], [[COPY1]], implicit $scc + ; WAVE64-NEXT: S_CMP_EQ_U32 [[PRED_COPY2]], [[S_MOV_B32_]], implicit-def $scc + ; WAVE64-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $scc + ; WAVE64-NEXT: $scc = PRED_COPY [[PRED_COPY3]] + ; WAVE64-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc, implicit $scc + ; WAVE64-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $scc + ; WAVE64-NEXT: $scc = PRED_COPY [[PRED_COPY4]] + ; WAVE64-NEXT: [[S_CSELECT_B32_:%[0-9]+]]:sreg_32 = S_CSELECT_B32 [[PRED_COPY]], [[PRED_COPY1]], implicit $scc ; WAVE64-NEXT: S_ENDPGM 0, implicit [[S_ADDC_U32_]], implicit [[S_CSELECT_B32_]] ; WAVE32-LABEL: name: uadde_s32_s1_sss ; WAVE32: liveins: $sgpr0, $sgpr1, $sgpr2 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; WAVE32-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; WAVE32-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 ; WAVE32-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; WAVE32-NEXT: S_CMP_EQ_U32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc - ; WAVE32-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $scc - ; WAVE32-NEXT: $scc = COPY [[COPY3]] - ; WAVE32-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY]], [[COPY1]], implicit-def $scc, implicit $scc - ; WAVE32-NEXT: 
[[COPY4:%[0-9]+]]:sreg_32 = COPY $scc - ; WAVE32-NEXT: $scc = COPY [[COPY4]] - ; WAVE32-NEXT: [[S_CSELECT_B32_:%[0-9]+]]:sreg_32 = S_CSELECT_B32 [[COPY]], [[COPY1]], implicit $scc + ; WAVE32-NEXT: S_CMP_EQ_U32 [[PRED_COPY2]], [[S_MOV_B32_]], implicit-def $scc + ; WAVE32-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $scc + ; WAVE32-NEXT: $scc = PRED_COPY [[PRED_COPY3]] + ; WAVE32-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc, implicit $scc + ; WAVE32-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $scc + ; WAVE32-NEXT: $scc = PRED_COPY [[PRED_COPY4]] + ; WAVE32-NEXT: [[S_CSELECT_B32_:%[0-9]+]]:sreg_32 = S_CSELECT_B32 [[PRED_COPY]], [[PRED_COPY1]], implicit $scc ; WAVE32-NEXT: S_ENDPGM 0, implicit [[S_ADDC_U32_]], implicit [[S_CSELECT_B32_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 @@ -66,24 +66,24 @@ ; WAVE64-LABEL: name: uadde_s32_s1_vvv ; WAVE64: liveins: $vgpr0, $vgpr1, $vgpr2 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE64-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE64-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; WAVE64-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; WAVE64-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[COPY2]], [[V_MOV_B32_e32_]], implicit $exec - ; WAVE64-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY]], [[COPY1]], [[V_CMP_EQ_U32_e64_]], 0, implicit $exec - ; WAVE64-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY1]], 0, [[COPY]], [[V_ADDC_U32_e64_1]], implicit $exec + ; WAVE64-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[PRED_COPY2]], [[V_MOV_B32_e32_]], implicit $exec + ; WAVE64-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], [[V_CMP_EQ_U32_e64_]], 0, implicit $exec + ; WAVE64-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[PRED_COPY1]], 0, [[PRED_COPY]], [[V_ADDC_U32_e64_1]], implicit $exec ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_ADDC_U32_e64_]], implicit [[V_CNDMASK_B32_e64_]] ; WAVE32-LABEL: name: uadde_s32_s1_vvv ; WAVE32: liveins: $vgpr0, $vgpr1, $vgpr2 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE32-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE32-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; WAVE32-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; WAVE32-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 [[COPY2]], [[V_MOV_B32_e32_]], implicit $exec - ; WAVE32-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY]], [[COPY1]], [[V_CMP_EQ_U32_e64_]], 0, implicit $exec - ; WAVE32-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY1]], 0, [[COPY]], [[V_ADDC_U32_e64_1]], implicit $exec + ; WAVE32-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 [[PRED_COPY2]], 
[[V_MOV_B32_e32_]], implicit $exec + ; WAVE32-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], [[V_CMP_EQ_U32_e64_]], 0, implicit $exec + ; WAVE32-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[PRED_COPY1]], 0, [[PRED_COPY]], [[V_ADDC_U32_e64_1]], implicit $exec ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_ADDC_U32_e64_]], implicit [[V_CNDMASK_B32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-uaddo.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-uaddo.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-uaddo.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-uaddo.mir @@ -17,42 +17,42 @@ ; GFX6-LABEL: name: uaddo_s32_s1_sss ; GFX6: liveins: $sgpr0, $sgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX6-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY]], [[COPY1]], implicit-def $scc - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $scc - ; GFX6-NEXT: $scc = COPY [[COPY2]] - ; GFX6-NEXT: [[S_CSELECT_B32_:%[0-9]+]]:sreg_32 = S_CSELECT_B32 [[COPY]], [[COPY1]], implicit $scc + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX6-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $scc + ; GFX6-NEXT: $scc = PRED_COPY [[PRED_COPY2]] + ; GFX6-NEXT: [[S_CSELECT_B32_:%[0-9]+]]:sreg_32 = S_CSELECT_B32 [[PRED_COPY]], [[PRED_COPY1]], implicit $scc ; GFX6-NEXT: S_ENDPGM 0, implicit [[S_ADD_U32_]], implicit [[S_CSELECT_B32_]] ; GFX8-LABEL: name: uaddo_s32_s1_sss ; GFX8: liveins: $sgpr0, $sgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX8-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY]], [[COPY1]], implicit-def $scc - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $scc - ; GFX8-NEXT: $scc = COPY [[COPY2]] - ; GFX8-NEXT: [[S_CSELECT_B32_:%[0-9]+]]:sreg_32 = S_CSELECT_B32 [[COPY]], [[COPY1]], implicit $scc + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX8-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $scc + ; GFX8-NEXT: $scc = PRED_COPY [[PRED_COPY2]] + ; GFX8-NEXT: [[S_CSELECT_B32_:%[0-9]+]]:sreg_32 = S_CSELECT_B32 [[PRED_COPY]], [[PRED_COPY1]], implicit $scc ; GFX8-NEXT: S_ENDPGM 0, implicit [[S_ADD_U32_]], implicit [[S_CSELECT_B32_]] ; GFX9-LABEL: name: uaddo_s32_s1_sss ; GFX9: liveins: $sgpr0, $sgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX9-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY]], [[COPY1]], implicit-def $scc - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $scc - ; GFX9-NEXT: $scc = COPY [[COPY2]] - ; GFX9-NEXT: [[S_CSELECT_B32_:%[0-9]+]]:sreg_32 = S_CSELECT_B32 [[COPY]], [[COPY1]], implicit $scc + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX9-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PRED_COPY]], [[PRED_COPY1]], 
implicit-def $scc + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $scc + ; GFX9-NEXT: $scc = PRED_COPY [[PRED_COPY2]] + ; GFX9-NEXT: [[S_CSELECT_B32_:%[0-9]+]]:sreg_32 = S_CSELECT_B32 [[PRED_COPY]], [[PRED_COPY1]], implicit $scc ; GFX9-NEXT: S_ENDPGM 0, implicit [[S_ADD_U32_]], implicit [[S_CSELECT_B32_]] ; GFX10-LABEL: name: uaddo_s32_s1_sss ; GFX10: liveins: $sgpr0, $sgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX10-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY]], [[COPY1]], implicit-def $scc - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $scc - ; GFX10-NEXT: $scc = COPY [[COPY2]] - ; GFX10-NEXT: [[S_CSELECT_B32_:%[0-9]+]]:sreg_32 = S_CSELECT_B32 [[COPY]], [[COPY1]], implicit $scc + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX10-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $scc + ; GFX10-NEXT: $scc = PRED_COPY [[PRED_COPY2]] + ; GFX10-NEXT: [[S_CSELECT_B32_:%[0-9]+]]:sreg_32 = S_CSELECT_B32 [[PRED_COPY]], [[PRED_COPY1]], implicit $scc ; GFX10-NEXT: S_ENDPGM 0, implicit [[S_ADD_U32_]], implicit [[S_CSELECT_B32_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 @@ -73,34 +73,34 @@ ; GFX6-LABEL: name: uaddo_s32_s1_vvv ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec - ; GFX6-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY1]], 0, [[COPY]], [[V_ADD_CO_U32_e64_1]], implicit $exec + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], 0, implicit $exec + ; GFX6-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[PRED_COPY1]], 0, [[PRED_COPY]], [[V_ADD_CO_U32_e64_1]], implicit $exec ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_ADD_CO_U32_e64_]], implicit [[V_CNDMASK_B32_e64_]] ; GFX8-LABEL: name: uaddo_s32_s1_vvv ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec - ; GFX8-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY1]], 0, [[COPY]], [[V_ADD_CO_U32_e64_1]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], 0, implicit $exec + ; GFX8-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[PRED_COPY1]], 0, [[PRED_COPY]], [[V_ADD_CO_U32_e64_1]], implicit $exec ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_ADD_CO_U32_e64_]], implicit [[V_CNDMASK_B32_e64_]] ; GFX9-LABEL: name: uaddo_s32_s1_vvv ; GFX9: liveins: $vgpr0, 
$vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec - ; GFX9-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY1]], 0, [[COPY]], [[V_ADD_CO_U32_e64_1]], implicit $exec + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], 0, implicit $exec + ; GFX9-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[PRED_COPY1]], 0, [[PRED_COPY]], [[V_ADD_CO_U32_e64_1]], implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_ADD_CO_U32_e64_]], implicit [[V_CNDMASK_B32_e64_]] ; GFX10-LABEL: name: uaddo_s32_s1_vvv ; GFX10: liveins: $vgpr0, $vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec - ; GFX10-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY1]], 0, [[COPY]], [[V_ADD_CO_U32_e64_1]], implicit $exec + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], 0, implicit $exec + ; GFX10-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[PRED_COPY1]], 0, [[PRED_COPY]], [[V_ADD_CO_U32_e64_1]], implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_ADD_CO_U32_e64_]], implicit [[V_CNDMASK_B32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -121,9 +121,9 @@ ; GFX6-LABEL: name: uaddo_s32_s1_vsv ; GFX6: liveins: $sgpr0, $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], 0, implicit $exec ; GFX6-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX6-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec ; GFX6-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_MOV_B32_e32_1]], 0, [[V_MOV_B32_e32_]], [[V_ADD_CO_U32_e64_1]], implicit $exec @@ -131,9 +131,9 @@ ; GFX8-LABEL: name: uaddo_s32_s1_vsv ; GFX8: liveins: $sgpr0, $vgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = 
PRED_COPY $vgpr0 + ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], 0, implicit $exec ; GFX8-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX8-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec ; GFX8-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_MOV_B32_e32_1]], 0, [[V_MOV_B32_e32_]], [[V_ADD_CO_U32_e64_1]], implicit $exec @@ -141,9 +141,9 @@ ; GFX9-LABEL: name: uaddo_s32_s1_vsv ; GFX9: liveins: $sgpr0, $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], 0, implicit $exec ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX9-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec ; GFX9-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_MOV_B32_e32_1]], 0, [[V_MOV_B32_e32_]], [[V_ADD_CO_U32_e64_1]], implicit $exec @@ -151,9 +151,9 @@ ; GFX10-LABEL: name: uaddo_s32_s1_vsv ; GFX10: liveins: $sgpr0, $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], 0, implicit $exec ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX10-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec ; GFX10-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_MOV_B32_e32_1]], 0, [[V_MOV_B32_e32_]], [[V_ADD_CO_U32_e64_1]], implicit $exec @@ -179,9 +179,9 @@ ; GFX6-LABEL: name: uaddo_s32_s1_vvs ; GFX6: liveins: $sgpr0, $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], 0, implicit $exec ; GFX6-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX6-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec ; GFX6-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_MOV_B32_e32_1]], 0, [[V_MOV_B32_e32_]], [[V_ADD_CO_U32_e64_1]], implicit $exec @@ -189,9 +189,9 @@ ; GFX8-LABEL: name: 
uaddo_s32_s1_vvs ; GFX8: liveins: $sgpr0, $vgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], 0, implicit $exec ; GFX8-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX8-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec ; GFX8-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_MOV_B32_e32_1]], 0, [[V_MOV_B32_e32_]], [[V_ADD_CO_U32_e64_1]], implicit $exec @@ -199,9 +199,9 @@ ; GFX9-LABEL: name: uaddo_s32_s1_vvs ; GFX9: liveins: $sgpr0, $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], 0, implicit $exec ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX9-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec ; GFX9-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_MOV_B32_e32_1]], 0, [[V_MOV_B32_e32_]], [[V_ADD_CO_U32_e64_1]], implicit $exec @@ -209,9 +209,9 @@ ; GFX10-LABEL: name: uaddo_s32_s1_vvs ; GFX10: liveins: $sgpr0, $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], 0, implicit $exec ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX10-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec ; GFX10-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_MOV_B32_e32_1]], 0, [[V_MOV_B32_e32_]], [[V_ADD_CO_U32_e64_1]], implicit $exec diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ubfx.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ubfx.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ubfx.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ubfx.mir @@ -30,10 +30,10 @@ ; CHECK-LABEL: name: ubfx_s32_vii ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2, implicit $exec ; CHECK-NEXT: 
[[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 10, implicit $exec - ; CHECK-NEXT: [[V_BFE_U32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], implicit $exec + ; CHECK-NEXT: [[V_BFE_U32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_U32_e64 [[PRED_COPY]], [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], implicit $exec ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_BFE_U32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = G_CONSTANT i32 2 @@ -67,10 +67,10 @@ ; CHECK-LABEL: name: ubfx_s32_vvv ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; CHECK-NEXT: [[V_BFE_U32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_U32_e64 [[COPY]], [[COPY1]], [[COPY2]], implicit $exec + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[V_BFE_U32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], [[PRED_COPY2]], implicit $exec ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_BFE_U32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-uitofp.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-uitofp.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-uitofp.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-uitofp.mir @@ -16,21 +16,21 @@ ; WAVE64-LABEL: name: uitofp_s32_to_s32_vv ; WAVE64: liveins: $vgpr0 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE64-NEXT: [[V_CVT_F32_U32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_F32_U32_e64 [[COPY]], 0, 0, implicit $mode, implicit $exec - ; WAVE64-NEXT: $vgpr0 = COPY [[V_CVT_F32_U32_e64_]] + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE64-NEXT: [[V_CVT_F32_U32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_F32_U32_e64 [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; WAVE64-NEXT: $vgpr0 = PRED_COPY [[V_CVT_F32_U32_e64_]] ; WAVE32-LABEL: name: uitofp_s32_to_s32_vv ; WAVE32: liveins: $vgpr0 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE32-NEXT: [[V_CVT_F32_U32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_F32_U32_e64 [[COPY]], 0, 0, implicit $mode, implicit $exec - ; WAVE32-NEXT: $vgpr0 = COPY [[V_CVT_F32_U32_e64_]] + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE32-NEXT: [[V_CVT_F32_U32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_F32_U32_e64 [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; WAVE32-NEXT: $vgpr0 = PRED_COPY [[V_CVT_F32_U32_e64_]] ; GFX11-LABEL: name: uitofp_s32_to_s32_vv ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[V_CVT_F32_U32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_F32_U32_e64 [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GFX11-NEXT: $vgpr0 = COPY [[V_CVT_F32_U32_e64_]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[V_CVT_F32_U32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_F32_U32_e64 [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[V_CVT_F32_U32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = G_UITOFP %0 $vgpr0 = COPY %1 @@ -49,21 +49,21 @@ ; WAVE64-LABEL: name: uitofp_s32_to_s32_vs ; WAVE64: liveins: $sgpr0 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; WAVE64-NEXT: 
[[V_CVT_F32_U32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_F32_U32_e64 [[COPY]], 0, 0, implicit $mode, implicit $exec - ; WAVE64-NEXT: $vgpr0 = COPY [[V_CVT_F32_U32_e64_]] + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; WAVE64-NEXT: [[V_CVT_F32_U32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_F32_U32_e64 [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; WAVE64-NEXT: $vgpr0 = PRED_COPY [[V_CVT_F32_U32_e64_]] ; WAVE32-LABEL: name: uitofp_s32_to_s32_vs ; WAVE32: liveins: $sgpr0 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; WAVE32-NEXT: [[V_CVT_F32_U32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_F32_U32_e64 [[COPY]], 0, 0, implicit $mode, implicit $exec - ; WAVE32-NEXT: $vgpr0 = COPY [[V_CVT_F32_U32_e64_]] + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; WAVE32-NEXT: [[V_CVT_F32_U32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_F32_U32_e64 [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; WAVE32-NEXT: $vgpr0 = PRED_COPY [[V_CVT_F32_U32_e64_]] ; GFX11-LABEL: name: uitofp_s32_to_s32_vs ; GFX11: liveins: $sgpr0 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX11-NEXT: [[V_CVT_F32_U32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_F32_U32_e64 [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GFX11-NEXT: $vgpr0 = COPY [[V_CVT_F32_U32_e64_]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX11-NEXT: [[V_CVT_F32_U32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_F32_U32_e64 [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[V_CVT_F32_U32_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = G_UITOFP %0 $vgpr0 = COPY %1 @@ -82,24 +82,24 @@ ; WAVE64-LABEL: name: uitofp_s32_to_s16_vv ; WAVE64: liveins: $vgpr0 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE64-NEXT: [[V_CVT_F32_U32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_F32_U32_e32 [[COPY]], implicit $mode, implicit $exec - ; WAVE64-NEXT: %1:vgpr_32 = nofpexcept V_CVT_F16_F32_e64 0, [[V_CVT_F32_U32_e32_]], 0, 0, implicit $mode, implicit $exec - ; WAVE64-NEXT: $vgpr0 = COPY %1 + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE64-NEXT: [[V_CVT_F32_U32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_F32_U32_e32 [[PRED_COPY]], implicit $mode, implicit $exec + ; WAVE64-NEXT: [[V_CVT_F16_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_e64 0, [[V_CVT_F32_U32_e32_]], 0, 0, implicit $mode, implicit $exec + ; WAVE64-NEXT: $vgpr0 = PRED_COPY [[V_CVT_F16_F32_e64_]] ; WAVE32-LABEL: name: uitofp_s32_to_s16_vv ; WAVE32: liveins: $vgpr0 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE32-NEXT: [[V_CVT_F32_U32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_F32_U32_e32 [[COPY]], implicit $mode, implicit $exec - ; WAVE32-NEXT: %1:vgpr_32 = nofpexcept V_CVT_F16_F32_e64 0, [[V_CVT_F32_U32_e32_]], 0, 0, implicit $mode, implicit $exec - ; WAVE32-NEXT: $vgpr0 = COPY %1 + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE32-NEXT: [[V_CVT_F32_U32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_F32_U32_e32 [[PRED_COPY]], implicit $mode, implicit $exec + ; WAVE32-NEXT: [[V_CVT_F16_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_e64 0, [[V_CVT_F32_U32_e32_]], 0, 0, implicit $mode, implicit $exec + ; WAVE32-NEXT: $vgpr0 = PRED_COPY [[V_CVT_F16_F32_e64_]] ; GFX11-LABEL: name: uitofp_s32_to_s16_vv ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[V_CVT_F32_U32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_F32_U32_e32 [[COPY]], implicit $mode, implicit $exec - ; 
GFX11-NEXT: %1:vgpr_32 = nofpexcept V_CVT_F16_F32_t16_e64 0, [[V_CVT_F32_U32_e32_]], 0, 0, implicit $mode, implicit $exec - ; GFX11-NEXT: $vgpr0 = COPY %1 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[V_CVT_F32_U32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_F32_U32_e32 [[PRED_COPY]], implicit $mode, implicit $exec + ; GFX11-NEXT: [[V_CVT_F16_F32_t16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_t16_e64 0, [[V_CVT_F32_U32_e32_]], 0, 0, implicit $mode, implicit $exec + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[V_CVT_F16_F32_t16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s16) = G_UITOFP %0 %2:vgpr(s32) = G_ANYEXT %1 @@ -119,24 +119,24 @@ ; WAVE64-LABEL: name: uitofp_s32_to_s16_vs ; WAVE64: liveins: $sgpr0 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; WAVE64-NEXT: [[V_CVT_F32_U32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_F32_U32_e32 [[COPY]], implicit $mode, implicit $exec - ; WAVE64-NEXT: %1:vgpr_32 = nofpexcept V_CVT_F16_F32_e64 0, [[V_CVT_F32_U32_e32_]], 0, 0, implicit $mode, implicit $exec - ; WAVE64-NEXT: $vgpr0 = COPY %1 + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; WAVE64-NEXT: [[V_CVT_F32_U32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_F32_U32_e32 [[PRED_COPY]], implicit $mode, implicit $exec + ; WAVE64-NEXT: [[V_CVT_F16_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_e64 0, [[V_CVT_F32_U32_e32_]], 0, 0, implicit $mode, implicit $exec + ; WAVE64-NEXT: $vgpr0 = PRED_COPY [[V_CVT_F16_F32_e64_]] ; WAVE32-LABEL: name: uitofp_s32_to_s16_vs ; WAVE32: liveins: $sgpr0 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; WAVE32-NEXT: [[V_CVT_F32_U32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_F32_U32_e32 [[COPY]], implicit $mode, implicit $exec - ; WAVE32-NEXT: %1:vgpr_32 = nofpexcept V_CVT_F16_F32_e64 0, [[V_CVT_F32_U32_e32_]], 0, 0, implicit $mode, implicit $exec - ; WAVE32-NEXT: $vgpr0 = COPY %1 + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; WAVE32-NEXT: [[V_CVT_F32_U32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_F32_U32_e32 [[PRED_COPY]], implicit $mode, implicit $exec + ; WAVE32-NEXT: [[V_CVT_F16_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_e64 0, [[V_CVT_F32_U32_e32_]], 0, 0, implicit $mode, implicit $exec + ; WAVE32-NEXT: $vgpr0 = PRED_COPY [[V_CVT_F16_F32_e64_]] ; GFX11-LABEL: name: uitofp_s32_to_s16_vs ; GFX11: liveins: $sgpr0 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX11-NEXT: [[V_CVT_F32_U32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_F32_U32_e32 [[COPY]], implicit $mode, implicit $exec - ; GFX11-NEXT: %1:vgpr_32 = nofpexcept V_CVT_F16_F32_t16_e64 0, [[V_CVT_F32_U32_e32_]], 0, 0, implicit $mode, implicit $exec - ; GFX11-NEXT: $vgpr0 = COPY %1 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX11-NEXT: [[V_CVT_F32_U32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_F32_U32_e32 [[PRED_COPY]], implicit $mode, implicit $exec + ; GFX11-NEXT: [[V_CVT_F16_F32_t16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_t16_e64 0, [[V_CVT_F32_U32_e32_]], 0, 0, implicit $mode, implicit $exec + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[V_CVT_F16_F32_t16_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s16) = G_UITOFP %0 %2:vgpr(s32) = G_ANYEXT %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-umax.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-umax.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-umax.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-umax.mir @@ -14,9 +14,9 @@ ; GCN-LABEL: name: umax_s32_ss ; GCN: liveins: $sgpr0, $sgpr1 ; GCN-NEXT: 
{{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GCN-NEXT: [[S_MAX_U32_:%[0-9]+]]:sreg_32 = S_MAX_U32 [[COPY]], [[COPY1]], implicit-def $scc + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GCN-NEXT: [[S_MAX_U32_:%[0-9]+]]:sreg_32 = S_MAX_U32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc ; GCN-NEXT: S_ENDPGM 0, implicit [[S_MAX_U32_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 @@ -35,9 +35,9 @@ ; GCN-LABEL: name: umax_s32_sv ; GCN: liveins: $sgpr0, $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[V_MAX_U32_e64_:%[0-9]+]]:vgpr_32 = V_MAX_U32_e64 [[COPY]], [[COPY1]], implicit $exec + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[V_MAX_U32_e64_:%[0-9]+]]:vgpr_32 = V_MAX_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_MAX_U32_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = COPY $vgpr0 @@ -56,9 +56,9 @@ ; GCN-LABEL: name: umax_s32_vs ; GCN: liveins: $sgpr0, $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[V_MAX_U32_e64_:%[0-9]+]]:vgpr_32 = V_MAX_U32_e64 [[COPY]], [[COPY1]], implicit $exec + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[V_MAX_U32_e64_:%[0-9]+]]:vgpr_32 = V_MAX_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_MAX_U32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:sgpr(s32) = COPY $sgpr0 @@ -77,9 +77,9 @@ ; GCN-LABEL: name: umax_s32_vv ; GCN: liveins: $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GCN-NEXT: [[V_MAX_U32_e64_:%[0-9]+]]:vgpr_32 = V_MAX_U32_e64 [[COPY]], [[COPY1]], implicit $exec + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GCN-NEXT: [[V_MAX_U32_e64_:%[0-9]+]]:vgpr_32 = V_MAX_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_MAX_U32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-umin.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-umin.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-umin.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-umin.mir @@ -14,9 +14,9 @@ ; GCN-LABEL: name: umin_s32_ss ; GCN: liveins: $sgpr0, $sgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GCN-NEXT: [[S_MIN_U32_:%[0-9]+]]:sreg_32 = S_MIN_U32 [[COPY]], [[COPY1]], implicit-def $scc + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GCN-NEXT: [[S_MIN_U32_:%[0-9]+]]:sreg_32 = S_MIN_U32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc ; GCN-NEXT: S_ENDPGM 0, implicit [[S_MIN_U32_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 @@ -35,9 +35,9 @@ ; GCN-LABEL: name: umin_s32_sv ; GCN: liveins: $sgpr0, $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; 
GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[V_MIN_U32_e64_:%[0-9]+]]:vgpr_32 = V_MIN_U32_e64 [[COPY]], [[COPY1]], implicit $exec + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[V_MIN_U32_e64_:%[0-9]+]]:vgpr_32 = V_MIN_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_MIN_U32_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = COPY $vgpr0 @@ -56,9 +56,9 @@ ; GCN-LABEL: name: umin_s32_vs ; GCN: liveins: $sgpr0, $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[V_MIN_U32_e64_:%[0-9]+]]:vgpr_32 = V_MIN_U32_e64 [[COPY]], [[COPY1]], implicit $exec + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[V_MIN_U32_e64_:%[0-9]+]]:vgpr_32 = V_MIN_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_MIN_U32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:sgpr(s32) = COPY $sgpr0 @@ -77,9 +77,9 @@ ; GCN-LABEL: name: umin_s32_vv ; GCN: liveins: $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GCN-NEXT: [[V_MIN_U32_e64_:%[0-9]+]]:vgpr_32 = V_MIN_U32_e64 [[COPY]], [[COPY1]], implicit $exec + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GCN-NEXT: [[V_MIN_U32_e64_:%[0-9]+]]:vgpr_32 = V_MIN_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_MIN_U32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-umulh.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-umulh.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-umulh.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-umulh.mir @@ -28,9 +28,9 @@ ; GFX9-LABEL: name: umulh_s32_ss ; GFX9: liveins: $sgpr0, $sgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX9-NEXT: [[S_MUL_HI_U32_:%[0-9]+]]:sreg_32 = S_MUL_HI_U32 [[COPY]], [[COPY1]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX9-NEXT: [[S_MUL_HI_U32_:%[0-9]+]]:sreg_32 = S_MUL_HI_U32 [[PRED_COPY]], [[PRED_COPY1]] ; GFX9-NEXT: S_ENDPGM 0, implicit [[S_MUL_HI_U32_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 @@ -50,16 +50,16 @@ ; SI-LABEL: name: umulh_s32_sv ; SI: liveins: $sgpr0, $vgpr0 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; SI-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; SI-NEXT: [[V_MUL_HI_U32_e64_:%[0-9]+]]:vgpr_32 = V_MUL_HI_U32_e64 [[COPY]], [[COPY1]], implicit $exec + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; SI-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; SI-NEXT: [[V_MUL_HI_U32_e64_:%[0-9]+]]:vgpr_32 = V_MUL_HI_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; SI-NEXT: S_ENDPGM 0, implicit [[V_MUL_HI_U32_e64_]] ; GFX9-LABEL: name: umulh_s32_sv ; GFX9: liveins: $sgpr0, $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[V_MUL_HI_U32_e64_:%[0-9]+]]:vgpr_32 = V_MUL_HI_U32_e64 
[[COPY]], [[COPY1]], implicit $exec + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[V_MUL_HI_U32_e64_:%[0-9]+]]:vgpr_32 = V_MUL_HI_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_MUL_HI_U32_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = COPY $vgpr0 @@ -79,16 +79,16 @@ ; SI-LABEL: name: umulh_s32_vs ; SI: liveins: $sgpr0, $vgpr0 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; SI-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; SI-NEXT: [[V_MUL_HI_U32_e64_:%[0-9]+]]:vgpr_32 = V_MUL_HI_U32_e64 [[COPY]], [[COPY1]], implicit $exec + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; SI-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; SI-NEXT: [[V_MUL_HI_U32_e64_:%[0-9]+]]:vgpr_32 = V_MUL_HI_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; SI-NEXT: S_ENDPGM 0, implicit [[V_MUL_HI_U32_e64_]] ; GFX9-LABEL: name: umulh_s32_vs ; GFX9: liveins: $sgpr0, $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX9-NEXT: [[V_MUL_HI_U32_e64_:%[0-9]+]]:vgpr_32 = V_MUL_HI_U32_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX9-NEXT: [[V_MUL_HI_U32_e64_:%[0-9]+]]:vgpr_32 = V_MUL_HI_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_MUL_HI_U32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:sgpr(s32) = COPY $sgpr0 @@ -108,16 +108,16 @@ ; SI-LABEL: name: umulh_s32_vv ; SI: liveins: $vgpr0, $vgpr1 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; SI-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; SI-NEXT: [[V_MUL_HI_U32_e64_:%[0-9]+]]:vgpr_32 = V_MUL_HI_U32_e64 [[COPY]], [[COPY1]], implicit $exec + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; SI-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; SI-NEXT: [[V_MUL_HI_U32_e64_:%[0-9]+]]:vgpr_32 = V_MUL_HI_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; SI-NEXT: S_ENDPGM 0, implicit [[V_MUL_HI_U32_e64_]] ; GFX9-LABEL: name: umulh_s32_vv ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: [[V_MUL_HI_U32_e64_:%[0-9]+]]:vgpr_32 = V_MUL_HI_U32_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[V_MUL_HI_U32_e64_:%[0-9]+]]:vgpr_32 = V_MUL_HI_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_MUL_HI_U32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-unmerge-values.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-unmerge-values.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-unmerge-values.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-unmerge-values.mir @@ -14,10 +14,10 @@ ; GCN-LABEL: name: test_unmerge_values_v_s32_v_s32_v_s64 ; GCN: liveins: $vgpr0_vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GCN-NEXT: S_ENDPGM 0, 
implicit [[COPY1]], implicit [[COPY2]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GCN-NEXT: S_ENDPGM 0, implicit [[PRED_COPY1]], implicit [[PRED_COPY2]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(s32), %2:vgpr(s32) = G_UNMERGE_VALUES %0 S_ENDPGM 0, implicit %1, implicit %2 @@ -36,10 +36,10 @@ ; GCN-LABEL: name: test_unmerge_values_s_s32_s_s32_s_s64 ; GCN: liveins: $sgpr0_sgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 - ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY1]], implicit [[COPY2]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GCN-NEXT: S_ENDPGM 0, implicit [[PRED_COPY1]], implicit [[PRED_COPY2]] %0:sgpr(s64) = COPY $sgpr0_sgpr1 %1:sgpr(s32), %2:sgpr(s32) = G_UNMERGE_VALUES %0 S_ENDPGM 0, implicit %1, implicit %2 @@ -58,10 +58,10 @@ ; GCN-LABEL: name: test_unmerge_values_v_s32_s_s32_s_s64 ; GCN: liveins: $sgpr0_sgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 - ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY1]], implicit [[COPY2]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GCN-NEXT: S_ENDPGM 0, implicit [[PRED_COPY1]], implicit [[PRED_COPY2]] %0:sgpr(s64) = COPY $sgpr0_sgpr1 %1:vgpr(s32), %2:sgpr(s32) = G_UNMERGE_VALUES %0 S_ENDPGM 0, implicit %1, implicit %2 @@ -80,10 +80,10 @@ ; GCN-LABEL: name: test_unmerge_values_s_s32_v_s32_s_s64 ; GCN: liveins: $sgpr0_sgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY1]], implicit [[COPY2]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GCN-NEXT: S_ENDPGM 0, implicit [[PRED_COPY1]], implicit [[PRED_COPY2]] %0:sgpr(s64) = COPY $sgpr0_sgpr1 %1:sgpr(s32), %2:vgpr(s32) = G_UNMERGE_VALUES %0 S_ENDPGM 0, implicit %1, implicit %2 @@ -102,11 +102,11 @@ ; GCN-LABEL: name: test_unmerge_values_s_s32_s_s32_s32_s_s96 ; GCN: liveins: $sgpr0_sgpr1_sgpr2 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr_96 = COPY $sgpr0_sgpr1_sgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub2 - ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY1]], implicit [[COPY2]], implicit [[COPY3]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_96 = PRED_COPY $sgpr0_sgpr1_sgpr2 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 
= PRED_COPY [[PRED_COPY]].sub2 + ; GCN-NEXT: S_ENDPGM 0, implicit [[PRED_COPY1]], implicit [[PRED_COPY2]], implicit [[PRED_COPY3]] %0:sgpr(s96) = COPY $sgpr0_sgpr1_sgpr2 %1:sgpr(s32), %2:sgpr(s32), %3:sgpr(s32) = G_UNMERGE_VALUES %0 S_ENDPGM 0, implicit %1, implicit %2, implicit %3 @@ -125,12 +125,12 @@ ; GCN-LABEL: name: test_unmerge_values_s_s32_s_s32_s32_s_s32_s_s128 ; GCN: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub2 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub3 - ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY1]], implicit [[COPY2]], implicit [[COPY3]], implicit [[COPY4]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_128 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub2 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub3 + ; GCN-NEXT: S_ENDPGM 0, implicit [[PRED_COPY1]], implicit [[PRED_COPY2]], implicit [[PRED_COPY3]], implicit [[PRED_COPY4]] %0:sgpr(s128) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 %1:sgpr(s32), %2:sgpr(s32), %3:sgpr(s32), %4:sgpr(s32) = G_UNMERGE_VALUES %0 S_ENDPGM 0, implicit %1, implicit %2, implicit %3, implicit %4 @@ -149,10 +149,10 @@ ; GCN-LABEL: name: test_unmerge_values_s_s64_s_s64_s_s128 ; GCN: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY [[COPY]].sub0_sub1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:sreg_64 = COPY [[COPY]].sub2_sub3 - ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY1]], implicit [[COPY2]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_128 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64 = PRED_COPY [[PRED_COPY]].sub0_sub1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_64 = PRED_COPY [[PRED_COPY]].sub2_sub3 + ; GCN-NEXT: S_ENDPGM 0, implicit [[PRED_COPY1]], implicit [[PRED_COPY2]] %0:sgpr(s128) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 %1:sgpr(s64), %2:sgpr(s64) = G_UNMERGE_VALUES %0 S_ENDPGM 0, implicit %1, implicit %2 @@ -172,10 +172,10 @@ ; GCN: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[DEF:%[0-9]+]]:sgpr_192 = IMPLICIT_DEF - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY [[DEF]].sub0_sub1 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY [[DEF]].sub2_sub3 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:sreg_64 = COPY [[DEF]].sub4_sub5 - ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]], implicit [[COPY1]], implicit [[COPY2]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY [[DEF]].sub0_sub1 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64 = PRED_COPY [[DEF]].sub2_sub3 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_64 = PRED_COPY [[DEF]].sub4_sub5 + ; GCN-NEXT: S_ENDPGM 0, implicit [[PRED_COPY]], implicit [[PRED_COPY1]], implicit [[PRED_COPY2]] %0:sgpr(s192) = G_IMPLICIT_DEF %1:sgpr(s64), %2:sgpr(s64), %3:sgpr(s64) = G_UNMERGE_VALUES %0 S_ENDPGM 0, implicit %1, implicit %2, implicit %3 @@ -194,10 +194,10 @@ ; GCN-LABEL: name: test_unmerge_values_rc_set_def_v_s32_v_s32_v_s64 ; GCN: liveins: $vgpr0_vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; 
GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY1]], implicit [[COPY2]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GCN-NEXT: S_ENDPGM 0, implicit [[PRED_COPY1]], implicit [[PRED_COPY2]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr_32(s32), %2:vgpr_32(s32) = G_UNMERGE_VALUES %0 S_ENDPGM 0, implicit %1, implicit %2 @@ -216,10 +216,10 @@ ; GCN-LABEL: name: test_unmerge_values_rc_set_use_v_s32_v_s32_v_s64 ; GCN: liveins: $vgpr0_vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY1]], implicit [[COPY2]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GCN-NEXT: S_ENDPGM 0, implicit [[PRED_COPY1]], implicit [[PRED_COPY2]] %0:vreg_64(s64) = COPY $vgpr0_vgpr1 %1:vgpr(s32), %2:vgpr(s32) = G_UNMERGE_VALUES %0 S_ENDPGM 0, implicit %1, implicit %2 @@ -239,11 +239,11 @@ ; GCN: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[DEF:%[0-9]+]]:sgpr_1024 = IMPLICIT_DEF - ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY [[DEF]].sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr_256 = COPY [[DEF]].sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:sgpr_256 = COPY [[DEF]].sub16_sub17_sub18_sub19_sub20_sub21_sub22_sub23 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_256 = COPY [[DEF]].sub24_sub25_sub26_sub27_sub28_sub29_sub30_sub31 - ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]], implicit [[COPY1]], implicit [[COPY2]], implicit [[COPY3]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_256 = PRED_COPY [[DEF]].sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_256 = PRED_COPY [[DEF]].sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_256 = PRED_COPY [[DEF]].sub16_sub17_sub18_sub19_sub20_sub21_sub22_sub23 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_256 = PRED_COPY [[DEF]].sub24_sub25_sub26_sub27_sub28_sub29_sub30_sub31 + ; GCN-NEXT: S_ENDPGM 0, implicit [[PRED_COPY]], implicit [[PRED_COPY1]], implicit [[PRED_COPY2]], implicit [[PRED_COPY3]] %0:sgpr(s1024) = G_IMPLICIT_DEF %1:sgpr(s256), %2:sgpr(s256), %3:sgpr(s256), %4:sgpr(s256) = G_UNMERGE_VALUES %0 S_ENDPGM 0, implicit %1, implicit %2, implicit %3, implicit %4 @@ -268,11 +268,11 @@ ; GCN-LABEL: name: test_unmerge_values_s_s512_s_s1024 ; GCN: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr_1024 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr_512 = COPY [[COPY]].sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7_sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:sgpr_512 = COPY 
[[COPY]].sub16_sub17_sub18_sub19_sub20_sub21_sub22_sub23_sub24_sub25_sub26_sub27_sub28_sub29_sub30_sub31 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY [[COPY1]] - ; GCN-NEXT: $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 = COPY [[COPY2]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_1024 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_512 = PRED_COPY [[PRED_COPY]].sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7_sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_512 = PRED_COPY [[PRED_COPY]].sub16_sub17_sub18_sub19_sub20_sub21_sub22_sub23_sub24_sub25_sub26_sub27_sub28_sub29_sub30_sub31 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = PRED_COPY [[PRED_COPY1]] + ; GCN-NEXT: $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 = PRED_COPY [[PRED_COPY2]] %0:sgpr(s1024) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 %1:sgpr(s512), %2:sgpr(s512) = G_UNMERGE_VALUES %0 $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY %1 @@ -292,19 +292,19 @@ ; GCN-LABEL: name: test_unmerge_s_v3s32_s_v12s32 ; GCN: liveins: $sgpr0_sgpr1_sgpr2, $sgpr3_sgpr4_sgpr5, $sgpr6_sgpr7_sgpr8, $sgpr9_sgpr10_sgpr11 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr_96 = COPY $sgpr0_sgpr1_sgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr_96 = COPY $sgpr3_sgpr4_sgpr5 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:sgpr_96 = COPY $sgpr6_sgpr7_sgpr8 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_96 = COPY $sgpr9_sgpr10_sgpr11 - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_384_with_sub0_sub1_sub2 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1_sub2, [[COPY1]], %subreg.sub3_sub4_sub5, [[COPY2]], %subreg.sub6_sub7_sub8, [[COPY3]], %subreg.sub9_sub10_sub11 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_96 = COPY [[REG_SEQUENCE]].sub0_sub1_sub2 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_96 = COPY [[REG_SEQUENCE]].sub3_sub4_sub5 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_96 = COPY [[REG_SEQUENCE]].sub6_sub7_sub8 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_96 = COPY [[REG_SEQUENCE]].sub9_sub10_sub11 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2 = COPY [[COPY4]] - ; GCN-NEXT: $sgpr3_sgpr4_sgpr5 = COPY [[COPY5]] - ; GCN-NEXT: $sgpr6_sgpr7_sgpr8 = COPY [[COPY6]] - ; GCN-NEXT: $sgpr9_sgpr10_sgpr11 = COPY [[COPY7]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_96 = PRED_COPY $sgpr0_sgpr1_sgpr2 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_96 = PRED_COPY $sgpr3_sgpr4_sgpr5 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_96 = PRED_COPY $sgpr6_sgpr7_sgpr8 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_96 = PRED_COPY $sgpr9_sgpr10_sgpr11 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_384_with_sub0_sub1_sub2 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0_sub1_sub2, [[PRED_COPY1]], %subreg.sub3_sub4_sub5, [[PRED_COPY2]], %subreg.sub6_sub7_sub8, [[PRED_COPY3]], %subreg.sub9_sub10_sub11 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_96 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1_sub2 + ; GCN-NEXT: 
[[PRED_COPY5:%[0-9]+]]:sgpr_96 = PRED_COPY [[REG_SEQUENCE]].sub3_sub4_sub5 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_96 = PRED_COPY [[REG_SEQUENCE]].sub6_sub7_sub8 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_96 = PRED_COPY [[REG_SEQUENCE]].sub9_sub10_sub11 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2 = PRED_COPY [[PRED_COPY4]] + ; GCN-NEXT: $sgpr3_sgpr4_sgpr5 = PRED_COPY [[PRED_COPY5]] + ; GCN-NEXT: $sgpr6_sgpr7_sgpr8 = PRED_COPY [[PRED_COPY6]] + ; GCN-NEXT: $sgpr9_sgpr10_sgpr11 = PRED_COPY [[PRED_COPY7]] %0:sgpr(<3 x s32>) = COPY $sgpr0_sgpr1_sgpr2 %1:sgpr(<3 x s32>) = COPY $sgpr3_sgpr4_sgpr5 %2:sgpr(<3 x s32>) = COPY $sgpr6_sgpr7_sgpr8 @@ -330,17 +330,17 @@ ; GCN-LABEL: name: test_unmerge_v_v3s32_v_v12s32 ; GCN: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5, $vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_192 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vreg_192 = COPY $vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11 - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_384 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1_sub2_sub3_sub4_sub5, [[COPY1]], %subreg.sub6_sub7_sub8_sub9_sub10_sub11 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vreg_96 = COPY [[REG_SEQUENCE]].sub0_sub1_sub2 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:vreg_96 = COPY [[REG_SEQUENCE]].sub3_sub4_sub5 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:vreg_96 = COPY [[REG_SEQUENCE]].sub6_sub7_sub8 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:vreg_96 = COPY [[REG_SEQUENCE]].sub9_sub10_sub11 - ; GCN-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY2]] - ; GCN-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY3]] - ; GCN-NEXT: $vgpr6_vgpr7_vgpr8 = COPY [[COPY4]] - ; GCN-NEXT: $vgpr9_vgpr10_vgpr11 = COPY [[COPY5]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_192 = PRED_COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_192 = PRED_COPY $vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_384 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0_sub1_sub2_sub3_sub4_sub5, [[PRED_COPY1]], %subreg.sub6_sub7_sub8_sub9_sub10_sub11 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_96 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1_sub2 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:vreg_96 = PRED_COPY [[REG_SEQUENCE]].sub3_sub4_sub5 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:vreg_96 = PRED_COPY [[REG_SEQUENCE]].sub6_sub7_sub8 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:vreg_96 = PRED_COPY [[REG_SEQUENCE]].sub9_sub10_sub11 + ; GCN-NEXT: $vgpr0_vgpr1_vgpr2 = PRED_COPY [[PRED_COPY2]] + ; GCN-NEXT: $vgpr3_vgpr4_vgpr5 = PRED_COPY [[PRED_COPY3]] + ; GCN-NEXT: $vgpr6_vgpr7_vgpr8 = PRED_COPY [[PRED_COPY4]] + ; GCN-NEXT: $vgpr9_vgpr10_vgpr11 = PRED_COPY [[PRED_COPY5]] %0:vgpr(<6 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 %1:vgpr(<6 x s32>) = COPY $vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11 %2:vgpr(<12 x s32>) = G_CONCAT_VECTORS %0, %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-usube.gfx10.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-usube.gfx10.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-usube.gfx10.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-usube.gfx10.mir @@ -16,12 +16,12 @@ ; GFX10-LABEL: name: usube_s32_s1_vsv ; GFX10: liveins: $sgpr0, $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = 
PRED_COPY $vgpr2 ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX10-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 [[COPY2]], [[V_MOV_B32_e32_]], implicit $exec - ; GFX10-NEXT: [[V_SUBB_U32_e64_:%[0-9]+]]:vgpr_32, [[V_SUBB_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_SUBB_U32_e64 [[COPY]], [[COPY1]], [[V_CMP_EQ_U32_e64_]], 0, implicit $exec + ; GFX10-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 [[PRED_COPY2]], [[V_MOV_B32_e32_]], implicit $exec + ; GFX10-NEXT: [[V_SUBB_U32_e64_:%[0-9]+]]:vgpr_32, [[V_SUBB_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_SUBB_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], [[V_CMP_EQ_U32_e64_]], 0, implicit $exec ; GFX10-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX10-NEXT: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec ; GFX10-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_MOV_B32_e32_2]], 0, [[V_MOV_B32_e32_1]], [[V_SUBB_U32_e64_1]], implicit $exec @@ -50,12 +50,12 @@ ; GFX10-LABEL: name: usube_s32_s1_vvs ; GFX10: liveins: $sgpr0, $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX10-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 [[COPY2]], [[V_MOV_B32_e32_]], implicit $exec - ; GFX10-NEXT: [[V_SUBB_U32_e64_:%[0-9]+]]:vgpr_32, [[V_SUBB_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_SUBB_U32_e64 [[COPY]], [[COPY1]], [[V_CMP_EQ_U32_e64_]], 0, implicit $exec + ; GFX10-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 [[PRED_COPY2]], [[V_MOV_B32_e32_]], implicit $exec + ; GFX10-NEXT: [[V_SUBB_U32_e64_:%[0-9]+]]:vgpr_32, [[V_SUBB_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_SUBB_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], [[V_CMP_EQ_U32_e64_]], 0, implicit $exec ; GFX10-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX10-NEXT: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec ; GFX10-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_MOV_B32_e32_2]], 0, [[V_MOV_B32_e32_1]], [[V_SUBB_U32_e64_1]], implicit $exec diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-usube.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-usube.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-usube.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-usube.mir @@ -17,32 +17,32 @@ ; WAVE64-LABEL: name: usube_s32_s1_sss ; WAVE64: liveins: $sgpr0, $sgpr1, $sgpr2 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; WAVE64-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; WAVE64-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 ; WAVE64-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; WAVE64-NEXT: S_CMP_EQ_U32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc - ; WAVE64-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $scc - ; WAVE64-NEXT: $scc = COPY [[COPY3]] 
- ; WAVE64-NEXT: [[S_SUBB_U32_:%[0-9]+]]:sreg_32 = S_SUBB_U32 [[COPY]], [[COPY1]], implicit-def $scc, implicit $scc - ; WAVE64-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $scc - ; WAVE64-NEXT: $scc = COPY [[COPY4]] - ; WAVE64-NEXT: [[S_CSELECT_B32_:%[0-9]+]]:sreg_32 = S_CSELECT_B32 [[COPY]], [[COPY1]], implicit $scc + ; WAVE64-NEXT: S_CMP_EQ_U32 [[PRED_COPY2]], [[S_MOV_B32_]], implicit-def $scc + ; WAVE64-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $scc + ; WAVE64-NEXT: $scc = PRED_COPY [[PRED_COPY3]] + ; WAVE64-NEXT: [[S_SUBB_U32_:%[0-9]+]]:sreg_32 = S_SUBB_U32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc, implicit $scc + ; WAVE64-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $scc + ; WAVE64-NEXT: $scc = PRED_COPY [[PRED_COPY4]] + ; WAVE64-NEXT: [[S_CSELECT_B32_:%[0-9]+]]:sreg_32 = S_CSELECT_B32 [[PRED_COPY]], [[PRED_COPY1]], implicit $scc ; WAVE64-NEXT: S_ENDPGM 0, implicit [[S_SUBB_U32_]], implicit [[S_CSELECT_B32_]] ; WAVE32-LABEL: name: usube_s32_s1_sss ; WAVE32: liveins: $sgpr0, $sgpr1, $sgpr2 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; WAVE32-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; WAVE32-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 ; WAVE32-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; WAVE32-NEXT: S_CMP_EQ_U32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc - ; WAVE32-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $scc - ; WAVE32-NEXT: $scc = COPY [[COPY3]] - ; WAVE32-NEXT: [[S_SUBB_U32_:%[0-9]+]]:sreg_32 = S_SUBB_U32 [[COPY]], [[COPY1]], implicit-def $scc, implicit $scc - ; WAVE32-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $scc - ; WAVE32-NEXT: $scc = COPY [[COPY4]] - ; WAVE32-NEXT: [[S_CSELECT_B32_:%[0-9]+]]:sreg_32 = S_CSELECT_B32 [[COPY]], [[COPY1]], implicit $scc + ; WAVE32-NEXT: S_CMP_EQ_U32 [[PRED_COPY2]], [[S_MOV_B32_]], implicit-def $scc + ; WAVE32-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $scc + ; WAVE32-NEXT: $scc = PRED_COPY [[PRED_COPY3]] + ; WAVE32-NEXT: [[S_SUBB_U32_:%[0-9]+]]:sreg_32 = S_SUBB_U32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc, implicit $scc + ; WAVE32-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $scc + ; WAVE32-NEXT: $scc = PRED_COPY [[PRED_COPY4]] + ; WAVE32-NEXT: [[S_CSELECT_B32_:%[0-9]+]]:sreg_32 = S_CSELECT_B32 [[PRED_COPY]], [[PRED_COPY1]], implicit $scc ; WAVE32-NEXT: S_ENDPGM 0, implicit [[S_SUBB_U32_]], implicit [[S_CSELECT_B32_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 @@ -66,24 +66,24 @@ ; WAVE64-LABEL: name: usube_s32_s1_vvv ; WAVE64: liveins: $vgpr0, $vgpr1, $vgpr2 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE64-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE64-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; WAVE64-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; WAVE64-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[COPY2]], [[V_MOV_B32_e32_]], implicit $exec - ; WAVE64-NEXT: [[V_SUBB_U32_e64_:%[0-9]+]]:vgpr_32, [[V_SUBB_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_SUBB_U32_e64 [[COPY]], [[COPY1]], [[V_CMP_EQ_U32_e64_]], 0, implicit $exec - ; WAVE64-NEXT: 
[[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY1]], 0, [[COPY]], [[V_SUBB_U32_e64_1]], implicit $exec + ; WAVE64-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[PRED_COPY2]], [[V_MOV_B32_e32_]], implicit $exec + ; WAVE64-NEXT: [[V_SUBB_U32_e64_:%[0-9]+]]:vgpr_32, [[V_SUBB_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_SUBB_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], [[V_CMP_EQ_U32_e64_]], 0, implicit $exec + ; WAVE64-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[PRED_COPY1]], 0, [[PRED_COPY]], [[V_SUBB_U32_e64_1]], implicit $exec ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_SUBB_U32_e64_]], implicit [[V_CNDMASK_B32_e64_]] ; WAVE32-LABEL: name: usube_s32_s1_vvv ; WAVE32: liveins: $vgpr0, $vgpr1, $vgpr2 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE32-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE32-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; WAVE32-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; WAVE32-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 [[COPY2]], [[V_MOV_B32_e32_]], implicit $exec - ; WAVE32-NEXT: [[V_SUBB_U32_e64_:%[0-9]+]]:vgpr_32, [[V_SUBB_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_SUBB_U32_e64 [[COPY]], [[COPY1]], [[V_CMP_EQ_U32_e64_]], 0, implicit $exec - ; WAVE32-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY1]], 0, [[COPY]], [[V_SUBB_U32_e64_1]], implicit $exec + ; WAVE32-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 [[PRED_COPY2]], [[V_MOV_B32_e32_]], implicit $exec + ; WAVE32-NEXT: [[V_SUBB_U32_e64_:%[0-9]+]]:vgpr_32, [[V_SUBB_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_SUBB_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], [[V_CMP_EQ_U32_e64_]], 0, implicit $exec + ; WAVE32-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[PRED_COPY1]], 0, [[PRED_COPY]], [[V_SUBB_U32_e64_1]], implicit $exec ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_SUBB_U32_e64_]], implicit [[V_CNDMASK_B32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-usubo.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-usubo.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-usubo.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-usubo.mir @@ -17,42 +17,42 @@ ; GFX6-LABEL: name: usubo_s32_s1_sss ; GFX6: liveins: $sgpr0, $sgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX6-NEXT: [[S_SUB_U32_:%[0-9]+]]:sreg_32 = S_SUB_U32 [[COPY]], [[COPY1]], implicit-def $scc - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $scc - ; GFX6-NEXT: $scc = COPY [[COPY2]] - ; GFX6-NEXT: [[S_CSELECT_B32_:%[0-9]+]]:sreg_32 = S_CSELECT_B32 [[COPY]], [[COPY1]], implicit $scc + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX6-NEXT: [[S_SUB_U32_:%[0-9]+]]:sreg_32 = S_SUB_U32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $scc + ; GFX6-NEXT: $scc = PRED_COPY [[PRED_COPY2]] + ; GFX6-NEXT: [[S_CSELECT_B32_:%[0-9]+]]:sreg_32 = S_CSELECT_B32 [[PRED_COPY]], [[PRED_COPY1]], implicit $scc ; GFX6-NEXT: S_ENDPGM 0, 
implicit [[S_SUB_U32_]], implicit [[S_CSELECT_B32_]] ; GFX8-LABEL: name: usubo_s32_s1_sss ; GFX8: liveins: $sgpr0, $sgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX8-NEXT: [[S_SUB_U32_:%[0-9]+]]:sreg_32 = S_SUB_U32 [[COPY]], [[COPY1]], implicit-def $scc - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $scc - ; GFX8-NEXT: $scc = COPY [[COPY2]] - ; GFX8-NEXT: [[S_CSELECT_B32_:%[0-9]+]]:sreg_32 = S_CSELECT_B32 [[COPY]], [[COPY1]], implicit $scc + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX8-NEXT: [[S_SUB_U32_:%[0-9]+]]:sreg_32 = S_SUB_U32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $scc + ; GFX8-NEXT: $scc = PRED_COPY [[PRED_COPY2]] + ; GFX8-NEXT: [[S_CSELECT_B32_:%[0-9]+]]:sreg_32 = S_CSELECT_B32 [[PRED_COPY]], [[PRED_COPY1]], implicit $scc ; GFX8-NEXT: S_ENDPGM 0, implicit [[S_SUB_U32_]], implicit [[S_CSELECT_B32_]] ; GFX9-LABEL: name: usubo_s32_s1_sss ; GFX9: liveins: $sgpr0, $sgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX9-NEXT: [[S_SUB_U32_:%[0-9]+]]:sreg_32 = S_SUB_U32 [[COPY]], [[COPY1]], implicit-def $scc - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $scc - ; GFX9-NEXT: $scc = COPY [[COPY2]] - ; GFX9-NEXT: [[S_CSELECT_B32_:%[0-9]+]]:sreg_32 = S_CSELECT_B32 [[COPY]], [[COPY1]], implicit $scc + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX9-NEXT: [[S_SUB_U32_:%[0-9]+]]:sreg_32 = S_SUB_U32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $scc + ; GFX9-NEXT: $scc = PRED_COPY [[PRED_COPY2]] + ; GFX9-NEXT: [[S_CSELECT_B32_:%[0-9]+]]:sreg_32 = S_CSELECT_B32 [[PRED_COPY]], [[PRED_COPY1]], implicit $scc ; GFX9-NEXT: S_ENDPGM 0, implicit [[S_SUB_U32_]], implicit [[S_CSELECT_B32_]] ; GFX10-LABEL: name: usubo_s32_s1_sss ; GFX10: liveins: $sgpr0, $sgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX10-NEXT: [[S_SUB_U32_:%[0-9]+]]:sreg_32 = S_SUB_U32 [[COPY]], [[COPY1]], implicit-def $scc - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $scc - ; GFX10-NEXT: $scc = COPY [[COPY2]] - ; GFX10-NEXT: [[S_CSELECT_B32_:%[0-9]+]]:sreg_32 = S_CSELECT_B32 [[COPY]], [[COPY1]], implicit $scc + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX10-NEXT: [[S_SUB_U32_:%[0-9]+]]:sreg_32 = S_SUB_U32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $scc + ; GFX10-NEXT: $scc = PRED_COPY [[PRED_COPY2]] + ; GFX10-NEXT: [[S_CSELECT_B32_:%[0-9]+]]:sreg_32 = S_CSELECT_B32 [[PRED_COPY]], [[PRED_COPY1]], implicit $scc ; GFX10-NEXT: S_ENDPGM 0, implicit [[S_SUB_U32_]], implicit [[S_CSELECT_B32_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 @@ -73,34 +73,34 @@ ; GFX6-LABEL: name: usubo_s32_s1_vvv ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6-NEXT: [[V_SUB_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_SUB_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_SUB_CO_U32_e64 [[COPY]], [[COPY1]], 0, implicit 
$exec - ; GFX6-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY1]], 0, [[COPY]], [[V_SUB_CO_U32_e64_1]], implicit $exec + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX6-NEXT: [[V_SUB_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_SUB_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_SUB_CO_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], 0, implicit $exec + ; GFX6-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[PRED_COPY1]], 0, [[PRED_COPY]], [[V_SUB_CO_U32_e64_1]], implicit $exec ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_SUB_CO_U32_e64_]], implicit [[V_CNDMASK_B32_e64_]] ; GFX8-LABEL: name: usubo_s32_s1_vvv ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX8-NEXT: [[V_SUB_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_SUB_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_SUB_CO_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec - ; GFX8-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY1]], 0, [[COPY]], [[V_SUB_CO_U32_e64_1]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX8-NEXT: [[V_SUB_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_SUB_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_SUB_CO_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], 0, implicit $exec + ; GFX8-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[PRED_COPY1]], 0, [[PRED_COPY]], [[V_SUB_CO_U32_e64_1]], implicit $exec ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_SUB_CO_U32_e64_]], implicit [[V_CNDMASK_B32_e64_]] ; GFX9-LABEL: name: usubo_s32_s1_vvv ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: [[V_SUB_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_SUB_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_SUB_CO_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec - ; GFX9-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY1]], 0, [[COPY]], [[V_SUB_CO_U32_e64_1]], implicit $exec + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[V_SUB_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_SUB_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_SUB_CO_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], 0, implicit $exec + ; GFX9-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[PRED_COPY1]], 0, [[PRED_COPY]], [[V_SUB_CO_U32_e64_1]], implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_SUB_CO_U32_e64_]], implicit [[V_CNDMASK_B32_e64_]] ; GFX10-LABEL: name: usubo_s32_s1_vvv ; GFX10: liveins: $vgpr0, $vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX10-NEXT: [[V_SUB_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_SUB_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_SUB_CO_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec - ; GFX10-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY1]], 0, [[COPY]], [[V_SUB_CO_U32_e64_1]], implicit $exec + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[V_SUB_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_SUB_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_SUB_CO_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], 0, implicit $exec + ; 
GFX10-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[PRED_COPY1]], 0, [[PRED_COPY]], [[V_SUB_CO_U32_e64_1]], implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_SUB_CO_U32_e64_]], implicit [[V_CNDMASK_B32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -121,9 +121,9 @@ ; GFX6-LABEL: name: usubo_s32_s1_vsv ; GFX6: liveins: $sgpr0, $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[V_SUB_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_SUB_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_SUB_CO_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[V_SUB_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_SUB_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_SUB_CO_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], 0, implicit $exec ; GFX6-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX6-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec ; GFX6-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_MOV_B32_e32_1]], 0, [[V_MOV_B32_e32_]], [[V_SUB_CO_U32_e64_1]], implicit $exec @@ -131,9 +131,9 @@ ; GFX8-LABEL: name: usubo_s32_s1_vsv ; GFX8: liveins: $sgpr0, $vgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: [[V_SUB_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_SUB_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_SUB_CO_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[V_SUB_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_SUB_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_SUB_CO_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], 0, implicit $exec ; GFX8-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX8-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec ; GFX8-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_MOV_B32_e32_1]], 0, [[V_MOV_B32_e32_]], [[V_SUB_CO_U32_e64_1]], implicit $exec @@ -141,9 +141,9 @@ ; GFX9-LABEL: name: usubo_s32_s1_vsv ; GFX9: liveins: $sgpr0, $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[V_SUB_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_SUB_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_SUB_CO_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[V_SUB_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_SUB_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_SUB_CO_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], 0, implicit $exec ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX9-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec ; GFX9-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_MOV_B32_e32_1]], 0, [[V_MOV_B32_e32_]], [[V_SUB_CO_U32_e64_1]], implicit $exec @@ -151,9 +151,9 @@ ; GFX10-LABEL: name: usubo_s32_s1_vsv ; GFX10: liveins: $sgpr0, $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[V_SUB_CO_U32_e64_:%[0-9]+]]:vgpr_32, 
[[V_SUB_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_SUB_CO_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[V_SUB_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_SUB_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_SUB_CO_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], 0, implicit $exec ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX10-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec ; GFX10-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_MOV_B32_e32_1]], 0, [[V_MOV_B32_e32_]], [[V_SUB_CO_U32_e64_1]], implicit $exec @@ -179,9 +179,9 @@ ; GFX6-LABEL: name: usubo_s32_s1_vvs ; GFX6: liveins: $sgpr0, $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX6-NEXT: [[V_SUB_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_SUB_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_SUB_CO_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX6-NEXT: [[V_SUB_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_SUB_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_SUB_CO_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], 0, implicit $exec ; GFX6-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX6-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec ; GFX6-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_MOV_B32_e32_1]], 0, [[V_MOV_B32_e32_]], [[V_SUB_CO_U32_e64_1]], implicit $exec @@ -189,9 +189,9 @@ ; GFX8-LABEL: name: usubo_s32_s1_vvs ; GFX8: liveins: $sgpr0, $vgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX8-NEXT: [[V_SUB_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_SUB_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_SUB_CO_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX8-NEXT: [[V_SUB_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_SUB_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_SUB_CO_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], 0, implicit $exec ; GFX8-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX8-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec ; GFX8-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_MOV_B32_e32_1]], 0, [[V_MOV_B32_e32_]], [[V_SUB_CO_U32_e64_1]], implicit $exec @@ -199,9 +199,9 @@ ; GFX9-LABEL: name: usubo_s32_s1_vvs ; GFX9: liveins: $sgpr0, $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX9-NEXT: [[V_SUB_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_SUB_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_SUB_CO_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX9-NEXT: [[V_SUB_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_SUB_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_SUB_CO_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], 0, implicit $exec ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX9-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit 
$exec ; GFX9-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_MOV_B32_e32_1]], 0, [[V_MOV_B32_e32_]], [[V_SUB_CO_U32_e64_1]], implicit $exec @@ -209,9 +209,9 @@ ; GFX10-LABEL: name: usubo_s32_s1_vvs ; GFX10: liveins: $sgpr0, $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX10-NEXT: [[V_SUB_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_SUB_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_SUB_CO_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX10-NEXT: [[V_SUB_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_SUB_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_SUB_CO_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], 0, implicit $exec ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX10-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec ; GFX10-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_MOV_B32_e32_1]], 0, [[V_MOV_B32_e32_]], [[V_SUB_CO_U32_e64_1]], implicit $exec diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-xor.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-xor.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-xor.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-xor.mir @@ -17,21 +17,21 @@ ; WAVE64-LABEL: name: xor_s1_vcc_vcc_vcc ; WAVE64: liveins: $vgpr0, $vgpr1 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 ; WAVE64-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; WAVE64-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], implicit $exec - ; WAVE64-NEXT: [[V_CMP_EQ_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[COPY1]], [[V_MOV_B32_e32_]], implicit $exec + ; WAVE64-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[PRED_COPY]], [[V_MOV_B32_e32_]], implicit $exec + ; WAVE64-NEXT: [[V_CMP_EQ_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[PRED_COPY1]], [[V_MOV_B32_e32_]], implicit $exec ; WAVE64-NEXT: [[S_XOR_B64_:%[0-9]+]]:sreg_64_xexec = S_XOR_B64 [[V_CMP_EQ_U32_e64_]], [[V_CMP_EQ_U32_e64_1]], implicit-def dead $scc ; WAVE64-NEXT: S_ENDPGM 0, implicit [[S_XOR_B64_]] ; WAVE32-LABEL: name: xor_s1_vcc_vcc_vcc ; WAVE32: liveins: $vgpr0, $vgpr1 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 ; WAVE32-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; WAVE32-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], implicit $exec - ; WAVE32-NEXT: [[V_CMP_EQ_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 [[COPY1]], [[V_MOV_B32_e32_]], implicit $exec + ; WAVE32-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 [[PRED_COPY]], [[V_MOV_B32_e32_]], implicit $exec + ; WAVE32-NEXT: [[V_CMP_EQ_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 [[PRED_COPY1]], [[V_MOV_B32_e32_]], implicit $exec ; WAVE32-NEXT: 
[[S_XOR_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_XOR_B32 [[V_CMP_EQ_U32_e64_]], [[V_CMP_EQ_U32_e64_1]], implicit-def dead $scc ; WAVE32-NEXT: S_ENDPGM 0, implicit [[S_XOR_B32_]] %0:vgpr(s32) = COPY $vgpr0 @@ -58,16 +58,16 @@ ; WAVE64-LABEL: name: xor_s1_sgpr_sgpr_sgpr ; WAVE64: liveins: $sgpr0, $sgpr1 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; WAVE64-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY]], [[COPY1]], implicit-def dead $scc + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; WAVE64-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def dead $scc ; WAVE64-NEXT: S_ENDPGM 0, implicit [[S_XOR_B32_]] ; WAVE32-LABEL: name: xor_s1_sgpr_sgpr_sgpr ; WAVE32: liveins: $sgpr0, $sgpr1 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; WAVE32-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY]], [[COPY1]], implicit-def dead $scc + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; WAVE32-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def dead $scc ; WAVE32-NEXT: S_ENDPGM 0, implicit [[S_XOR_B32_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 @@ -90,16 +90,16 @@ ; WAVE64-LABEL: name: xor_s16_sgpr_sgpr_sgpr ; WAVE64: liveins: $sgpr0, $sgpr1 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; WAVE64-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY]], [[COPY1]], implicit-def dead $scc + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; WAVE64-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def dead $scc ; WAVE64-NEXT: S_ENDPGM 0, implicit [[S_XOR_B32_]] ; WAVE32-LABEL: name: xor_s16_sgpr_sgpr_sgpr ; WAVE32: liveins: $sgpr0, $sgpr1 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; WAVE32-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY]], [[COPY1]], implicit-def dead $scc + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; WAVE32-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def dead $scc ; WAVE32-NEXT: S_ENDPGM 0, implicit [[S_XOR_B32_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 @@ -122,16 +122,16 @@ ; WAVE64-LABEL: name: xor_s16_vgpr_vgpr_vgpr ; WAVE64: liveins: $vgpr0, $vgpr1 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE64-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[COPY]], [[COPY1]], implicit $exec + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE64-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_XOR_B32_e64_]] ; WAVE32-LABEL: name: xor_s16_vgpr_vgpr_vgpr ; WAVE32: liveins: $vgpr0, $vgpr1 ; WAVE32-NEXT: {{ $}} 
- ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE32-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[COPY]], [[COPY1]], implicit $exec + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE32-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_XOR_B32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -154,16 +154,16 @@ ; WAVE64-LABEL: name: xor_s32_sgpr_sgpr_sgpr ; WAVE64: liveins: $sgpr0, $sgpr1 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; WAVE64-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY]], [[COPY1]], implicit-def $scc + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; WAVE64-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc ; WAVE64-NEXT: S_ENDPGM 0, implicit [[S_XOR_B32_]] ; WAVE32-LABEL: name: xor_s32_sgpr_sgpr_sgpr ; WAVE32: liveins: $sgpr0, $sgpr1 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; WAVE32-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY]], [[COPY1]], implicit-def $scc + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; WAVE32-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc ; WAVE32-NEXT: S_ENDPGM 0, implicit [[S_XOR_B32_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 @@ -184,16 +184,16 @@ ; WAVE64-LABEL: name: xor_s64_sgpr_sgpr_sgpr ; WAVE64: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3 - ; WAVE64-NEXT: [[S_XOR_B64_:%[0-9]+]]:sreg_64 = S_XOR_B64 [[COPY]], [[COPY1]], implicit-def $scc + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr2_sgpr3 + ; WAVE64-NEXT: [[S_XOR_B64_:%[0-9]+]]:sreg_64 = S_XOR_B64 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc ; WAVE64-NEXT: S_ENDPGM 0, implicit [[S_XOR_B64_]] ; WAVE32-LABEL: name: xor_s64_sgpr_sgpr_sgpr ; WAVE32: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3 - ; WAVE32-NEXT: [[S_XOR_B64_:%[0-9]+]]:sreg_64 = S_XOR_B64 [[COPY]], [[COPY1]], implicit-def $scc + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr2_sgpr3 + ; WAVE32-NEXT: [[S_XOR_B64_:%[0-9]+]]:sreg_64 = S_XOR_B64 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc ; WAVE32-NEXT: S_ENDPGM 0, implicit [[S_XOR_B64_]] %0:sgpr(s64) = COPY $sgpr0_sgpr1 %1:sgpr(s64) = COPY $sgpr2_sgpr3 @@ -214,16 +214,16 @@ ; WAVE64-LABEL: name: xor_v2s16_sgpr_sgpr_sgpr ; WAVE64: liveins: $sgpr0, $sgpr1 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; WAVE64-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY]], [[COPY1]], 
implicit-def dead $scc + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; WAVE64-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def dead $scc ; WAVE64-NEXT: S_ENDPGM 0, implicit [[S_XOR_B32_]] ; WAVE32-LABEL: name: xor_v2s16_sgpr_sgpr_sgpr ; WAVE32: liveins: $sgpr0, $sgpr1 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; WAVE32-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY]], [[COPY1]], implicit-def dead $scc + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; WAVE32-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def dead $scc ; WAVE32-NEXT: S_ENDPGM 0, implicit [[S_XOR_B32_]] %0:sgpr(<2 x s16>) = COPY $sgpr0 %1:sgpr(<2 x s16>) = COPY $sgpr1 @@ -244,16 +244,16 @@ ; WAVE64-LABEL: name: xor_v2s32_sgpr_sgpr_sgpr ; WAVE64: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3 - ; WAVE64-NEXT: [[S_XOR_B64_:%[0-9]+]]:sreg_64 = S_XOR_B64 [[COPY]], [[COPY1]], implicit-def dead $scc + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr2_sgpr3 + ; WAVE64-NEXT: [[S_XOR_B64_:%[0-9]+]]:sreg_64 = S_XOR_B64 [[PRED_COPY]], [[PRED_COPY1]], implicit-def dead $scc ; WAVE64-NEXT: S_ENDPGM 0, implicit [[S_XOR_B64_]] ; WAVE32-LABEL: name: xor_v2s32_sgpr_sgpr_sgpr ; WAVE32: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3 - ; WAVE32-NEXT: [[S_XOR_B64_:%[0-9]+]]:sreg_64 = S_XOR_B64 [[COPY]], [[COPY1]], implicit-def dead $scc + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr2_sgpr3 + ; WAVE32-NEXT: [[S_XOR_B64_:%[0-9]+]]:sreg_64 = S_XOR_B64 [[PRED_COPY]], [[PRED_COPY1]], implicit-def dead $scc ; WAVE32-NEXT: S_ENDPGM 0, implicit [[S_XOR_B64_]] %0:sgpr(<2 x s32>) = COPY $sgpr0_sgpr1 %1:sgpr(<2 x s32>) = COPY $sgpr2_sgpr3 @@ -274,16 +274,16 @@ ; WAVE64-LABEL: name: xor_v4s16_sgpr_sgpr_sgpr ; WAVE64: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3 - ; WAVE64-NEXT: [[S_XOR_B64_:%[0-9]+]]:sreg_64 = S_XOR_B64 [[COPY]], [[COPY1]], implicit-def dead $scc + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr2_sgpr3 + ; WAVE64-NEXT: [[S_XOR_B64_:%[0-9]+]]:sreg_64 = S_XOR_B64 [[PRED_COPY]], [[PRED_COPY1]], implicit-def dead $scc ; WAVE64-NEXT: S_ENDPGM 0, implicit [[S_XOR_B64_]] ; WAVE32-LABEL: name: xor_v4s16_sgpr_sgpr_sgpr ; WAVE32: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3 - ; WAVE32-NEXT: [[S_XOR_B64_:%[0-9]+]]:sreg_64 = S_XOR_B64 [[COPY]], [[COPY1]], implicit-def dead $scc + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64 = 
PRED_COPY $sgpr2_sgpr3 + ; WAVE32-NEXT: [[S_XOR_B64_:%[0-9]+]]:sreg_64 = S_XOR_B64 [[PRED_COPY]], [[PRED_COPY1]], implicit-def dead $scc ; WAVE32-NEXT: S_ENDPGM 0, implicit [[S_XOR_B64_]] %0:sgpr(<4 x s16>) = COPY $sgpr0_sgpr1 %1:sgpr(<4 x s16>) = COPY $sgpr2_sgpr3 @@ -304,16 +304,16 @@ ; WAVE64-LABEL: name: xor_s32_vgpr_vgpr_vgpr ; WAVE64: liveins: $vgpr0, $vgpr1 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE64-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[COPY]], [[COPY1]], implicit $exec + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE64-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_XOR_B32_e64_]] ; WAVE32-LABEL: name: xor_s32_vgpr_vgpr_vgpr ; WAVE32: liveins: $vgpr0, $vgpr1 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE32-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[COPY]], [[COPY1]], implicit $exec + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE32-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_XOR_B32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -334,16 +334,16 @@ ; WAVE64-LABEL: name: xor_v2s16_vgpr_vgpr_vgpr ; WAVE64: liveins: $vgpr0, $vgpr1 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE64-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[COPY]], [[COPY1]], implicit $exec + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE64-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_XOR_B32_e64_]] ; WAVE32-LABEL: name: xor_v2s16_vgpr_vgpr_vgpr ; WAVE32: liveins: $vgpr0, $vgpr1 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE32-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[COPY]], [[COPY1]], implicit $exec + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE32-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_XOR_B32_e64_]] %0:vgpr(<2 x s16>) = COPY $vgpr0 %1:vgpr(<2 x s16>) = COPY $vgpr1 @@ -396,22 +396,22 @@ ; WAVE64-LABEL: name: xor_s1_vcc_copy_to_vcc ; WAVE64: liveins: $vgpr0, $vgpr1 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE64-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY]], implicit $exec + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE64-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[PRED_COPY]], implicit $exec ; WAVE64-NEXT: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64_xexec = 
V_CMP_NE_U32_e64 0, [[V_AND_B32_e32_]], implicit $exec - ; WAVE64-NEXT: [[V_AND_B32_e32_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY1]], implicit $exec + ; WAVE64-NEXT: [[V_AND_B32_e32_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[PRED_COPY1]], implicit $exec ; WAVE64-NEXT: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 0, [[V_AND_B32_e32_1]], implicit $exec ; WAVE64-NEXT: [[S_XOR_B64_:%[0-9]+]]:sreg_64_xexec = S_XOR_B64 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]], implicit-def dead $scc ; WAVE64-NEXT: S_ENDPGM 0, implicit [[S_XOR_B64_]] ; WAVE32-LABEL: name: xor_s1_vcc_copy_to_vcc ; WAVE32: liveins: $vgpr0, $vgpr1 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE32-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY]], implicit $exec + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE32-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[PRED_COPY]], implicit $exec ; WAVE32-NEXT: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_NE_U32_e64 0, [[V_AND_B32_e32_]], implicit $exec - ; WAVE32-NEXT: [[V_AND_B32_e32_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY1]], implicit $exec + ; WAVE32-NEXT: [[V_AND_B32_e32_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[PRED_COPY1]], implicit $exec ; WAVE32-NEXT: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_NE_U32_e64 0, [[V_AND_B32_e32_1]], implicit $exec ; WAVE32-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_XOR_B32 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]], implicit-def dead $scc ; WAVE32-NEXT: S_ENDPGM 0, implicit [[S_XOR_B32_]] @@ -441,27 +441,27 @@ ; WAVE64-LABEL: name: copy_select_constrain_vcc_result_reg_wave32 ; WAVE64: liveins: $vgpr0, $sgpr0 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE64-NEXT: %sgpr0:sreg_32 = COPY $sgpr0 - ; WAVE64-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY]], implicit $exec + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE64-NEXT: %sgpr0:sreg_32 = PRED_COPY $sgpr0 + ; WAVE64-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[PRED_COPY]], implicit $exec ; WAVE64-NEXT: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 0, [[V_AND_B32_e32_]], implicit $exec ; WAVE64-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, %sgpr0, implicit-def $scc ; WAVE64-NEXT: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 0, [[S_AND_B32_]], implicit $exec ; WAVE64-NEXT: [[S_XOR_B64_:%[0-9]+]]:sreg_64_xexec = S_XOR_B64 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]], implicit-def dead $scc - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY [[S_XOR_B64_]] - ; WAVE64-NEXT: S_ENDPGM 0, implicit [[COPY1]] + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32_xm0 = PRED_COPY [[S_XOR_B64_]] + ; WAVE64-NEXT: S_ENDPGM 0, implicit [[PRED_COPY1]] ; WAVE32-LABEL: name: copy_select_constrain_vcc_result_reg_wave32 ; WAVE32: liveins: $vgpr0, $sgpr0 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE32-NEXT: %sgpr0:sreg_32 = COPY $sgpr0 - ; WAVE32-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY]], implicit $exec + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE32-NEXT: %sgpr0:sreg_32 = PRED_COPY $sgpr0 + ; WAVE32-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[PRED_COPY]], implicit $exec ; WAVE32-NEXT: 
[[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_NE_U32_e64 0, [[V_AND_B32_e32_]], implicit $exec ; WAVE32-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, %sgpr0, implicit-def $scc ; WAVE32-NEXT: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_NE_U32_e64 0, [[S_AND_B32_]], implicit $exec ; WAVE32-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_XOR_B32 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]], implicit-def dead $scc - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY [[S_XOR_B32_]] - ; WAVE32-NEXT: S_ENDPGM 0, implicit [[COPY1]] + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32_xm0 = PRED_COPY [[S_XOR_B32_]] + ; WAVE32-NEXT: S_ENDPGM 0, implicit [[PRED_COPY1]] %1:vgpr(s32) = COPY $vgpr0 %0:vgpr(s1) = G_TRUNC %1(s32) %sgpr0:sgpr(s32) = COPY $sgpr0 @@ -489,9 +489,9 @@ ; WAVE64-LABEL: name: copy_select_constrain_vcc_result_reg_wave64 ; WAVE64: liveins: $vgpr0, $sgpr0 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE64-NEXT: %sgpr0:sreg_32 = COPY $sgpr0 - ; WAVE64-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY]], implicit $exec + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE64-NEXT: %sgpr0:sreg_32 = PRED_COPY $sgpr0 + ; WAVE64-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[PRED_COPY]], implicit $exec ; WAVE64-NEXT: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 0, [[V_AND_B32_e32_]], implicit $exec ; WAVE64-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, %sgpr0, implicit-def $scc ; WAVE64-NEXT: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 0, [[S_AND_B32_]], implicit $exec @@ -500,15 +500,15 @@ ; WAVE32-LABEL: name: copy_select_constrain_vcc_result_reg_wave64 ; WAVE32: liveins: $vgpr0, $sgpr0 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE32-NEXT: %sgpr0:sreg_32 = COPY $sgpr0 - ; WAVE32-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY]], implicit $exec + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE32-NEXT: %sgpr0:sreg_32 = PRED_COPY $sgpr0 + ; WAVE32-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[PRED_COPY]], implicit $exec ; WAVE32-NEXT: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_NE_U32_e64 0, [[V_AND_B32_e32_]], implicit $exec ; WAVE32-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, %sgpr0, implicit-def $scc ; WAVE32-NEXT: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_NE_U32_e64 0, [[S_AND_B32_]], implicit $exec ; WAVE32-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_XOR_B32 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]], implicit-def dead $scc - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sreg_64_xexec = COPY [[S_XOR_B32_]] - ; WAVE32-NEXT: S_ENDPGM 0, implicit [[COPY1]] + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64_xexec = PRED_COPY [[S_XOR_B32_]] + ; WAVE32-NEXT: S_ENDPGM 0, implicit [[PRED_COPY1]] %1:vgpr(s32) = COPY $vgpr0 %0:vgpr(s1) = G_TRUNC %1(s32) %sgpr0:sgpr(s32) = COPY $sgpr0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-zext.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-zext.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-zext.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-zext.mir @@ -13,10 +13,10 @@ ; GCN-LABEL: name: zext_sgpr_s1_to_sgpr_s16 ; GCN: liveins: $sgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], 1, implicit-def $scc + ; GCN-NEXT: 
[[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[PRED_COPY]], 1, implicit-def $scc ; GCN-NEXT: [[S_SEXT_I32_I16_:%[0-9]+]]:sreg_32 = S_SEXT_I32_I16 [[S_AND_B32_]] - ; GCN-NEXT: $sgpr0 = COPY [[S_SEXT_I32_I16_]] + ; GCN-NEXT: $sgpr0 = PRED_COPY [[S_SEXT_I32_I16_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s1) = G_TRUNC %0 %2:sgpr(s16) = G_ZEXT %1 @@ -36,9 +36,9 @@ ; GCN-LABEL: name: zext_sgpr_s1_to_sgpr_s32 ; GCN: liveins: $sgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], 1, implicit-def $scc - ; GCN-NEXT: $sgpr0 = COPY [[S_AND_B32_]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[PRED_COPY]], 1, implicit-def $scc + ; GCN-NEXT: $sgpr0 = PRED_COPY [[S_AND_B32_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s1) = G_TRUNC %0 %2:sgpr(s32) = G_ZEXT %1 @@ -57,11 +57,11 @@ ; GCN-LABEL: name: zext_sgpr_s1_to_sgpr_s64 ; GCN: liveins: $sgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 ; GCN-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[DEF]], %subreg.sub1 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[DEF]], %subreg.sub1 ; GCN-NEXT: [[S_BFE_U64_:%[0-9]+]]:sreg_64 = S_BFE_U64 [[REG_SEQUENCE]], 65536, implicit-def $scc - ; GCN-NEXT: $sgpr0_sgpr1 = COPY [[S_BFE_U64_]] + ; GCN-NEXT: $sgpr0_sgpr1 = PRED_COPY [[S_BFE_U64_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s1) = G_TRUNC %0 %2:sgpr(s64) = G_ZEXT %1 @@ -80,9 +80,9 @@ ; GCN-LABEL: name: zext_sgpr_s16_to_sgpr_s32 ; GCN: liveins: $sgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[S_BFE_U32_:%[0-9]+]]:sreg_32 = S_BFE_U32 [[COPY]], 1048576, implicit-def $scc - ; GCN-NEXT: $sgpr0 = COPY [[S_BFE_U32_]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[S_BFE_U32_:%[0-9]+]]:sreg_32 = S_BFE_U32 [[PRED_COPY]], 1048576, implicit-def $scc + ; GCN-NEXT: $sgpr0 = PRED_COPY [[S_BFE_U32_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s16) = G_TRUNC %0 %2:sgpr(s32) = G_ZEXT %1 @@ -102,11 +102,11 @@ ; GCN-LABEL: name: zext_sgpr_s16_to_sgpr_s64 ; GCN: liveins: $sgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 ; GCN-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[DEF]], %subreg.sub1 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[DEF]], %subreg.sub1 ; GCN-NEXT: [[S_BFE_U64_:%[0-9]+]]:sreg_64 = S_BFE_U64 [[REG_SEQUENCE]], 1048576, implicit-def $scc - ; GCN-NEXT: $sgpr0_sgpr1 = COPY [[S_BFE_U64_]] + ; GCN-NEXT: $sgpr0_sgpr1 = PRED_COPY [[S_BFE_U64_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s16) = G_TRUNC %0 %2:sgpr(s64) = G_ZEXT %1 @@ -126,10 +126,10 @@ ; GCN-LABEL: name: zext_sgpr_s32_to_sgpr_s64 ; GCN: liveins: $sgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[S_MOV_B32_]], %subreg.sub1 - ; GCN-NEXT: $sgpr0_sgpr1 = COPY [[REG_SEQUENCE]] + ; 
GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[S_MOV_B32_]], %subreg.sub1 + ; GCN-NEXT: $sgpr0_sgpr1 = PRED_COPY [[REG_SEQUENCE]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s64) = G_ZEXT %0 $sgpr0_sgpr1 = COPY %1 @@ -163,10 +163,10 @@ ; GCN-LABEL: name: zext_vgpr_s1_to_vgpr_s16 ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY]], implicit $exec + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[PRED_COPY]], implicit $exec ; GCN-NEXT: [[V_BFE_I32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_I32_e64 [[V_AND_B32_e32_]], 0, 16, implicit $exec - ; GCN-NEXT: $vgpr0 = COPY [[V_BFE_I32_e64_]] + ; GCN-NEXT: $vgpr0 = PRED_COPY [[V_BFE_I32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s1) = G_TRUNC %0 %2:vgpr(s16) = G_ZEXT %1 @@ -186,9 +186,9 @@ ; GCN-LABEL: name: zext_vgpr_s1_to_vgpr_s32 ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY]], implicit $exec - ; GCN-NEXT: $vgpr0 = COPY [[V_AND_B32_e32_]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[PRED_COPY]], implicit $exec + ; GCN-NEXT: $vgpr0 = PRED_COPY [[V_AND_B32_e32_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s1) = G_TRUNC %0 %2:vgpr(s32) = G_ZEXT %1 @@ -207,9 +207,9 @@ ; GCN-LABEL: name: zext_vgpr_s16_to_vgpr_s32 ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[V_BFE_U32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_U32_e64 [[COPY]], 0, 16, implicit $exec - ; GCN-NEXT: $vgpr0 = COPY [[V_BFE_U32_e64_]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[V_BFE_U32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_U32_e64 [[PRED_COPY]], 0, 16, implicit $exec + ; GCN-NEXT: $vgpr0 = PRED_COPY [[V_BFE_U32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s16) = G_TRUNC %0 %2:vgpr(s32) = G_ZEXT %1 @@ -229,9 +229,9 @@ ; GCN-LABEL: name: zext_sgpr_reg_class_s1_to_sgpr_s32 ; GCN: liveins: $sgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], 1, implicit-def $scc - ; GCN-NEXT: $sgpr0 = COPY [[S_AND_B32_]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[PRED_COPY]], 1, implicit-def $scc + ; GCN-NEXT: $sgpr0 = PRED_COPY [[S_AND_B32_]] %0:sgpr(s32) = COPY $sgpr0 %1:sreg_32(s1) = G_TRUNC %0 %2:sgpr(s32) = G_ZEXT %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-zextload-local.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-zextload-local.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-zextload-local.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-zextload-local.mir @@ -18,21 +18,21 @@ ; GFX6-LABEL: name: zextload_local_s32_from_s8_align1 ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (s8), addrspace 3) - ; GFX6-NEXT: $vgpr0 = COPY [[DS_READ_U8_]] + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 [[PRED_COPY]], 0, 0, implicit $m0, implicit $exec :: (load (s8), 
addrspace 3) + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[DS_READ_U8_]] ; GFX7-LABEL: name: zextload_local_s32_from_s8_align1 ; GFX7: liveins: $vgpr0 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7-NEXT: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (s8), addrspace 3) - ; GFX7-NEXT: $vgpr0 = COPY [[DS_READ_U8_]] + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX7-NEXT: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 [[PRED_COPY]], 0, 0, implicit $m0, implicit $exec :: (load (s8), addrspace 3) + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[DS_READ_U8_]] ; GFX9-LABEL: name: zextload_local_s32_from_s8_align1 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[DS_READ_U8_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_U8_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (s8), addrspace 3) - ; GFX9-NEXT: $vgpr0 = COPY [[DS_READ_U8_gfx9_]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[DS_READ_U8_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_U8_gfx9 [[PRED_COPY]], 0, 0, implicit $exec :: (load (s8), addrspace 3) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[DS_READ_U8_gfx9_]] %0:vgpr(p3) = COPY $vgpr0 %1:vgpr(s32) = G_ZEXTLOAD %0 :: (load (s8), align 1, addrspace 3) $vgpr0 = COPY %1 @@ -52,21 +52,21 @@ ; GFX6-LABEL: name: zextload_local_s32_from_s16_align2 ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[DS_READ_U16_:%[0-9]+]]:vgpr_32 = DS_READ_U16 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (s16), addrspace 3) - ; GFX6-NEXT: $vgpr0 = COPY [[DS_READ_U16_]] + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[DS_READ_U16_:%[0-9]+]]:vgpr_32 = DS_READ_U16 [[PRED_COPY]], 0, 0, implicit $m0, implicit $exec :: (load (s16), addrspace 3) + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[DS_READ_U16_]] ; GFX7-LABEL: name: zextload_local_s32_from_s16_align2 ; GFX7: liveins: $vgpr0 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7-NEXT: [[DS_READ_U16_:%[0-9]+]]:vgpr_32 = DS_READ_U16 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (s16), addrspace 3) - ; GFX7-NEXT: $vgpr0 = COPY [[DS_READ_U16_]] + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX7-NEXT: [[DS_READ_U16_:%[0-9]+]]:vgpr_32 = DS_READ_U16 [[PRED_COPY]], 0, 0, implicit $m0, implicit $exec :: (load (s16), addrspace 3) + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[DS_READ_U16_]] ; GFX9-LABEL: name: zextload_local_s32_from_s16_align2 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[DS_READ_U16_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_U16_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (s16), addrspace 3) - ; GFX9-NEXT: $vgpr0 = COPY [[DS_READ_U16_gfx9_]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[DS_READ_U16_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_U16_gfx9 [[PRED_COPY]], 0, 0, implicit $exec :: (load (s16), addrspace 3) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[DS_READ_U16_gfx9_]] %0:vgpr(p3) = COPY $vgpr0 %1:vgpr(s32) = G_ZEXTLOAD %0 :: (load (s16), align 2, addrspace 3) $vgpr0 = COPY %1 @@ -103,23 +103,23 @@ ; GFX6-LABEL: name: zextload_local_s32_from_s8_align1_offset4095 ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX6-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = 
V_MOV_B32_e32 4095, implicit $exec - ; GFX6-NEXT: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec - ; GFX6-NEXT: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 %2, 0, 0, implicit $m0, implicit $exec :: (load (s8), addrspace 3) - ; GFX6-NEXT: $vgpr0 = COPY [[DS_READ_U8_]] + ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX6-NEXT: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 [[V_ADD_CO_U32_e64_]], 0, 0, implicit $m0, implicit $exec :: (load (s8), addrspace 3) + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[DS_READ_U8_]] ; GFX7-LABEL: name: zextload_local_s32_from_s8_align1_offset4095 ; GFX7: liveins: $vgpr0 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7-NEXT: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 [[COPY]], 4095, 0, implicit $m0, implicit $exec :: (load (s8), addrspace 3) - ; GFX7-NEXT: $vgpr0 = COPY [[DS_READ_U8_]] + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX7-NEXT: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 [[PRED_COPY]], 4095, 0, implicit $m0, implicit $exec :: (load (s8), addrspace 3) + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[DS_READ_U8_]] ; GFX9-LABEL: name: zextload_local_s32_from_s8_align1_offset4095 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[DS_READ_U8_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_U8_gfx9 [[COPY]], 4095, 0, implicit $exec :: (load (s8), addrspace 3) - ; GFX9-NEXT: $vgpr0 = COPY [[DS_READ_U8_gfx9_]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[DS_READ_U8_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_U8_gfx9 [[PRED_COPY]], 4095, 0, implicit $exec :: (load (s8), addrspace 3) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[DS_READ_U8_gfx9_]] %0:vgpr(p3) = COPY $vgpr0 %1:vgpr(s32) = G_CONSTANT i32 4095 %2:vgpr(p3) = G_PTR_ADD %0, %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgcn-sendmsg.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgcn-sendmsg.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgcn-sendmsg.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgcn-sendmsg.ll @@ -8,8 +8,8 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0 - ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.s.sendmsg), 12, [[COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr0 + ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.s.sendmsg), 12, [[PRED_COPY]](s32) ; CHECK-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.s.sendmsg(i32 12, i32 %m0) ret void diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_kernel.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_kernel.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_kernel.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_kernel.ll @@ -7,12 +7,12 @@ ; HSA-VI: bb.1 (%ir-block.0): ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 ; HSA-VI-NEXT: {{ $}} - ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 + ; HSA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr4_sgpr5 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) 
= G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s8), align 8, addrspace 4) ; HSA-VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s8) ; HSA-VI-NEXT: G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) @@ -21,12 +21,12 @@ ; LEGACY-MESA-VI: bb.1 (%ir-block.0): ; LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: {{ $}} - ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 + ; LEGACY-MESA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s8), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s8) ; LEGACY-MESA-VI-NEXT: G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) @@ -41,12 +41,12 @@ ; HSA-VI: bb.1 (%ir-block.0): ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 ; HSA-VI-NEXT: {{ $}} - ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 + ; HSA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr4_sgpr5 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s8), align 8, addrspace 4) ; HSA-VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s8) ; HSA-VI-NEXT: G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) @@ -55,12 +55,12 @@ ; LEGACY-MESA-VI: bb.1 (%ir-block.0): ; LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: {{ $}} - ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 + ; LEGACY-MESA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: 
[[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s8), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s8) ; LEGACY-MESA-VI-NEXT: G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) @@ -75,12 +75,12 @@ ; HSA-VI: bb.1 (%ir-block.0): ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 ; HSA-VI-NEXT: {{ $}} - ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 + ; HSA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr4_sgpr5 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s8), align 8, addrspace 4) ; HSA-VI-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD1]](s8) ; HSA-VI-NEXT: G_STORE [[SEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) @@ -89,12 +89,12 @@ ; LEGACY-MESA-VI: bb.1 (%ir-block.0): ; LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: {{ $}} - ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 + ; LEGACY-MESA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s8), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD1]](s8) ; LEGACY-MESA-VI-NEXT: G_STORE [[SEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) @@ -109,12 +109,12 @@ ; HSA-VI: bb.1 (%ir-block.0): ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 ; HSA-VI-NEXT: {{ $}} - ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 + ; HSA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr4_sgpr5 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; HSA-VI-NEXT: 
[[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s16), align 8, addrspace 4) ; HSA-VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s16) ; HSA-VI-NEXT: G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) @@ -123,12 +123,12 @@ ; LEGACY-MESA-VI: bb.1 (%ir-block.0): ; LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: {{ $}} - ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 + ; LEGACY-MESA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s16), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s16) ; LEGACY-MESA-VI-NEXT: G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) @@ -143,12 +143,12 @@ ; HSA-VI: bb.1 (%ir-block.0): ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 ; HSA-VI-NEXT: {{ $}} - ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 + ; HSA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr4_sgpr5 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s16), align 8, addrspace 4) ; HSA-VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s16) ; HSA-VI-NEXT: G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) @@ -157,12 +157,12 @@ ; LEGACY-MESA-VI: bb.1 (%ir-block.0): ; LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: {{ $}} - ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 + ; LEGACY-MESA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; 
LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s16), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s16) ; LEGACY-MESA-VI-NEXT: G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) @@ -177,12 +177,12 @@ ; HSA-VI: bb.1 (%ir-block.0): ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 ; HSA-VI-NEXT: {{ $}} - ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 + ; HSA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr4_sgpr5 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s16), align 8, addrspace 4) ; HSA-VI-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD1]](s16) ; HSA-VI-NEXT: G_STORE [[SEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) @@ -191,12 +191,12 @@ ; LEGACY-MESA-VI: bb.1 (%ir-block.0): ; LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: {{ $}} - ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 + ; LEGACY-MESA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s16), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD1]](s16) ; LEGACY-MESA-VI-NEXT: G_STORE [[SEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) @@ -211,12 +211,12 @@ ; HSA-VI: bb.1.entry: ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 ; HSA-VI-NEXT: {{ $}} - ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 + ; HSA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr4_sgpr5 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s32), align 8, addrspace 4) ; HSA-VI-NEXT: G_STORE 
[[LOAD1]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) ; HSA-VI-NEXT: S_ENDPGM 0 @@ -224,12 +224,12 @@ ; LEGACY-MESA-VI: bb.1.entry: ; LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: {{ $}} - ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 + ; LEGACY-MESA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s32), addrspace 4) ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 @@ -243,12 +243,12 @@ ; HSA-VI: bb.1.entry: ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 ; HSA-VI-NEXT: {{ $}} - ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 + ; HSA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr4_sgpr5 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s32), align 8, addrspace 4) ; HSA-VI-NEXT: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) ; HSA-VI-NEXT: S_ENDPGM 0 @@ -256,12 +256,12 @@ ; LEGACY-MESA-VI: bb.1.entry: ; LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: {{ $}} - ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 + ; LEGACY-MESA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s32), addrspace 4) ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 @@ -275,12 +275,12 @@ ; HSA-VI: bb.1.entry: ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 ; HSA-VI-NEXT: {{ $}} - ; 
HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 + ; HSA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr4_sgpr5 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<2 x s8>), align 8, addrspace 4) ; HSA-VI-NEXT: G_STORE [[LOAD1]](<2 x s8>), [[LOAD]](p1) :: (store (<2 x s8>) into %ir.out, addrspace 1) ; HSA-VI-NEXT: S_ENDPGM 0 @@ -288,12 +288,12 @@ ; LEGACY-MESA-VI: bb.1.entry: ; LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: {{ $}} - ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 + ; LEGACY-MESA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<2 x s8>), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](<2 x s8>), [[LOAD]](p1) :: (store (<2 x s8>) into %ir.out, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 @@ -307,12 +307,12 @@ ; HSA-VI: bb.1.entry: ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 ; HSA-VI-NEXT: {{ $}} - ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 + ; HSA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr4_sgpr5 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<2 x s16>), align 8, addrspace 4) ; HSA-VI-NEXT: G_STORE [[LOAD1]](<2 x s16>), [[LOAD]](p1) :: (store (<2 x s16>) into %ir.out, addrspace 1) ; HSA-VI-NEXT: S_ENDPGM 0 @@ -320,12 +320,12 @@ ; LEGACY-MESA-VI: bb.1.entry: ; LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: {{ $}} - ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 + ; LEGACY-MESA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; 
LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<2 x s16>), addrspace 4) ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](<2 x s16>), [[LOAD]](p1) :: (store (<2 x s16>) into %ir.out, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 @@ -339,12 +339,12 @@ ; HSA-VI: bb.1.entry: ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 ; HSA-VI-NEXT: {{ $}} - ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 + ; HSA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr4_sgpr5 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<2 x s32>), addrspace 4) ; HSA-VI-NEXT: G_STORE [[LOAD1]](<2 x s32>), [[LOAD]](p1) :: (store (<2 x s32>) into %ir.out, align 4, addrspace 1) ; HSA-VI-NEXT: S_ENDPGM 0 @@ -352,12 +352,12 @@ ; LEGACY-MESA-VI: bb.1.entry: ; LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: {{ $}} - ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 + ; LEGACY-MESA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<2 x s32>), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](<2 x s32>), [[LOAD]](p1) :: (store (<2 x s32>) into %ir.out, align 4, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 @@ -371,12 +371,12 @@ ; HSA-VI: bb.1.entry: ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 ; HSA-VI-NEXT: {{ $}} - ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 + ; HSA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr4_sgpr5 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; HSA-VI-NEXT: 
[[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<2 x s32>), addrspace 4) ; HSA-VI-NEXT: G_STORE [[LOAD1]](<2 x s32>), [[LOAD]](p1) :: (store (<2 x s32>) into %ir.out, align 4, addrspace 1) ; HSA-VI-NEXT: S_ENDPGM 0 @@ -384,12 +384,12 @@ ; LEGACY-MESA-VI: bb.1.entry: ; LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: {{ $}} - ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 + ; LEGACY-MESA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<2 x s32>), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](<2 x s32>), [[LOAD]](p1) :: (store (<2 x s32>) into %ir.out, align 4, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 @@ -403,12 +403,12 @@ ; HSA-VI: bb.1.entry: ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 ; HSA-VI-NEXT: {{ $}} - ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 + ; HSA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr4_sgpr5 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<3 x s8>), align 8, addrspace 4) ; HSA-VI-NEXT: G_STORE [[LOAD1]](<3 x s8>), [[LOAD]](p1) :: (store (<3 x s8>) into %ir.out, align 4, addrspace 1) ; HSA-VI-NEXT: S_ENDPGM 0 @@ -416,12 +416,12 @@ ; LEGACY-MESA-VI: bb.1.entry: ; LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: {{ $}} - ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 + ; LEGACY-MESA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: 
[[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<3 x s8>), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](<3 x s8>), [[LOAD]](p1) :: (store (<3 x s8>) into %ir.out, align 4, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 @@ -435,12 +435,12 @@ ; HSA-VI: bb.1.entry: ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 ; HSA-VI-NEXT: {{ $}} - ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 + ; HSA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr4_sgpr5 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<3 x s16>), align 8, addrspace 4) ; HSA-VI-NEXT: G_STORE [[LOAD1]](<3 x s16>), [[LOAD]](p1) :: (store (<3 x s16>) into %ir.out, align 4, addrspace 1) ; HSA-VI-NEXT: S_ENDPGM 0 @@ -448,12 +448,12 @@ ; LEGACY-MESA-VI: bb.1.entry: ; LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: {{ $}} - ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 + ; LEGACY-MESA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<3 x s16>), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](<3 x s16>), [[LOAD]](p1) :: (store (<3 x s16>) into %ir.out, align 4, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 @@ -467,12 +467,12 @@ ; HSA-VI: bb.1.entry: ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 ; HSA-VI-NEXT: {{ $}} - ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 + ; HSA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr4_sgpr5 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD 
[[PRED_COPY]], [[C1]](s64) ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<3 x s32>), align 16, addrspace 4) ; HSA-VI-NEXT: G_STORE [[LOAD1]](<3 x s32>), [[LOAD]](p1) :: (store (<3 x s32>) into %ir.out, align 4, addrspace 1) ; HSA-VI-NEXT: S_ENDPGM 0 @@ -480,12 +480,12 @@ ; LEGACY-MESA-VI: bb.1.entry: ; LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: {{ $}} - ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 + ; LEGACY-MESA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 52 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<3 x s32>), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](<3 x s32>), [[LOAD]](p1) :: (store (<3 x s32>) into %ir.out, align 4, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 @@ -499,12 +499,12 @@ ; HSA-VI: bb.1.entry: ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 ; HSA-VI-NEXT: {{ $}} - ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 + ; HSA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr4_sgpr5 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<3 x s32>), align 16, addrspace 4) ; HSA-VI-NEXT: G_STORE [[LOAD1]](<3 x s32>), [[LOAD]](p1) :: (store (<3 x s32>) into %ir.out, align 4, addrspace 1) ; HSA-VI-NEXT: S_ENDPGM 0 @@ -512,12 +512,12 @@ ; LEGACY-MESA-VI: bb.1.entry: ; LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: {{ $}} - ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 + ; LEGACY-MESA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 52 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: 
(dereferenceable invariant load (<3 x s32>), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](<3 x s32>), [[LOAD]](p1) :: (store (<3 x s32>) into %ir.out, align 4, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 @@ -531,12 +531,12 @@ ; HSA-VI: bb.1.entry: ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 ; HSA-VI-NEXT: {{ $}} - ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 + ; HSA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr4_sgpr5 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<4 x s8>), align 8, addrspace 4) ; HSA-VI-NEXT: G_STORE [[LOAD1]](<4 x s8>), [[LOAD]](p1) :: (store (<4 x s8>) into %ir.out, addrspace 1) ; HSA-VI-NEXT: S_ENDPGM 0 @@ -544,12 +544,12 @@ ; LEGACY-MESA-VI: bb.1.entry: ; LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: {{ $}} - ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 + ; LEGACY-MESA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<4 x s8>), addrspace 4) ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](<4 x s8>), [[LOAD]](p1) :: (store (<4 x s8>) into %ir.out, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 @@ -563,12 +563,12 @@ ; HSA-VI: bb.1.entry: ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 ; HSA-VI-NEXT: {{ $}} - ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 + ; HSA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr4_sgpr5 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<4 x s16>), addrspace 4) ; HSA-VI-NEXT: G_STORE [[LOAD1]](<4 x s16>), [[LOAD]](p1) :: (store (<4 x s16>) into %ir.out, addrspace 1) ; HSA-VI-NEXT: S_ENDPGM 0 @@ -576,12 +576,12 @@ ; LEGACY-MESA-VI: 
bb.1.entry: ; LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: {{ $}} - ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 + ; LEGACY-MESA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<4 x s16>), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](<4 x s16>), [[LOAD]](p1) :: (store (<4 x s16>) into %ir.out, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 @@ -595,12 +595,12 @@ ; HSA-VI: bb.1.entry: ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 ; HSA-VI-NEXT: {{ $}} - ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 + ; HSA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr4_sgpr5 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<4 x s32>), addrspace 4) ; HSA-VI-NEXT: G_STORE [[LOAD1]](<4 x s32>), [[LOAD]](p1) :: (store (<4 x s32>) into %ir.out, align 4, addrspace 1) ; HSA-VI-NEXT: S_ENDPGM 0 @@ -608,12 +608,12 @@ ; LEGACY-MESA-VI: bb.1.entry: ; LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: {{ $}} - ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 + ; LEGACY-MESA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 52 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<4 x s32>), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](<4 x s32>), [[LOAD]](p1) :: (store (<4 x s32>) into %ir.out, align 4, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 @@ -627,12 +627,12 @@ ; HSA-VI: bb.1.entry: ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 ; HSA-VI-NEXT: {{ $}} - ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY 
$sgpr4_sgpr5 + ; HSA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr4_sgpr5 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<4 x s32>), addrspace 4) ; HSA-VI-NEXT: G_STORE [[LOAD1]](<4 x s32>), [[LOAD]](p1) :: (store (<4 x s32>) into %ir.out, align 4, addrspace 1) ; HSA-VI-NEXT: S_ENDPGM 0 @@ -640,12 +640,12 @@ ; LEGACY-MESA-VI: bb.1.entry: ; LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: {{ $}} - ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 + ; LEGACY-MESA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 52 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<4 x s32>), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](<4 x s32>), [[LOAD]](p1) :: (store (<4 x s32>) into %ir.out, align 4, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 @@ -659,12 +659,12 @@ ; HSA-VI: bb.1.entry: ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 ; HSA-VI-NEXT: {{ $}} - ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 + ; HSA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr4_sgpr5 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<8 x s8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<8 x s8>), addrspace 4) ; HSA-VI-NEXT: G_STORE [[LOAD1]](<8 x s8>), [[LOAD]](p1) :: (store (<8 x s8>) into %ir.out, addrspace 1) ; HSA-VI-NEXT: S_ENDPGM 0 @@ -672,12 +672,12 @@ ; LEGACY-MESA-VI: bb.1.entry: ; LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: {{ $}} - ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 + ; LEGACY-MESA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = 
G_PTR_ADD [[COPY]], [[C]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<8 x s8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<8 x s8>), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](<8 x s8>), [[LOAD]](p1) :: (store (<8 x s8>) into %ir.out, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 @@ -691,12 +691,12 @@ ; HSA-VI: bb.1.entry: ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 ; HSA-VI-NEXT: {{ $}} - ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 + ; HSA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr4_sgpr5 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<8 x s16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<8 x s16>), addrspace 4) ; HSA-VI-NEXT: G_STORE [[LOAD1]](<8 x s16>), [[LOAD]](p1) :: (store (<8 x s16>) into %ir.out, addrspace 1) ; HSA-VI-NEXT: S_ENDPGM 0 @@ -704,12 +704,12 @@ ; LEGACY-MESA-VI: bb.1.entry: ; LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: {{ $}} - ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 + ; LEGACY-MESA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 52 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<8 x s16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<8 x s16>), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](<8 x s16>), [[LOAD]](p1) :: (store (<8 x s16>) into %ir.out, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 @@ -723,12 +723,12 @@ ; HSA-VI: bb.1.entry: ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 ; HSA-VI-NEXT: {{ $}} - ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 + ; HSA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr4_sgpr5 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: 
(dereferenceable invariant load (p1), align 16, addrspace 4) ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 - ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<8 x s32>), align 16, addrspace 4) ; HSA-VI-NEXT: G_STORE [[LOAD1]](<8 x s32>), [[LOAD]](p1) :: (store (<8 x s32>) into %ir.out, align 4, addrspace 1) ; HSA-VI-NEXT: S_ENDPGM 0 @@ -736,12 +736,12 @@ ; LEGACY-MESA-VI: bb.1.entry: ; LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: {{ $}} - ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 + ; LEGACY-MESA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 68 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<8 x s32>), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](<8 x s32>), [[LOAD]](p1) :: (store (<8 x s32>) into %ir.out, align 4, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 @@ -755,12 +755,12 @@ ; HSA-VI: bb.1.entry: ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 ; HSA-VI-NEXT: {{ $}} - ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 + ; HSA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr4_sgpr5 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 - ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<8 x s32>), align 16, addrspace 4) ; HSA-VI-NEXT: G_STORE [[LOAD1]](<8 x s32>), [[LOAD]](p1) :: (store (<8 x s32>) into %ir.out, align 4, addrspace 1) ; HSA-VI-NEXT: S_ENDPGM 0 @@ -768,12 +768,12 @@ ; LEGACY-MESA-VI: bb.1.entry: ; LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: {{ $}} - ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 + ; LEGACY-MESA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 68 - ; 
LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<8 x s32>), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](<8 x s32>), [[LOAD]](p1) :: (store (<8 x s32>) into %ir.out, align 4, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 @@ -787,12 +787,12 @@ ; HSA-VI: bb.1.entry: ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 ; HSA-VI-NEXT: {{ $}} - ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 + ; HSA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr4_sgpr5 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<16 x s8>), addrspace 4) ; HSA-VI-NEXT: G_STORE [[LOAD1]](<16 x s8>), [[LOAD]](p1) :: (store (<16 x s8>) into %ir.out, addrspace 1) ; HSA-VI-NEXT: S_ENDPGM 0 @@ -800,12 +800,12 @@ ; LEGACY-MESA-VI: bb.1.entry: ; LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: {{ $}} - ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 + ; LEGACY-MESA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 52 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<16 x s8>), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](<16 x s8>), [[LOAD]](p1) :: (store (<16 x s8>) into %ir.out, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 @@ -819,12 +819,12 @@ ; HSA-VI: bb.1.entry: ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 ; HSA-VI-NEXT: {{ $}} - ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 + ; HSA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr4_sgpr5 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 - ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<16 x 
s16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<16 x s16>), align 16, addrspace 4) ; HSA-VI-NEXT: G_STORE [[LOAD1]](<16 x s16>), [[LOAD]](p1) :: (store (<16 x s16>) into %ir.out, addrspace 1) ; HSA-VI-NEXT: S_ENDPGM 0 @@ -832,12 +832,12 @@ ; LEGACY-MESA-VI: bb.1.entry: ; LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: {{ $}} - ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 + ; LEGACY-MESA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 68 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<16 x s16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<16 x s16>), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](<16 x s16>), [[LOAD]](p1) :: (store (<16 x s16>) into %ir.out, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 @@ -851,12 +851,12 @@ ; HSA-VI: bb.1.entry: ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 ; HSA-VI-NEXT: {{ $}} - ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 + ; HSA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr4_sgpr5 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 64 - ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<16 x s32>), align 16, addrspace 4) ; HSA-VI-NEXT: G_STORE [[LOAD1]](<16 x s32>), [[LOAD]](p1) :: (store (<16 x s32>) into %ir.out, align 4, addrspace 1) ; HSA-VI-NEXT: S_ENDPGM 0 @@ -864,12 +864,12 @@ ; LEGACY-MESA-VI: bb.1.entry: ; LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: {{ $}} - ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 + ; LEGACY-MESA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 100 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<16 x s32>), align 4, addrspace 4) ; 
LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](<16 x s32>), [[LOAD]](p1) :: (store (<16 x s32>) into %ir.out, align 4, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 @@ -883,12 +883,12 @@ ; HSA-VI: bb.1.entry: ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 ; HSA-VI-NEXT: {{ $}} - ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 + ; HSA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr4_sgpr5 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 64 - ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<16 x s32>), align 16, addrspace 4) ; HSA-VI-NEXT: G_STORE [[LOAD1]](<16 x s32>), [[LOAD]](p1) :: (store (<16 x s32>) into %ir.out, align 4, addrspace 1) ; HSA-VI-NEXT: S_ENDPGM 0 @@ -896,12 +896,12 @@ ; LEGACY-MESA-VI: bb.1.entry: ; LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: {{ $}} - ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 + ; LEGACY-MESA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 100 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<16 x s32>), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](<16 x s32>), [[LOAD]](p1) :: (store (<16 x s32>) into %ir.out, align 4, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 @@ -915,12 +915,12 @@ ; HSA-VI: bb.1 (%ir-block.0): ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 ; HSA-VI-NEXT: {{ $}} - ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 + ; HSA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr4_sgpr5 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s64), addrspace 4) ; HSA-VI-NEXT: G_STORE [[LOAD1]](s64), [[LOAD]](p1) :: (store (s64) into %ir.out, addrspace 1) ; HSA-VI-NEXT: S_ENDPGM 0 @@ -928,12 +928,12 @@ ; LEGACY-MESA-VI: bb.1 (%ir-block.0): ; 
LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: {{ $}} - ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 + ; LEGACY-MESA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](s64), [[LOAD]](p1) :: (store (s64) into %ir.out, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 @@ -946,12 +946,12 @@ ; HSA-VI: bb.1.entry: ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 ; HSA-VI-NEXT: {{ $}} - ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 + ; HSA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr4_sgpr5 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s64), addrspace 4) ; HSA-VI-NEXT: G_STORE [[LOAD1]](s64), [[LOAD]](p1) :: (store (s64) into %ir.out, addrspace 1) ; HSA-VI-NEXT: S_ENDPGM 0 @@ -959,12 +959,12 @@ ; LEGACY-MESA-VI: bb.1.entry: ; LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: {{ $}} - ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 + ; LEGACY-MESA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](s64), [[LOAD]](p1) :: (store (s64) into %ir.out, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 @@ -978,12 +978,12 @@ ; HSA-VI: bb.1 (%ir-block.0): ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 ; HSA-VI-NEXT: {{ $}} - ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 + ; HSA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr4_sgpr5 ; HSA-VI-NEXT: 
[[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s1), align 8, addrspace 4) ; HSA-VI-NEXT: G_STORE [[LOAD1]](s1), [[LOAD]](p1) :: (store (s1) into %ir.out, addrspace 1) ; HSA-VI-NEXT: S_ENDPGM 0 @@ -991,12 +991,12 @@ ; LEGACY-MESA-VI: bb.1 (%ir-block.0): ; LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: {{ $}} - ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 + ; LEGACY-MESA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s1), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](s1), [[LOAD]](p1) :: (store (s1) into %ir.out, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 @@ -1009,12 +1009,12 @@ ; HSA-VI: bb.1 (%ir-block.0): ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 ; HSA-VI-NEXT: {{ $}} - ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 + ; HSA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr4_sgpr5 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s1), align 8, addrspace 4) ; HSA-VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s1) ; HSA-VI-NEXT: G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) @@ -1023,12 +1023,12 @@ ; LEGACY-MESA-VI: bb.1 (%ir-block.0): ; LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: {{ $}} - ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 + ; LEGACY-MESA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], 
[[C]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s1), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s1) ; LEGACY-MESA-VI-NEXT: G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) @@ -1043,12 +1043,12 @@ ; HSA-VI: bb.1 (%ir-block.0): ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 ; HSA-VI-NEXT: {{ $}} - ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 + ; HSA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr4_sgpr5 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s1), align 8, addrspace 4) ; HSA-VI-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[LOAD1]](s1) ; HSA-VI-NEXT: G_STORE [[ZEXT]](s64), [[LOAD]](p1) :: (store (s64) into %ir.out, addrspace 1) @@ -1057,12 +1057,12 @@ ; LEGACY-MESA-VI: bb.1 (%ir-block.0): ; LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: {{ $}} - ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 + ; LEGACY-MESA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s1), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[LOAD1]](s1) ; LEGACY-MESA-VI-NEXT: G_STORE [[ZEXT]](s64), [[LOAD]](p1) :: (store (s64) into %ir.out, addrspace 1) @@ -1077,12 +1077,12 @@ ; HSA-VI: bb.1 (%ir-block.0): ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 ; HSA-VI-NEXT: {{ $}} - ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 + ; HSA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr4_sgpr5 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, 
addrspace 4) ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s1), align 8, addrspace 4) ; HSA-VI-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD1]](s1) ; HSA-VI-NEXT: G_STORE [[SEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) @@ -1091,12 +1091,12 @@ ; LEGACY-MESA-VI: bb.1 (%ir-block.0): ; LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: {{ $}} - ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 + ; LEGACY-MESA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s1), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD1]](s1) ; LEGACY-MESA-VI-NEXT: G_STORE [[SEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) @@ -1111,12 +1111,12 @@ ; HSA-VI: bb.1 (%ir-block.0): ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 ; HSA-VI-NEXT: {{ $}} - ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 + ; HSA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr4_sgpr5 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s1), align 8, addrspace 4) ; HSA-VI-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[LOAD1]](s1) ; HSA-VI-NEXT: G_STORE [[SEXT]](s64), [[LOAD]](p1) :: (store (s64) into %ir.out, addrspace 1) @@ -1125,12 +1125,12 @@ ; LEGACY-MESA-VI: bb.1 (%ir-block.0): ; LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: {{ $}} - ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 + ; LEGACY-MESA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 - ; LEGACY-MESA-VI-NEXT: 
[[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s1), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[LOAD1]](s1) ; LEGACY-MESA-VI-NEXT: G_STORE [[SEXT]](s64), [[LOAD]](p1) :: (store (s64) into %ir.out, addrspace 1) @@ -1147,9 +1147,9 @@ ; HSA-VI: bb.1 (%ir-block.0): ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 ; HSA-VI-NEXT: {{ $}} - ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 + ; HSA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr4_sgpr5 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s32), align 16, addrspace 4) ; HSA-VI-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; HSA-VI-NEXT: G_STORE [[LOAD]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) @@ -1158,9 +1158,9 @@ ; LEGACY-MESA-VI: bb.1 (%ir-block.0): ; LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: {{ $}} - ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 + ; LEGACY-MESA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s32), addrspace 4) ; LEGACY-MESA-VI-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) @@ -1174,9 +1174,9 @@ ; HSA-VI: bb.1 (%ir-block.0): ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 ; HSA-VI-NEXT: {{ $}} - ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 + ; HSA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr4_sgpr5 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s32), align 16, addrspace 4) ; HSA-VI-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; HSA-VI-NEXT: G_STORE [[LOAD]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) @@ -1185,9 +1185,9 @@ ; LEGACY-MESA-VI: bb.1 (%ir-block.0): ; LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: {{ $}} - ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 + ; LEGACY-MESA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s32), addrspace 4) ; LEGACY-MESA-VI-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD]](s32), [[DEF]](p1) :: (store 
(s32) into `ptr addrspace(1) undef`, addrspace 1) @@ -1209,21 +1209,21 @@ ; HSA-VI: bb.1 (%ir-block.0): ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 ; HSA-VI-NEXT: {{ $}} - ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 + ; HSA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr4_sgpr5 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s32), align 16, addrspace 4) ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s64), addrspace 4) ; HSA-VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; HSA-VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; HSA-VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C2]](s64) ; HSA-VI-NEXT: [[LOAD2:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s8), align 16, addrspace 4) ; HSA-VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 24 - ; HSA-VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64) + ; HSA-VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C3]](s64) ; HSA-VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (dereferenceable invariant load (s32), align 8, addrspace 4) ; HSA-VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 - ; HSA-VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64) + ; HSA-VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C4]](s64) ; HSA-VI-NEXT: [[LOAD4:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD4]](p4) :: (dereferenceable invariant load (s64), addrspace 4) ; HSA-VI-NEXT: [[C5:%[0-9]+]]:_(p1) = G_CONSTANT i64 0 ; HSA-VI-NEXT: G_STORE [[LOAD]](s32), [[C5]](p1) :: (volatile store (s32) into `ptr addrspace(1) null`, addrspace 1) @@ -1236,21 +1236,21 @@ ; LEGACY-MESA-VI: bb.1 (%ir-block.0): ; LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: {{ $}} - ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 + ; LEGACY-MESA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s32), addrspace 4) ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 52 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C2]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD2:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load 
(s8), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 60 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C3]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (dereferenceable invariant load (s32), addrspace 4) ; LEGACY-MESA-VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 68 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C4]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD4:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD4]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: [[C5:%[0-9]+]]:_(p1) = G_CONSTANT i64 0 ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD]](s32), [[C5]](p1) :: (volatile store (s32) into `ptr addrspace(1) null`, addrspace 1) @@ -1276,21 +1276,21 @@ ; HSA-VI: bb.1 (%ir-block.0): ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 ; HSA-VI-NEXT: {{ $}} - ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 + ; HSA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr4_sgpr5 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s32), align 16, addrspace 4) ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s64), addrspace 4) ; HSA-VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; HSA-VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; HSA-VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C2]](s64) ; HSA-VI-NEXT: [[LOAD2:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s8), align 16, addrspace 4) ; HSA-VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 24 - ; HSA-VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64) + ; HSA-VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C3]](s64) ; HSA-VI-NEXT: [[LOAD3:%[0-9]+]]:_(p3) = G_LOAD [[PTR_ADD3]](p4) :: (dereferenceable invariant load (s32), align 8, addrspace 4) ; HSA-VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 - ; HSA-VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64) + ; HSA-VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C4]](s64) ; HSA-VI-NEXT: [[LOAD4:%[0-9]+]]:_(p1234) = G_LOAD [[PTR_ADD4]](p4) :: (dereferenceable invariant load (s64), addrspace 4) ; HSA-VI-NEXT: [[C5:%[0-9]+]]:_(p1) = G_CONSTANT i64 0 ; HSA-VI-NEXT: G_STORE [[LOAD]](p3), [[C5]](p1) :: (volatile store (p3) into `ptr addrspace(1) null`, addrspace 1) @@ -1303,21 +1303,21 @@ ; LEGACY-MESA-VI: bb.1 (%ir-block.0): ; LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: {{ $}} - ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 + ; LEGACY-MESA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; LEGACY-MESA-VI-NEXT: 
[[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s32), addrspace 4) ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 52 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C2]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD2:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s8), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 60 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C3]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD3:%[0-9]+]]:_(p3) = G_LOAD [[PTR_ADD3]](p4) :: (dereferenceable invariant load (s32), addrspace 4) ; LEGACY-MESA-VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 68 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C4]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD4:%[0-9]+]]:_(p1234) = G_LOAD [[PTR_ADD4]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: [[C5:%[0-9]+]]:_(p1) = G_CONSTANT i64 0 ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD]](p3), [[C5]](p1) :: (volatile store (p3) into `ptr addrspace(1) null`, addrspace 1) @@ -1345,18 +1345,18 @@ ; HSA-VI: bb.1 (%ir-block.1): ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 ; HSA-VI-NEXT: {{ $}} - ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 + ; HSA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr4_sgpr5 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s32), align 16, addrspace 4) ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4) ; HSA-VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 13 - ; HSA-VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; HSA-VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C2]](s64) ; HSA-VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s32), align 1, addrspace 4) ; HSA-VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 17 - ; HSA-VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64) + ; HSA-VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C3]](s64) ; HSA-VI-NEXT: [[LOAD3:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD3]](p4) :: (dereferenceable invariant load (s64), align 1, addrspace 4) ; HSA-VI-NEXT: [[C4:%[0-9]+]]:_(p1) = 
G_CONSTANT i64 0 ; HSA-VI-NEXT: G_STORE [[LOAD]](s32), [[C4]](p1) :: (volatile store (s32) into `ptr addrspace(1) null`, addrspace 1) @@ -1368,18 +1368,18 @@ ; LEGACY-MESA-VI: bb.1 (%ir-block.1): ; LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: {{ $}} - ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 + ; LEGACY-MESA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s32), addrspace 4) ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 40 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 49 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C2]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s32), align 1, addrspace 4) ; LEGACY-MESA-VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 53 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C3]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD3:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD3]](p4) :: (dereferenceable invariant load (s64), align 1, addrspace 4) ; LEGACY-MESA-VI-NEXT: [[C4:%[0-9]+]]:_(p1) = G_CONSTANT i64 0 ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD]](s32), [[C4]](p1) :: (volatile store (s32) into `ptr addrspace(1) null`, addrspace 1) @@ -1403,13 +1403,13 @@ ; HSA-VI: bb.1.entry: ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 ; HSA-VI-NEXT: {{ $}} - ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 + ; HSA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr4_sgpr5 ; HSA-VI-NEXT: S_ENDPGM 0 ; LEGACY-MESA-VI-LABEL: name: unused_i32_arg ; LEGACY-MESA-VI: bb.1.entry: ; LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: {{ $}} - ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 + ; LEGACY-MESA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 entry: ret void @@ -1421,12 +1421,12 @@ ; HSA-VI: bb.1 (%ir-block.0): ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 ; HSA-VI-NEXT: {{ $}} - ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 + ; HSA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr4_sgpr5 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s8) 
= G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load (s8) from %ir.in.byref, addrspace 4) ; HSA-VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s8) ; HSA-VI-NEXT: G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) @@ -1435,12 +1435,12 @@ ; LEGACY-MESA-VI: bb.1 (%ir-block.0): ; LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: {{ $}} - ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 + ; LEGACY-MESA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load (s8) from %ir.in.byref, addrspace 4) ; LEGACY-MESA-VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s8) ; LEGACY-MESA-VI-NEXT: G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) @@ -1456,12 +1456,12 @@ ; HSA-VI: bb.1 (%ir-block.0): ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 ; HSA-VI-NEXT: {{ $}} - ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 + ; HSA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr4_sgpr5 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load (s16) from %ir.in.byref, addrspace 4) ; HSA-VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s16) ; HSA-VI-NEXT: G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) @@ -1470,12 +1470,12 @@ ; LEGACY-MESA-VI: bb.1 (%ir-block.0): ; LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: {{ $}} - ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 + ; LEGACY-MESA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load (s16) from %ir.in.byref, 
addrspace 4) ; LEGACY-MESA-VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s16) ; LEGACY-MESA-VI-NEXT: G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) @@ -1491,14 +1491,14 @@ ; HSA-VI: bb.1 (%ir-block.0): ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 ; HSA-VI-NEXT: {{ $}} - ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 + ; HSA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr4_sgpr5 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; HSA-VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; HSA-VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; HSA-VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C2]](s64) ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s32), addrspace 4) ; HSA-VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load (s32) from %ir.in.byref, addrspace 4) ; HSA-VI-NEXT: G_STORE [[LOAD2]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1) @@ -1508,14 +1508,14 @@ ; LEGACY-MESA-VI: bb.1 (%ir-block.0): ; LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: {{ $}} - ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 + ; LEGACY-MESA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; LEGACY-MESA-VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 48 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C2]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s32), align 16, addrspace 4) ; LEGACY-MESA-VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load (s32) from %ir.in.byref, addrspace 4) ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD2]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1) @@ -1532,14 +1532,14 @@ ; HSA-VI: bb.1 (%ir-block.0): ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 ; HSA-VI-NEXT: {{ $}} - ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 + ; HSA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr4_sgpr5 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; HSA-VI-NEXT: 
[[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; HSA-VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 - ; HSA-VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; HSA-VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C2]](s64) ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s32), align 16, addrspace 4) ; HSA-VI-NEXT: [[LOAD2:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load (<4 x s32>) from %ir.in.byref, addrspace 4) ; HSA-VI-NEXT: G_STORE [[LOAD2]](<4 x s32>), [[LOAD]](p1) :: (volatile store (<4 x s32>) into %ir.out, align 4, addrspace 1) @@ -1549,14 +1549,14 @@ ; LEGACY-MESA-VI: bb.1 (%ir-block.0): ; LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: {{ $}} - ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 + ; LEGACY-MESA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 52 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; LEGACY-MESA-VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 68 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C2]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s32), addrspace 4) ; LEGACY-MESA-VI-NEXT: [[LOAD2:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load (<4 x s32>) from %ir.in.byref, addrspace 4) ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD2]](<4 x s32>), [[LOAD]](p1) :: (volatile store (<4 x s32>) into %ir.out, align 4, addrspace 1) @@ -1573,14 +1573,14 @@ ; HSA-VI: bb.1 (%ir-block.0): ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 ; HSA-VI-NEXT: {{ $}} - ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 + ; HSA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr4_sgpr5 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 256 - ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; HSA-VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 260 - ; HSA-VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; HSA-VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C2]](s64) ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) 
= G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s32), addrspace 4) ; HSA-VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load (s32) from %ir.in.byref, addrspace 4) ; HSA-VI-NEXT: G_STORE [[LOAD2]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1) @@ -1590,14 +1590,14 @@ ; LEGACY-MESA-VI: bb.1 (%ir-block.0): ; LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: {{ $}} - ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 + ; LEGACY-MESA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 292 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; LEGACY-MESA-VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 296 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C2]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s32), align 8, addrspace 4) ; LEGACY-MESA-VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load (s32) from %ir.in.byref, addrspace 4) ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD2]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1) @@ -1614,14 +1614,14 @@ ; HSA-VI: bb.1 (%ir-block.1): ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 ; HSA-VI-NEXT: {{ $}} - ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 + ; HSA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr4_sgpr5 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 64 - ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; HSA-VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 128 - ; HSA-VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; HSA-VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C2]](s64) ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s32), align 16, addrspace 4) ; HSA-VI-NEXT: [[LOAD2:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load (<16 x s32>) from %ir.in.byref, addrspace 4) ; HSA-VI-NEXT: G_STORE [[LOAD2]](<16 x s32>), [[LOAD]](p1) :: (volatile store (<16 x s32>) into %ir.out, align 4, addrspace 1) @@ -1631,14 +1631,14 @@ ; LEGACY-MESA-VI: bb.1 (%ir-block.1): ; LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: {{ $}} - ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 + ; LEGACY-MESA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr0_sgpr1 ; 
LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 100 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; LEGACY-MESA-VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 164 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C2]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s32), addrspace 4) ; LEGACY-MESA-VI-NEXT: [[LOAD2:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load (<16 x s32>) from %ir.in.byref, addrspace 4) ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD2]](<16 x s32>), [[LOAD]](p1) :: (volatile store (<16 x s32>) into %ir.out, align 4, addrspace 1) @@ -1656,12 +1656,12 @@ ; HSA-VI: bb.1 (%ir-block.0): ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 ; HSA-VI-NEXT: {{ $}} - ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 + ; HSA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr4_sgpr5 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; HSA-VI-NEXT: [[ADDRSPACE_CAST:%[0-9]+]]:_(p1) = G_ADDRSPACE_CAST [[PTR_ADD1]](p4) ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[ADDRSPACE_CAST]](p1) :: (dereferenceable "amdgpu-noclobber" load (s32) from %ir.in.byref, addrspace 1) ; HSA-VI-NEXT: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) @@ -1670,12 +1670,12 @@ ; LEGACY-MESA-VI: bb.1 (%ir-block.0): ; LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: {{ $}} - ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 + ; LEGACY-MESA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; LEGACY-MESA-VI-NEXT: [[ADDRSPACE_CAST:%[0-9]+]]:_(p1) = G_ADDRSPACE_CAST [[PTR_ADD1]](p4) ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[ADDRSPACE_CAST]](p1) :: (dereferenceable "amdgpu-noclobber" load (s32) from %ir.in.byref, addrspace 1) ; 
LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) @@ -1690,12 +1690,12 @@ ; HSA-VI: bb.1 (%ir-block.0): ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 ; HSA-VI-NEXT: {{ $}} - ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 + ; HSA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr4_sgpr5 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; HSA-VI-NEXT: [[ADDRSPACE_CAST:%[0-9]+]]:_(p0) = G_ADDRSPACE_CAST [[PTR_ADD1]](p4) ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[ADDRSPACE_CAST]](p0) :: (dereferenceable load (s32) from %ir.in.byref) ; HSA-VI-NEXT: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) @@ -1704,12 +1704,12 @@ ; LEGACY-MESA-VI: bb.1 (%ir-block.0): ; LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: {{ $}} - ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 + ; LEGACY-MESA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; LEGACY-MESA-VI-NEXT: [[ADDRSPACE_CAST:%[0-9]+]]:_(p0) = G_ADDRSPACE_CAST [[PTR_ADD1]](p4) ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[ADDRSPACE_CAST]](p0) :: (dereferenceable load (s32) from %ir.in.byref) ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) @@ -1724,12 +1724,12 @@ ; HSA-VI: bb.1 (%ir-block.0): ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 ; HSA-VI-NEXT: {{ $}} - ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 + ; HSA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr4_sgpr5 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; HSA-VI-NEXT: [[ADDRSPACE_CAST:%[0-9]+]]:_(p6) = G_ADDRSPACE_CAST [[PTR_ADD1]](p4) ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[ADDRSPACE_CAST]](p6) :: (dereferenceable load (s32) from %ir.in.byref, addrspace 6) ; HSA-VI-NEXT: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) @@ -1738,12 
+1738,12 @@ ; LEGACY-MESA-VI: bb.1 (%ir-block.0): ; LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: {{ $}} - ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 + ; LEGACY-MESA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; LEGACY-MESA-VI-NEXT: [[ADDRSPACE_CAST:%[0-9]+]]:_(p6) = G_ADDRSPACE_CAST [[PTR_ADD1]](p4) ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[ADDRSPACE_CAST]](p6) :: (dereferenceable load (s32) from %ir.in.byref, addrspace 6) ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) @@ -1758,12 +1758,12 @@ ; HSA-VI: bb.1 (%ir-block.0): ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 ; HSA-VI-NEXT: {{ $}} - ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 + ; HSA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr4_sgpr5 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; HSA-VI-NEXT: [[ADDRSPACE_CAST:%[0-9]+]]:_(p999) = G_ADDRSPACE_CAST [[PTR_ADD1]](p4) ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[ADDRSPACE_CAST]](p999) :: (dereferenceable load (s32) from %ir.in.byref, addrspace 999) ; HSA-VI-NEXT: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) @@ -1772,12 +1772,12 @@ ; LEGACY-MESA-VI: bb.1 (%ir-block.0): ; LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: {{ $}} - ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 + ; LEGACY-MESA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; LEGACY-MESA-VI-NEXT: [[ADDRSPACE_CAST:%[0-9]+]]:_(p999) = G_ADDRSPACE_CAST [[PTR_ADD1]](p4) ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[ADDRSPACE_CAST]](p999) :: (dereferenceable load (s32) from %ir.in.byref, addrspace 999) ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store (s32) 
into %ir.out, addrspace 1) @@ -1793,12 +1793,12 @@ ; HSA-VI: bb.1 (%ir-block.0): ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 ; HSA-VI-NEXT: {{ $}} - ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 + ; HSA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr4_sgpr5 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; HSA-VI-NEXT: [[ADDRSPACE_CAST:%[0-9]+]]:_(p3) = G_ADDRSPACE_CAST [[PTR_ADD1]](p4) ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[ADDRSPACE_CAST]](p3) :: (dereferenceable load (s32) from %ir.in.byref, addrspace 3) ; HSA-VI-NEXT: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) @@ -1807,12 +1807,12 @@ ; LEGACY-MESA-VI: bb.1 (%ir-block.0): ; LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: {{ $}} - ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 + ; LEGACY-MESA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; LEGACY-MESA-VI-NEXT: [[ADDRSPACE_CAST:%[0-9]+]]:_(p3) = G_ADDRSPACE_CAST [[PTR_ADD1]](p4) ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[ADDRSPACE_CAST]](p3) :: (dereferenceable load (s32) from %ir.in.byref, addrspace 3) ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) @@ -1827,16 +1827,16 @@ ; HSA-VI: bb.1 (%ir-block.0): ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 ; HSA-VI-NEXT: {{ $}} - ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 + ; HSA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr4_sgpr5 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; HSA-VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; HSA-VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; HSA-VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C2]](s64) ; HSA-VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; HSA-VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64) + ; HSA-VI-NEXT: 
[[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C3]](s64) ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (dereferenceable invariant load (s32), align 16, addrspace 4) ; HSA-VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load (s32) from %ir.in0.byref, addrspace 4) ; HSA-VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable load (s32) from %ir.in1.byref, addrspace 4) @@ -1848,16 +1848,16 @@ ; LEGACY-MESA-VI: bb.1 (%ir-block.0): ; LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: {{ $}} - ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 + ; LEGACY-MESA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; LEGACY-MESA-VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 48 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C2]](s64) ; LEGACY-MESA-VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 52 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C3]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (dereferenceable invariant load (s32), addrspace 4) ; LEGACY-MESA-VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load (s32) from %ir.in0.byref, addrspace 4) ; LEGACY-MESA-VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable load (s32) from %ir.in1.byref, addrspace 4) @@ -1878,9 +1878,9 @@ ; HSA-VI: bb.1 (%ir-block.0): ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 ; HSA-VI-NEXT: {{ $}} - ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 + ; HSA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr4_sgpr5 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; HSA-VI-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable load (s32) from %ir.in.byref, addrspace 4) ; HSA-VI-NEXT: G_STORE [[LOAD]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) @@ -1889,9 +1889,9 @@ ; LEGACY-MESA-VI: bb.1 (%ir-block.0): ; LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: {{ $}} - ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 + ; LEGACY-MESA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; LEGACY-MESA-VI-NEXT: [[DEF:%[0-9]+]]:_(p1) = 
G_IMPLICIT_DEF ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable load (s32) from %ir.in.byref, addrspace 4) ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) @@ -1906,9 +1906,9 @@ ; HSA-VI: bb.1 (%ir-block.0): ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 ; HSA-VI-NEXT: {{ $}} - ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 + ; HSA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr4_sgpr5 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p3), align 16, addrspace 4) ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s8) = G_CONSTANT i8 9 ; HSA-VI-NEXT: G_STORE [[C1]](s8), [[LOAD]](p3) :: (store (s8) into %ir.arg, align 4, addrspace 3) @@ -1917,9 +1917,9 @@ ; LEGACY-MESA-VI: bb.1 (%ir-block.0): ; LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: {{ $}} - ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 + ; LEGACY-MESA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p3), addrspace 4) ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s8) = G_CONSTANT i8 9 ; LEGACY-MESA-VI-NEXT: G_STORE [[C1]](s8), [[LOAD]](p3) :: (store (s8) into %ir.arg, align 4, addrspace 3) @@ -1933,7 +1933,7 @@ ; HSA-VI: bb.1 (%ir-block.0): ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 ; HSA-VI-NEXT: {{ $}} - ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 + ; HSA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr4_sgpr5 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 9 ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(p3) = G_CONSTANT i32 0 ; HSA-VI-NEXT: G_STORE [[C]](s8), [[C1]](p3) :: (store (s8) into `ptr addrspace(3) null`, addrspace 3) @@ -1942,7 +1942,7 @@ ; LEGACY-MESA-VI: bb.1 (%ir-block.0): ; LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: {{ $}} - ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 + ; LEGACY-MESA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 9 ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(p3) = G_CONSTANT i32 0 ; LEGACY-MESA-VI-NEXT: G_STORE [[C]](s8), [[C1]](p3) :: (store (s8) into `ptr addrspace(3) null`, addrspace 3) @@ -1956,9 +1956,9 @@ ; HSA-VI: bb.1 (%ir-block.0): ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 ; HSA-VI-NEXT: {{ $}} - ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 + ; HSA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr4_sgpr5 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p1>) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (<2 x p1>), addrspace 4) ; HSA-VI-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; HSA-VI-NEXT: G_STORE [[LOAD]](<2 x p1>), [[DEF]](p1) :: (store (<2 x p1>) into `ptr addrspace(1) undef`, addrspace 1) @@ -1967,9 
+1967,9 @@ ; LEGACY-MESA-VI: bb.1 (%ir-block.0): ; LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: {{ $}} - ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 + ; LEGACY-MESA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p1>) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (<2 x p1>), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD]](<2 x p1>), [[DEF]](p1) :: (store (<2 x p1>) into `ptr addrspace(1) undef`, addrspace 1) @@ -1983,9 +1983,9 @@ ; HSA-VI: bb.1 (%ir-block.0): ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 ; HSA-VI-NEXT: {{ $}} - ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 + ; HSA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr4_sgpr5 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (<2 x p3>), align 16, addrspace 4) ; HSA-VI-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; HSA-VI-NEXT: G_STORE [[LOAD]](<2 x p3>), [[DEF]](p1) :: (store (<2 x p3>) into `ptr addrspace(1) undef`, addrspace 1) @@ -1994,9 +1994,9 @@ ; LEGACY-MESA-VI: bb.1 (%ir-block.0): ; LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: {{ $}} - ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 + ; LEGACY-MESA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (<2 x p3>), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD]](<2 x p3>), [[DEF]](p1) :: (store (<2 x p3>) into `ptr addrspace(1) undef`, addrspace 1) @@ -2010,12 +2010,12 @@ ; HSA-VI: bb.1 (%ir-block.0): ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 ; HSA-VI-NEXT: {{ $}} - ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 + ; HSA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr4_sgpr5 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p1>) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (<2 x s64>), addrspace 4) ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<2 x s32>), align 16, addrspace 4) ; HSA-VI-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; HSA-VI-NEXT: G_STORE [[LOAD]](<2 x p1>), [[DEF]](p1) :: (store (<2 x p1>) into `ptr 
addrspace(1) undef`, addrspace 1) @@ -2027,12 +2027,12 @@ ; LEGACY-MESA-VI: bb.1 (%ir-block.0): ; LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: {{ $}} - ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 + ; LEGACY-MESA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p1>) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (<2 x s64>), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 52 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<2 x s32>), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD]](<2 x p1>), [[DEF]](p1) :: (store (<2 x p1>) into `ptr addrspace(1) undef`, addrspace 1) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_ps.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_ps.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_ps.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_ps.ll @@ -7,10 +7,10 @@ ; CHECK: bb.1.main_body: ; CHECK-NEXT: liveins: $sgpr2, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), 0, 15, [[COPY]](s32), [[COPY]](s32), [[COPY]](s32), [[COPY1]](s32), 0, 0 + ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), 0, 15, [[PRED_COPY]](s32), [[PRED_COPY]](s32), [[PRED_COPY]](s32), [[PRED_COPY1]](s32), 0, 0 ; CHECK-NEXT: S_ENDPGM 0 main_body: call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %arg0, float %arg0, float %arg0, float %psinput1, i1 false, i1 false) #0 @@ -22,11 +22,11 @@ ; CHECK: bb.1.main_body: ; CHECK-NEXT: liveins: $sgpr2, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[DEF]](s32) - ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), 0, 15, [[COPY]](s32), [[COPY]](s32), [[COPY]](s32), [[COPY1]](s32), 0, 0 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[DEF]](s32) + ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), 0, 15, [[PRED_COPY]](s32), [[PRED_COPY]](s32), [[PRED_COPY]](s32), [[PRED_COPY1]](s32), 0, 0 ; CHECK-NEXT: S_ENDPGM 0 main_body: call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %arg0, float %arg0, float %arg0, float %psinput1, i1 false, i1 false) #0 @@ -38,8 +38,8 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: 
[[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: $vgpr0 = COPY [[COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY]](s32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %cast = bitcast i32 %vgpr to float ret float %cast @@ -50,9 +50,9 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY]](s32) - ; CHECK-NEXT: $sgpr0 = COPY [[INT]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[PRED_COPY]](s32) + ; CHECK-NEXT: $sgpr0 = PRED_COPY [[INT]](s32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ret i32 %vgpr } @@ -62,14 +62,14 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY]](s32), [[PRED_COPY1]](s32) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[MV]](s64) ; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[UV]](s32) - ; CHECK-NEXT: $sgpr0 = COPY [[INT]](s32) + ; CHECK-NEXT: $sgpr0 = PRED_COPY [[INT]](s32) ; CHECK-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[UV1]](s32) - ; CHECK-NEXT: $sgpr1 = COPY [[INT1]](s32) + ; CHECK-NEXT: $sgpr1 = PRED_COPY [[INT1]](s32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 ret i64 %vgpr } @@ -79,14 +79,14 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<2 x s32>) ; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[UV]](s32) - ; CHECK-NEXT: $sgpr0 = COPY [[INT]](s32) + ; CHECK-NEXT: $sgpr0 = PRED_COPY [[INT]](s32) ; CHECK-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[UV1]](s32) - ; CHECK-NEXT: $sgpr1 = COPY [[INT1]](s32) + ; CHECK-NEXT: $sgpr1 = PRED_COPY [[INT1]](s32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 ret <2 x i32> %vgpr } @@ -96,13 +96,13 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: 
[[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY]](s32) - ; CHECK-NEXT: $sgpr0 = COPY [[INT]](s32) - ; CHECK-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY1]](s32) - ; CHECK-NEXT: $sgpr1 = COPY [[INT1]](s32) + ; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[PRED_COPY]](s32) + ; CHECK-NEXT: $sgpr0 = PRED_COPY [[INT]](s32) + ; CHECK-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[PRED_COPY1]](s32) + ; CHECK-NEXT: $sgpr1 = PRED_COPY [[INT1]](s32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 %insertvalue0 = insertvalue { i32, i32 } undef, i32 %vgpr0, 0 %value = insertvalue { i32, i32 } %insertvalue0, i32 %vgpr1, 1 @@ -114,10 +114,10 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CHECK-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY]](p3) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(p3) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[PRED_COPY]](p3) ; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[PTRTOINT]](s32) - ; CHECK-NEXT: $sgpr0 = COPY [[INT]](s32) + ; CHECK-NEXT: $sgpr0 = PRED_COPY [[INT]](s32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ret ptr addrspace(3) %vgpr } @@ -127,14 +127,14 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[PRED_COPY]](s32), [[PRED_COPY1]](s32) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[MV]](p1) ; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[UV]](s32) - ; CHECK-NEXT: $sgpr0 = COPY [[INT]](s32) + ; CHECK-NEXT: $sgpr0 = PRED_COPY [[INT]](s32) ; CHECK-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[UV1]](s32) - ; CHECK-NEXT: $sgpr1 = COPY [[INT1]](s32) + ; CHECK-NEXT: $sgpr1 = PRED_COPY [[INT1]](s32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 ret ptr addrspace(1) %vgpr } @@ -144,10 +144,10 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY]](<2 x s16>) ; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[BITCAST]](s32) - ; CHECK-NEXT: $sgpr0 = COPY [[INT]](s32) + ; CHECK-NEXT: $sgpr0 = PRED_COPY [[INT]](s32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ret <2 x i16> %vgpr } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_vs.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_vs.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_vs.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_vs.ll @@ -6,9 +6,9 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2 ; 
CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), 32, 15, [[COPY]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), 0, 0 + ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), 32, 15, [[PRED_COPY]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), 0, 0 ; CHECK-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float %arg0, float undef, float undef, float undef, i1 false, i1 false) #0 ret void @@ -19,9 +19,9 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), 32, 15, [[COPY]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), 0, 0 + ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), 32, 15, [[PRED_COPY]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), 0, 0 ; CHECK-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float %arg0, float undef, float undef, float undef, i1 false, i1 false) #0 ret void @@ -32,9 +32,9 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[PRED_COPY]](s32), [[PRED_COPY1]](s32) ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[MV]](p4) :: (volatile invariant load (s32) from %ir.arg0, addrspace 4) ; CHECK-NEXT: S_ENDPGM 0 %tmp0 = load volatile i32, ptr addrspace(4) %arg0 @@ -46,13 +46,13 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[PRED_COPY1]](s32), [[PRED_COPY2]](s32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[MV]](p4) :: (volatile invariant load (s32) from %ir.arg1, addrspace 4) - ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), 32, 15, [[COPY]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), 0, 0 + ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), 32, 15, [[PRED_COPY]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), 0, 0 ; CHECK-NEXT: S_ENDPGM 0 %tmp0 = load volatile i32, ptr addrspace(4) %arg1 call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float %arg0, float undef, float undef, float undef, i1 false, i1 false) #0 @@ -64,11 +64,11 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY 
$sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), 32, 15, [[COPY2]](s32), [[COPY]](s32), [[COPY3]](s32), [[COPY1]](s32), 0, 0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), 32, 15, [[PRED_COPY2]](s32), [[PRED_COPY]](s32), [[PRED_COPY3]](s32), [[PRED_COPY1]](s32), 0, 0 ; CHECK-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float %arg2, float %arg0, float %arg3, float %arg1, i1 false, i1 false) #0 ret void @@ -79,13 +79,13 @@ ; CHECK: bb.1.main_body: ; CHECK-NEXT: liveins: $sgpr2, $sgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY]](s32) - ; CHECK-NEXT: $sgpr0 = COPY [[INT]](s32) - ; CHECK-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY1]](s32) - ; CHECK-NEXT: $sgpr1 = COPY [[INT1]](s32) + ; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[PRED_COPY]](s32) + ; CHECK-NEXT: $sgpr0 = PRED_COPY [[INT]](s32) + ; CHECK-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[PRED_COPY1]](s32) + ; CHECK-NEXT: $sgpr1 = PRED_COPY [[INT1]](s32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 main_body: %tmp0 = insertvalue <{ i32, i32 }> undef, i32 %arg0, 0 @@ -98,7 +98,7 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[C]](s32) - ; CHECK-NEXT: $sgpr0 = COPY [[INT]](s32) + ; CHECK-NEXT: $sgpr0 = PRED_COPY [[INT]](s32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ret i32 0 } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-assert-align.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-assert-align.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-assert-align.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-assert-align.ll @@ -7,9 +7,9 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[PRED_COPY]](s32), [[PRED_COPY1]](s32) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 0 ; CHECK-NEXT: G_STORE [[C]](s8), [[MV]](p1) :: (store (s8) into %ir.arg0, align 8, addrspace 1) ; CHECK-NEXT: SI_RETURN @@ -25,42 +25,42 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, 
$sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr13 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr31 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr15 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr6_sgpr7 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 0 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @returns_ptr - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY12]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY13]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[COPY17]](s32) + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]] + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY5]] + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]] + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]] + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY18]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY9]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY10]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PRED_COPY11]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = 
PRED_COPY [[PRED_COPY12]](s64) + ; CHECK-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY13]](s32) + ; CHECK-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY14]](s32) + ; CHECK-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY15]](s32) + ; CHECK-NEXT: $sgpr15 = PRED_COPY [[PRED_COPY16]](s32) + ; CHECK-NEXT: $vgpr31 = PRED_COPY [[PRED_COPY17]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @returns_ptr, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1 - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY19]](s32), [[COPY20]](s32) + ; CHECK-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY20:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[PRED_COPY19]](s32), [[PRED_COPY20]](s32) ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: G_STORE [[C]](s8), [[MV]](p1) :: (store (s8) into %ir.ptr, addrspace 1) ; CHECK-NEXT: SI_RETURN @@ -74,42 +74,42 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr13 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr31 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr15 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr6_sgpr7 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 0 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @returns_ptr - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY 
[[COPY10]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY12]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY13]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[COPY17]](s32) + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]] + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY5]] + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]] + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]] + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY18]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY9]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY10]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PRED_COPY11]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY12]](s64) + ; CHECK-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY13]](s32) + ; CHECK-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY14]](s32) + ; CHECK-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY15]](s32) + ; CHECK-NEXT: $sgpr15 = PRED_COPY [[PRED_COPY16]](s32) + ; CHECK-NEXT: $vgpr31 = PRED_COPY [[PRED_COPY17]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @returns_ptr, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1 - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY19]](s32), [[COPY20]](s32) + ; CHECK-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY20:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[PRED_COPY19]](s32), [[PRED_COPY20]](s32) ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: [[ASSERT_ALIGN:%[0-9]+]]:_(p1) = G_ASSERT_ALIGN [[MV]], 8 ; CHECK-NEXT: G_STORE [[C]](s8), [[ASSERT_ALIGN]](p1) :: (store (s8) into %ir.ptr, align 8, addrspace 1) @@ -124,42 +124,42 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr13 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr31 + ; CHECK-NEXT: 
[[PRED_COPY1:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr15 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr6_sgpr7 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 0 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @returns_ptr_align8 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY12]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY13]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[COPY17]](s32) + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]] + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY5]] + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]] + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]] + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY18]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY9]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY10]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PRED_COPY11]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY12]](s64) + ; CHECK-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY13]](s32) + ; CHECK-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY14]](s32) + ; CHECK-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY15]](s32) + ; CHECK-NEXT: $sgpr15 = PRED_COPY [[PRED_COPY16]](s32) + ; CHECK-NEXT: $vgpr31 = PRED_COPY [[PRED_COPY17]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @returns_ptr_align8, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1 
- ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY19]](s32), [[COPY20]](s32) + ; CHECK-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY20:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[PRED_COPY19]](s32), [[PRED_COPY20]](s32) ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: [[ASSERT_ALIGN:%[0-9]+]]:_(p1) = G_ASSERT_ALIGN [[MV]], 8 ; CHECK-NEXT: G_STORE [[C]](s8), [[ASSERT_ALIGN]](p1) :: (store (s8) into %ir.ptr, align 8, addrspace 1) @@ -174,36 +174,36 @@ ; CHECK: bb.1.entry: ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr13 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr31 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr15 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr6_sgpr7 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 ; CHECK-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @returns_ptr_align8 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY12]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY13]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[COPY17]](s32) + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]] + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY5]] + ; CHECK-NEXT: 
[[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]] + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]] + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY18]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY9]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY10]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PRED_COPY11]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY12]](s64) + ; CHECK-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY13]](s32) + ; CHECK-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY14]](s32) + ; CHECK-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY15]](s32) + ; CHECK-NEXT: $sgpr15 = PRED_COPY [[PRED_COPY16]](s32) + ; CHECK-NEXT: $vgpr31 = PRED_COPY [[PRED_COPY17]](s32) ; CHECK-NEXT: SI_TCRETURN [[GV]](p0), @returns_ptr_align8, 0, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 entry: %call = tail call ptr addrspace(1) @returns_ptr_align8() diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-atomicrmw.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-atomicrmw.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-atomicrmw.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-atomicrmw.ll @@ -6,10 +6,10 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(p3) = PRED_COPY $vgpr0 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; CHECK-NEXT: [[ATOMICRMW_FADD:%[0-9]+]]:_(s32) = G_ATOMICRMW_FADD [[COPY]](p3), [[C]] :: (load store seq_cst (s32) on %ir.addr, addrspace 3) - ; CHECK-NEXT: $vgpr0 = COPY [[ATOMICRMW_FADD]](s32) + ; CHECK-NEXT: [[ATOMICRMW_FADD:%[0-9]+]]:_(s32) = G_ATOMICRMW_FADD [[PRED_COPY]](p3), [[C]] :: (load store seq_cst (s32) on %ir.addr, addrspace 3) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[ATOMICRMW_FADD]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %oldval = atomicrmw fadd ptr addrspace(3) %addr, float 1.0 seq_cst ret float %oldval @@ -21,10 +21,10 @@ ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(p3) = PRED_COPY $vgpr0 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32) from %ir.addr, addrspace 3) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PRED_COPY]](p3) :: (load (s32) from %ir.addr, addrspace 3) ; CHECK-NEXT: G_BR %bb.2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2.atomicrmw.start: @@ -33,7 +33,7 @@ ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(s64) = G_PHI %8(s64), %bb.2, [[C1]](s64), %bb.1 ; CHECK-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI [[LOAD]](s32), %bb.1, %6(s32), %bb.2 ; CHECK-NEXT: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[PHI1]], [[C]] - ; CHECK-NEXT: [[ATOMIC_CMPXCHG_WITH_SUCCESS:%[0-9]+]]:_(s32), [[ATOMIC_CMPXCHG_WITH_SUCCESS1:%[0-9]+]]:_(s1) = G_ATOMIC_CMPXCHG_WITH_SUCCESS [[COPY]](p3), 
[[PHI1]], [[FSUB]] :: (load store seq_cst seq_cst (s32) on %ir.addr, addrspace 3) + ; CHECK-NEXT: [[ATOMIC_CMPXCHG_WITH_SUCCESS:%[0-9]+]]:_(s32), [[ATOMIC_CMPXCHG_WITH_SUCCESS1:%[0-9]+]]:_(s1) = G_ATOMIC_CMPXCHG_WITH_SUCCESS [[PRED_COPY]](p3), [[PHI1]], [[FSUB]] :: (load store seq_cst seq_cst (s32) on %ir.addr, addrspace 3) ; CHECK-NEXT: [[INT:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[ATOMIC_CMPXCHG_WITH_SUCCESS1]](s1), [[PHI]](s64) ; CHECK-NEXT: [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.loop), [[INT]](s64) ; CHECK-NEXT: G_BRCOND [[INT1]](s1), %bb.3 @@ -43,7 +43,7 @@ ; CHECK-NEXT: [[PHI2:%[0-9]+]]:_(s32) = G_PHI [[ATOMIC_CMPXCHG_WITH_SUCCESS]](s32), %bb.2 ; CHECK-NEXT: [[PHI3:%[0-9]+]]:_(s64) = G_PHI [[INT]](s64), %bb.2 ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI3]](s64) - ; CHECK-NEXT: $vgpr0 = COPY [[PHI2]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[PHI2]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %oldval = atomicrmw fsub ptr addrspace(3) %addr, float 1.0 seq_cst ret float %oldval diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-abi-attribute-hints.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-abi-attribute-hints.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-abi-attribute-hints.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-abi-attribute-hints.ll @@ -11,35 +11,35 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr16 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr15 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr6_sgpr7 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:_(p4) = PRED_COPY $sgpr8_sgpr9 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @extern - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(p4) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY6]](p4) + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY5]] + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY4]] + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY6]](p4) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY9]], [[C]](s64) - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s64) = COPY [[COPY3]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY]] + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY9]], [[C]](s64) + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY3]] + ; CHECK-NEXT: 
[[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]] + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]] + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]] ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY14]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY7]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY8]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY10]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY11]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY12]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY13]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32) + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY14]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY7]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY8]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY10]](s64) + ; CHECK-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY11]](s32) + ; CHECK-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY12]](s32) + ; CHECK-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY13]](s32) + ; CHECK-NEXT: $sgpr15 = PRED_COPY [[DEF]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @extern, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -52,39 +52,39 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr6_sgpr7 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:_(p4) = PRED_COPY $sgpr8_sgpr9 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @extern - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(p4) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY6]](p4) + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY5]] + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY4]] + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY6]](p4) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY9]], [[C]](s64) - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s64) 
= COPY [[COPY3]] + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY9]], [[C]](s64) + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY3]] ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY12]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY11]], [[SHL]] - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY12]], [[C1]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY11]], [[SHL]] + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY13]], [[C2]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY13]], [[C2]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY14]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY7]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY8]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY10]](s64) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY14]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY7]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY8]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY10]](s64) + ; CHECK-NEXT: $sgpr15 = PRED_COPY [[DEF]](s32) + ; CHECK-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @extern, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -97,30 +97,30 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr8_sgpr9 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(p4) = PRED_COPY $sgpr8_sgpr9 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @extern - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(p4) = COPY [[COPY3]](p4) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY3]](p4) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY4]], [[C]](s64) + ; 
CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY4]], [[C]](s64) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY6]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY5]], [[SHL]] - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY6]], [[C1]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY5]], [[SHL]] + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY7]], [[C2]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY7]], [[C2]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY8]](<4 x s32>) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY8]](<4 x s32>) + ; CHECK-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; CHECK-NEXT: $sgpr15 = PRED_COPY [[DEF]](s32) + ; CHECK-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @extern, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr8_sgpr9, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -133,34 +133,34 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr15 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr6_sgpr7 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @extern - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY 
[[COPY4]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY16]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY8]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY9]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[COPY15]](s32) + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY5]] + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY4]] + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]] + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]] + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]] + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY16]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY8]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY9]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PRED_COPY10]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY11]](s64) + ; CHECK-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY12]](s32) + ; CHECK-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY13]](s32) + ; CHECK-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY14]](s32) + ; CHECK-NEXT: $sgpr15 = PRED_COPY [[PRED_COPY15]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @extern, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: SI_RETURN @@ -173,28 +173,28 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr15, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr31 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr15 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr6_sgpr7 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @extern - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(p4) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(p4) = COPY 
[[COPY4]] - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY3]] - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s64) = COPY [[COPY2]] - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY12]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY6]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY7]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY8]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY9]](s64) - ; CHECK-NEXT: $sgpr15 = COPY [[COPY10]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[COPY11]](s32) + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY5]] + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY4]] + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY3]] + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY2]] + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]] + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY12]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY6]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY7]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PRED_COPY8]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY9]](s64) + ; CHECK-NEXT: $sgpr15 = PRED_COPY [[PRED_COPY10]](s32) + ; CHECK-NEXT: $vgpr31 = PRED_COPY [[PRED_COPY11]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @extern, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: SI_RETURN @@ -207,19 +207,19 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr15, $vgpr31, $sgpr8_sgpr9 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr31 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr15 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @extern - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(p4) = COPY [[COPY2]] - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY6]](<4 x s32>) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY3]](p4) - ; CHECK-NEXT: $sgpr15 = COPY [[COPY4]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[COPY5]](s32) + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY2]] + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]] + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY6]](<4 x s32>) + ; CHECK-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PRED_COPY3]](p4) + ; CHECK-NEXT: $sgpr15 = 
PRED_COPY [[PRED_COPY4]](s32) + ; CHECK-NEXT: $vgpr31 = PRED_COPY [[PRED_COPY5]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @extern, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr8_sgpr9, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: SI_RETURN diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-implicit-args.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-implicit-args.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-implicit-args.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-implicit-args.ll @@ -13,51 +13,51 @@ ; GFX900: bb.1 (%ir-block.1): ; GFX900-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GFX900-NEXT: {{ $}} - ; GFX900-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; GFX900-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; GFX900-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GFX900-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GFX900-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GFX900-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GFX900-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GFX900-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GFX900-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GFX900-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GFX900-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; GFX900-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; GFX900-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; GFX900-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr16 + ; GFX900-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr15 + ; GFX900-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; GFX900-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; GFX900-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr6_sgpr7 + ; GFX900-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; GFX900-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY $sgpr8_sgpr9 ; GFX900-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 ; GFX900-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr) ; GFX900-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GFX900-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i32 - ; GFX900-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GFX900-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GFX900-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; GFX900-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]] + ; GFX900-NEXT: [[PRED_COPY11:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; GFX900-NEXT: [[PRED_COPY12:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY9]](p4) ; GFX900-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 20 - ; GFX900-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64) - ; GFX900-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GFX900-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GFX900-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GFX900-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GFX900-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY12]], [[C1]](s64) + ; GFX900-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; GFX900-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; GFX900-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; 
GFX900-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] ; GFX900-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX900-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GFX900-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GFX900-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; GFX900-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; GFX900-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GFX900-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32) - ; GFX900-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GFX900-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GFX900-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY18]], [[C2]](s32) + ; GFX900-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY17]], [[SHL]] + ; GFX900-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; GFX900-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GFX900-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32) + ; GFX900-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY19]], [[C3]](s32) ; GFX900-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GFX900-NEXT: $vgpr0 = COPY [[C]](s32) - ; GFX900-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GFX900-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; GFX900-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; GFX900-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; GFX900-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GFX900-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GFX900-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GFX900-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GFX900-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GFX900-NEXT: $sgpr15 = COPY [[DEF]](s32) - ; GFX900-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; GFX900-NEXT: $vgpr0 = PRED_COPY [[C]](s32) + ; GFX900-NEXT: [[PRED_COPY20:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; GFX900-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY20]](<4 x s32>) + ; GFX900-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY10]](p4) + ; GFX900-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY11]](p4) + ; GFX900-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; GFX900-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY13]](s64) + ; GFX900-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY14]](s32) + ; GFX900-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY15]](s32) + ; GFX900-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY16]](s32) + ; GFX900-NEXT: $sgpr15 = PRED_COPY [[DEF]](s32) + ; GFX900-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; GFX900-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; GFX900-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GFX900-NEXT: S_ENDPGM 0 @@ -65,51 +65,51 @@ ; GFX908: bb.1 (%ir-block.1): ; GFX908-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GFX908-NEXT: {{ $}} - ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GFX908-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GFX908-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GFX908-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GFX908-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = 
COPY $sgpr10_sgpr11 - ; GFX908-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GFX908-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GFX908-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GFX908-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; GFX908-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; GFX908-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; GFX908-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr16 + ; GFX908-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr15 + ; GFX908-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; GFX908-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; GFX908-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr6_sgpr7 + ; GFX908-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; GFX908-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY $sgpr8_sgpr9 ; GFX908-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 ; GFX908-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr) ; GFX908-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GFX908-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i32 - ; GFX908-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GFX908-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GFX908-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; GFX908-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]] + ; GFX908-NEXT: [[PRED_COPY11:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; GFX908-NEXT: [[PRED_COPY12:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY9]](p4) ; GFX908-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 20 - ; GFX908-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64) - ; GFX908-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GFX908-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GFX908-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GFX908-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GFX908-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY12]], [[C1]](s64) + ; GFX908-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; GFX908-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; GFX908-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; GFX908-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] ; GFX908-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX908-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GFX908-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GFX908-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; GFX908-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; GFX908-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GFX908-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32) - ; GFX908-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GFX908-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GFX908-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY18]], [[C2]](s32) + ; GFX908-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY17]], [[SHL]] + ; GFX908-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; GFX908-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GFX908-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32) + ; GFX908-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY19]], [[C3]](s32) ; GFX908-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GFX908-NEXT: $vgpr0 = COPY [[C]](s32) - ; GFX908-NEXT: [[COPY20:%[0-9]+]]:_(<4 x 
s32>) = COPY $private_rsrc_reg - ; GFX908-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; GFX908-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; GFX908-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; GFX908-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GFX908-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GFX908-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GFX908-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GFX908-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GFX908-NEXT: $sgpr15 = COPY [[DEF]](s32) - ; GFX908-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; GFX908-NEXT: $vgpr0 = PRED_COPY [[C]](s32) + ; GFX908-NEXT: [[PRED_COPY20:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; GFX908-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY20]](<4 x s32>) + ; GFX908-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY10]](p4) + ; GFX908-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY11]](p4) + ; GFX908-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; GFX908-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY13]](s64) + ; GFX908-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY14]](s32) + ; GFX908-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY15]](s32) + ; GFX908-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY16]](s32) + ; GFX908-NEXT: $sgpr15 = PRED_COPY [[DEF]](s32) + ; GFX908-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; GFX908-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; GFX908-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GFX908-NEXT: S_ENDPGM 0 @@ -122,39 +122,39 @@ ; GFX900: bb.1 (%ir-block.0): ; GFX900-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GFX900-NEXT: {{ $}} - ; GFX900-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 - ; GFX900-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GFX900-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GFX900-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr13 - ; GFX900-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12 - ; GFX900-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GFX900-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 - ; GFX900-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GFX900-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; GFX900-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr31 + ; GFX900-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr15 + ; GFX900-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; GFX900-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; GFX900-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; GFX900-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; GFX900-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; GFX900-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr6_sgpr7 + ; GFX900-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 ; GFX900-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 99 ; GFX900-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GFX900-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i32 - ; GFX900-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GFX900-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GFX900-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; GFX900-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY5]] - ; GFX900-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY 
[[COPY4]] - ; GFX900-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GFX900-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; GFX900-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; GFX900-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; GFX900-NEXT: $vgpr0 = COPY [[C]](s32) - ; GFX900-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GFX900-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) - ; GFX900-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) - ; GFX900-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4) - ; GFX900-NEXT: $sgpr8_sgpr9 = COPY [[COPY11]](p4) - ; GFX900-NEXT: $sgpr10_sgpr11 = COPY [[COPY12]](s64) - ; GFX900-NEXT: $sgpr12 = COPY [[COPY13]](s32) - ; GFX900-NEXT: $sgpr13 = COPY [[COPY14]](s32) - ; GFX900-NEXT: $sgpr14 = COPY [[COPY15]](s32) - ; GFX900-NEXT: $sgpr15 = COPY [[COPY16]](s32) - ; GFX900-NEXT: $vgpr31 = COPY [[COPY17]](s32) + ; GFX900-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]] + ; GFX900-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; GFX900-NEXT: [[PRED_COPY11:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY6]] + ; GFX900-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY5]] + ; GFX900-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; GFX900-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; GFX900-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]] + ; GFX900-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]] + ; GFX900-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) + ; GFX900-NEXT: $vgpr0 = PRED_COPY [[C]](s32) + ; GFX900-NEXT: [[PRED_COPY18:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX900-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY18]](<4 x s32>) + ; GFX900-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY9]](p4) + ; GFX900-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY10]](p4) + ; GFX900-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PRED_COPY11]](p4) + ; GFX900-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY12]](s64) + ; GFX900-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY13]](s32) + ; GFX900-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY14]](s32) + ; GFX900-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY15]](s32) + ; GFX900-NEXT: $sgpr15 = PRED_COPY [[PRED_COPY16]](s32) + ; GFX900-NEXT: $vgpr31 = PRED_COPY [[PRED_COPY17]](s32) ; GFX900-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; GFX900-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GFX900-NEXT: SI_RETURN @@ -162,39 +162,39 @@ ; GFX908: bb.1 (%ir-block.0): ; GFX908-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GFX908-NEXT: {{ $}} - ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 - ; GFX908-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GFX908-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GFX908-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr13 - ; GFX908-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12 - ; GFX908-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GFX908-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 - ; GFX908-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GFX908-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; GFX908-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY 
$vgpr31 + ; GFX908-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr15 + ; GFX908-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; GFX908-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; GFX908-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; GFX908-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; GFX908-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; GFX908-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr6_sgpr7 + ; GFX908-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 ; GFX908-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 99 ; GFX908-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GFX908-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i32 - ; GFX908-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GFX908-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GFX908-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; GFX908-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY5]] - ; GFX908-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GFX908-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GFX908-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; GFX908-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; GFX908-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; GFX908-NEXT: $vgpr0 = COPY [[C]](s32) - ; GFX908-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GFX908-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) - ; GFX908-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) - ; GFX908-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4) - ; GFX908-NEXT: $sgpr8_sgpr9 = COPY [[COPY11]](p4) - ; GFX908-NEXT: $sgpr10_sgpr11 = COPY [[COPY12]](s64) - ; GFX908-NEXT: $sgpr12 = COPY [[COPY13]](s32) - ; GFX908-NEXT: $sgpr13 = COPY [[COPY14]](s32) - ; GFX908-NEXT: $sgpr14 = COPY [[COPY15]](s32) - ; GFX908-NEXT: $sgpr15 = COPY [[COPY16]](s32) - ; GFX908-NEXT: $vgpr31 = COPY [[COPY17]](s32) + ; GFX908-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]] + ; GFX908-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; GFX908-NEXT: [[PRED_COPY11:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY6]] + ; GFX908-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY5]] + ; GFX908-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; GFX908-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; GFX908-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]] + ; GFX908-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]] + ; GFX908-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) + ; GFX908-NEXT: $vgpr0 = PRED_COPY [[C]](s32) + ; GFX908-NEXT: [[PRED_COPY18:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX908-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY18]](<4 x s32>) + ; GFX908-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY9]](p4) + ; GFX908-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY10]](p4) + ; GFX908-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PRED_COPY11]](p4) + ; GFX908-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY12]](s64) + ; GFX908-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY13]](s32) + ; GFX908-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY14]](s32) + ; GFX908-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY15]](s32) + ; GFX908-NEXT: $sgpr15 = PRED_COPY [[PRED_COPY16]](s32) + ; GFX908-NEXT: $vgpr31 = PRED_COPY [[PRED_COPY17]](s32) ; GFX908-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit 
$sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; GFX908-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GFX908-NEXT: SI_RETURN @@ -209,87 +209,87 @@ ; GFX900: bb.1 (%ir-block.1): ; GFX900-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GFX900-NEXT: {{ $}} - ; GFX900-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; GFX900-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; GFX900-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GFX900-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GFX900-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GFX900-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GFX900-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GFX900-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GFX900-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GFX900-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GFX900-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; GFX900-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; GFX900-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; GFX900-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr16 + ; GFX900-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr15 + ; GFX900-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; GFX900-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; GFX900-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr6_sgpr7 + ; GFX900-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; GFX900-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY $sgpr8_sgpr9 ; GFX900-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX900-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32) ; GFX900-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr) ; GFX900-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GFX900-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v32i32 - ; GFX900-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GFX900-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GFX900-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; GFX900-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]] + ; GFX900-NEXT: [[PRED_COPY11:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; GFX900-NEXT: [[PRED_COPY12:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY9]](p4) ; GFX900-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 20 - ; GFX900-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64) - ; GFX900-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GFX900-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GFX900-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GFX900-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GFX900-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY12]], [[C1]](s64) + ; GFX900-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; GFX900-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; GFX900-NEXT: 
[[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; GFX900-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] ; GFX900-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX900-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GFX900-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GFX900-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; GFX900-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; GFX900-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GFX900-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32) - ; GFX900-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GFX900-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GFX900-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY18]], [[C2]](s32) + ; GFX900-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY17]], [[SHL]] + ; GFX900-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; GFX900-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GFX900-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32) + ; GFX900-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY19]], [[C3]](s32) ; GFX900-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; GFX900-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<32 x s32>) ; GFX900-NEXT: [[AMDGPU_WAVE_ADDRESS:%[0-9]+]]:_(p5) = G_AMDGPU_WAVE_ADDRESS $sp_reg ; GFX900-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX900-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C4]](s32) ; GFX900-NEXT: G_STORE [[UV31]](s32), [[PTR_ADD1]](p5) :: (store (s32) into stack, align 16, addrspace 5) - ; GFX900-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX900-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX900-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX900-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; GFX900-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; GFX900-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; GFX900-NEXT: $vgpr6 = COPY [[UV6]](s32) - ; GFX900-NEXT: $vgpr7 = COPY [[UV7]](s32) - ; GFX900-NEXT: $vgpr8 = COPY [[UV8]](s32) - ; GFX900-NEXT: $vgpr9 = COPY [[UV9]](s32) - ; GFX900-NEXT: $vgpr10 = COPY [[UV10]](s32) - ; GFX900-NEXT: $vgpr11 = COPY [[UV11]](s32) - ; GFX900-NEXT: $vgpr12 = COPY [[UV12]](s32) - ; GFX900-NEXT: $vgpr13 = COPY [[UV13]](s32) - ; GFX900-NEXT: $vgpr14 = COPY [[UV14]](s32) - ; GFX900-NEXT: $vgpr15 = COPY [[UV15]](s32) - ; GFX900-NEXT: $vgpr16 = COPY [[UV16]](s32) - ; GFX900-NEXT: $vgpr17 = COPY [[UV17]](s32) - ; GFX900-NEXT: $vgpr18 = COPY [[UV18]](s32) - ; GFX900-NEXT: $vgpr19 = COPY [[UV19]](s32) - ; GFX900-NEXT: $vgpr20 = COPY [[UV20]](s32) - ; GFX900-NEXT: $vgpr21 = COPY [[UV21]](s32) - ; GFX900-NEXT: $vgpr22 = COPY [[UV22]](s32) - ; GFX900-NEXT: $vgpr23 = COPY [[UV23]](s32) - ; GFX900-NEXT: $vgpr24 = COPY [[UV24]](s32) - 
; GFX900-NEXT: $vgpr25 = COPY [[UV25]](s32) - ; GFX900-NEXT: $vgpr26 = COPY [[UV26]](s32) - ; GFX900-NEXT: $vgpr27 = COPY [[UV27]](s32) - ; GFX900-NEXT: $vgpr28 = COPY [[UV28]](s32) - ; GFX900-NEXT: $vgpr29 = COPY [[UV29]](s32) - ; GFX900-NEXT: $vgpr30 = COPY [[UV30]](s32) - ; GFX900-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GFX900-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; GFX900-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; GFX900-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; GFX900-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GFX900-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GFX900-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GFX900-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GFX900-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GFX900-NEXT: $sgpr15 = COPY [[DEF]](s32) - ; GFX900-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; GFX900-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX900-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX900-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX900-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) + ; GFX900-NEXT: $vgpr4 = PRED_COPY [[UV4]](s32) + ; GFX900-NEXT: $vgpr5 = PRED_COPY [[UV5]](s32) + ; GFX900-NEXT: $vgpr6 = PRED_COPY [[UV6]](s32) + ; GFX900-NEXT: $vgpr7 = PRED_COPY [[UV7]](s32) + ; GFX900-NEXT: $vgpr8 = PRED_COPY [[UV8]](s32) + ; GFX900-NEXT: $vgpr9 = PRED_COPY [[UV9]](s32) + ; GFX900-NEXT: $vgpr10 = PRED_COPY [[UV10]](s32) + ; GFX900-NEXT: $vgpr11 = PRED_COPY [[UV11]](s32) + ; GFX900-NEXT: $vgpr12 = PRED_COPY [[UV12]](s32) + ; GFX900-NEXT: $vgpr13 = PRED_COPY [[UV13]](s32) + ; GFX900-NEXT: $vgpr14 = PRED_COPY [[UV14]](s32) + ; GFX900-NEXT: $vgpr15 = PRED_COPY [[UV15]](s32) + ; GFX900-NEXT: $vgpr16 = PRED_COPY [[UV16]](s32) + ; GFX900-NEXT: $vgpr17 = PRED_COPY [[UV17]](s32) + ; GFX900-NEXT: $vgpr18 = PRED_COPY [[UV18]](s32) + ; GFX900-NEXT: $vgpr19 = PRED_COPY [[UV19]](s32) + ; GFX900-NEXT: $vgpr20 = PRED_COPY [[UV20]](s32) + ; GFX900-NEXT: $vgpr21 = PRED_COPY [[UV21]](s32) + ; GFX900-NEXT: $vgpr22 = PRED_COPY [[UV22]](s32) + ; GFX900-NEXT: $vgpr23 = PRED_COPY [[UV23]](s32) + ; GFX900-NEXT: $vgpr24 = PRED_COPY [[UV24]](s32) + ; GFX900-NEXT: $vgpr25 = PRED_COPY [[UV25]](s32) + ; GFX900-NEXT: $vgpr26 = PRED_COPY [[UV26]](s32) + ; GFX900-NEXT: $vgpr27 = PRED_COPY [[UV27]](s32) + ; GFX900-NEXT: $vgpr28 = PRED_COPY [[UV28]](s32) + ; GFX900-NEXT: $vgpr29 = PRED_COPY [[UV29]](s32) + ; GFX900-NEXT: $vgpr30 = PRED_COPY [[UV30]](s32) + ; GFX900-NEXT: [[PRED_COPY20:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; GFX900-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY20]](<4 x s32>) + ; GFX900-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY10]](p4) + ; GFX900-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY11]](p4) + ; GFX900-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; GFX900-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY13]](s64) + ; GFX900-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY14]](s32) + ; GFX900-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY15]](s32) + ; GFX900-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY16]](s32) + ; GFX900-NEXT: $sgpr15 = PRED_COPY [[DEF]](s32) + ; GFX900-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; GFX900-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v32i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, 
implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; GFX900-NEXT: ADJCALLSTACKDOWN 0, 4, implicit-def $scc ; GFX900-NEXT: S_ENDPGM 0 @@ -297,87 +297,87 @@ ; GFX908: bb.1 (%ir-block.1): ; GFX908-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GFX908-NEXT: {{ $}} - ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GFX908-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GFX908-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GFX908-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GFX908-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GFX908-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GFX908-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GFX908-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GFX908-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; GFX908-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; GFX908-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; GFX908-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr16 + ; GFX908-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr15 + ; GFX908-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; GFX908-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; GFX908-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr6_sgpr7 + ; GFX908-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; GFX908-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY $sgpr8_sgpr9 ; GFX908-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX908-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32) ; GFX908-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr) ; GFX908-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GFX908-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v32i32 - ; GFX908-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GFX908-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GFX908-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; GFX908-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]] + ; GFX908-NEXT: [[PRED_COPY11:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; GFX908-NEXT: [[PRED_COPY12:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY9]](p4) ; GFX908-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 20 - ; GFX908-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64) - ; GFX908-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GFX908-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GFX908-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GFX908-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; 
GFX908-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY12]], [[C1]](s64) + ; GFX908-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; GFX908-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; GFX908-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; GFX908-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] ; GFX908-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX908-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GFX908-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GFX908-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; GFX908-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; GFX908-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GFX908-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32) - ; GFX908-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GFX908-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GFX908-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY18]], [[C2]](s32) + ; GFX908-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY17]], [[SHL]] + ; GFX908-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; GFX908-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GFX908-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32) + ; GFX908-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY19]], [[C3]](s32) ; GFX908-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; GFX908-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<32 x s32>) ; GFX908-NEXT: [[AMDGPU_WAVE_ADDRESS:%[0-9]+]]:_(p5) = G_AMDGPU_WAVE_ADDRESS $sp_reg ; GFX908-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX908-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C4]](s32) ; GFX908-NEXT: G_STORE [[UV31]](s32), [[PTR_ADD1]](p5) :: (store (s32) into stack, align 16, addrspace 5) - ; GFX908-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX908-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX908-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX908-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; GFX908-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; GFX908-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; GFX908-NEXT: $vgpr6 = COPY [[UV6]](s32) - ; GFX908-NEXT: $vgpr7 = COPY [[UV7]](s32) - ; GFX908-NEXT: $vgpr8 = COPY [[UV8]](s32) - ; GFX908-NEXT: $vgpr9 = COPY [[UV9]](s32) - ; GFX908-NEXT: $vgpr10 = COPY [[UV10]](s32) - ; GFX908-NEXT: $vgpr11 = COPY [[UV11]](s32) - ; GFX908-NEXT: $vgpr12 = COPY [[UV12]](s32) - ; GFX908-NEXT: $vgpr13 = COPY [[UV13]](s32) - ; GFX908-NEXT: $vgpr14 = COPY [[UV14]](s32) - ; GFX908-NEXT: $vgpr15 = COPY [[UV15]](s32) - ; GFX908-NEXT: $vgpr16 = COPY [[UV16]](s32) - ; GFX908-NEXT: $vgpr17 = COPY [[UV17]](s32) - ; GFX908-NEXT: $vgpr18 = COPY [[UV18]](s32) - ; GFX908-NEXT: $vgpr19 = COPY 
[[UV19]](s32) - ; GFX908-NEXT: $vgpr20 = COPY [[UV20]](s32) - ; GFX908-NEXT: $vgpr21 = COPY [[UV21]](s32) - ; GFX908-NEXT: $vgpr22 = COPY [[UV22]](s32) - ; GFX908-NEXT: $vgpr23 = COPY [[UV23]](s32) - ; GFX908-NEXT: $vgpr24 = COPY [[UV24]](s32) - ; GFX908-NEXT: $vgpr25 = COPY [[UV25]](s32) - ; GFX908-NEXT: $vgpr26 = COPY [[UV26]](s32) - ; GFX908-NEXT: $vgpr27 = COPY [[UV27]](s32) - ; GFX908-NEXT: $vgpr28 = COPY [[UV28]](s32) - ; GFX908-NEXT: $vgpr29 = COPY [[UV29]](s32) - ; GFX908-NEXT: $vgpr30 = COPY [[UV30]](s32) - ; GFX908-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GFX908-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; GFX908-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; GFX908-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; GFX908-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GFX908-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GFX908-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GFX908-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GFX908-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GFX908-NEXT: $sgpr15 = COPY [[DEF]](s32) - ; GFX908-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; GFX908-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX908-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX908-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX908-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) + ; GFX908-NEXT: $vgpr4 = PRED_COPY [[UV4]](s32) + ; GFX908-NEXT: $vgpr5 = PRED_COPY [[UV5]](s32) + ; GFX908-NEXT: $vgpr6 = PRED_COPY [[UV6]](s32) + ; GFX908-NEXT: $vgpr7 = PRED_COPY [[UV7]](s32) + ; GFX908-NEXT: $vgpr8 = PRED_COPY [[UV8]](s32) + ; GFX908-NEXT: $vgpr9 = PRED_COPY [[UV9]](s32) + ; GFX908-NEXT: $vgpr10 = PRED_COPY [[UV10]](s32) + ; GFX908-NEXT: $vgpr11 = PRED_COPY [[UV11]](s32) + ; GFX908-NEXT: $vgpr12 = PRED_COPY [[UV12]](s32) + ; GFX908-NEXT: $vgpr13 = PRED_COPY [[UV13]](s32) + ; GFX908-NEXT: $vgpr14 = PRED_COPY [[UV14]](s32) + ; GFX908-NEXT: $vgpr15 = PRED_COPY [[UV15]](s32) + ; GFX908-NEXT: $vgpr16 = PRED_COPY [[UV16]](s32) + ; GFX908-NEXT: $vgpr17 = PRED_COPY [[UV17]](s32) + ; GFX908-NEXT: $vgpr18 = PRED_COPY [[UV18]](s32) + ; GFX908-NEXT: $vgpr19 = PRED_COPY [[UV19]](s32) + ; GFX908-NEXT: $vgpr20 = PRED_COPY [[UV20]](s32) + ; GFX908-NEXT: $vgpr21 = PRED_COPY [[UV21]](s32) + ; GFX908-NEXT: $vgpr22 = PRED_COPY [[UV22]](s32) + ; GFX908-NEXT: $vgpr23 = PRED_COPY [[UV23]](s32) + ; GFX908-NEXT: $vgpr24 = PRED_COPY [[UV24]](s32) + ; GFX908-NEXT: $vgpr25 = PRED_COPY [[UV25]](s32) + ; GFX908-NEXT: $vgpr26 = PRED_COPY [[UV26]](s32) + ; GFX908-NEXT: $vgpr27 = PRED_COPY [[UV27]](s32) + ; GFX908-NEXT: $vgpr28 = PRED_COPY [[UV28]](s32) + ; GFX908-NEXT: $vgpr29 = PRED_COPY [[UV29]](s32) + ; GFX908-NEXT: $vgpr30 = PRED_COPY [[UV30]](s32) + ; GFX908-NEXT: [[PRED_COPY20:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; GFX908-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY20]](<4 x s32>) + ; GFX908-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY10]](p4) + ; GFX908-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY11]](p4) + ; GFX908-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; GFX908-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY13]](s64) + ; GFX908-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY14]](s32) + ; GFX908-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY15]](s32) + ; GFX908-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY16]](s32) + ; GFX908-NEXT: $sgpr15 = PRED_COPY [[DEF]](s32) + ; GFX908-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; GFX908-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v32i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, 
implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; GFX908-NEXT: ADJCALLSTACKDOWN 0, 4, implicit-def $scc ; GFX908-NEXT: S_ENDPGM 0 @@ -390,126 +390,126 @@ ; GFX900: bb.1 (%ir-block.1): ; GFX900-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GFX900-NEXT: {{ $}} - ; GFX900-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 - ; GFX900-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GFX900-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GFX900-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr13 - ; GFX900-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12 - ; GFX900-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GFX900-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 - ; GFX900-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GFX900-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GFX900-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX900-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) + ; GFX900-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr31 + ; GFX900-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr15 + ; GFX900-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; GFX900-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; GFX900-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; GFX900-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; GFX900-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; GFX900-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr6_sgpr7 + ; GFX900-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; GFX900-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX900-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY9]](s32) ; GFX900-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC]](s16) - ; GFX900-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX900-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32) + ; GFX900-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX900-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY10]](s32) ; GFX900-NEXT: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC2]](s16) - ; GFX900-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX900-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY11]](s32) + ; GFX900-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX900-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY11]](s32) ; GFX900-NEXT: [[TRUNC5:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC4]](s16) - ; GFX900-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX900-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) + ; GFX900-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX900-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY12]](s32) ; GFX900-NEXT: 
[[TRUNC7:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC6]](s16) - ; GFX900-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX900-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) + ; GFX900-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; GFX900-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) ; GFX900-NEXT: [[TRUNC9:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC8]](s16) - ; GFX900-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX900-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) + ; GFX900-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; GFX900-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) ; GFX900-NEXT: [[TRUNC11:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC10]](s16) - ; GFX900-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX900-NEXT: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) + ; GFX900-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr6 + ; GFX900-NEXT: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY15]](s32) ; GFX900-NEXT: [[TRUNC13:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC12]](s16) - ; GFX900-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX900-NEXT: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) + ; GFX900-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY $vgpr7 + ; GFX900-NEXT: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY16]](s32) ; GFX900-NEXT: [[TRUNC15:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC14]](s16) - ; GFX900-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX900-NEXT: [[TRUNC16:%[0-9]+]]:_(s16) = G_TRUNC [[COPY17]](s32) + ; GFX900-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY $vgpr8 + ; GFX900-NEXT: [[TRUNC16:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY17]](s32) ; GFX900-NEXT: [[TRUNC17:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC16]](s16) - ; GFX900-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; GFX900-NEXT: [[TRUNC18:%[0-9]+]]:_(s16) = G_TRUNC [[COPY18]](s32) + ; GFX900-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY $vgpr9 + ; GFX900-NEXT: [[TRUNC18:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY18]](s32) ; GFX900-NEXT: [[TRUNC19:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC18]](s16) - ; GFX900-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; GFX900-NEXT: [[TRUNC20:%[0-9]+]]:_(s16) = G_TRUNC [[COPY19]](s32) + ; GFX900-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY $vgpr10 + ; GFX900-NEXT: [[TRUNC20:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY19]](s32) ; GFX900-NEXT: [[TRUNC21:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC20]](s16) - ; GFX900-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; GFX900-NEXT: [[TRUNC22:%[0-9]+]]:_(s16) = G_TRUNC [[COPY20]](s32) + ; GFX900-NEXT: [[PRED_COPY20:%[0-9]+]]:_(s32) = PRED_COPY $vgpr11 + ; GFX900-NEXT: [[TRUNC22:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY20]](s32) ; GFX900-NEXT: [[TRUNC23:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC22]](s16) - ; GFX900-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; GFX900-NEXT: [[TRUNC24:%[0-9]+]]:_(s16) = G_TRUNC [[COPY21]](s32) + ; GFX900-NEXT: [[PRED_COPY21:%[0-9]+]]:_(s32) = PRED_COPY $vgpr12 + ; GFX900-NEXT: [[TRUNC24:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY21]](s32) ; GFX900-NEXT: [[TRUNC25:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC24]](s16) - ; GFX900-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; GFX900-NEXT: [[TRUNC26:%[0-9]+]]:_(s16) = G_TRUNC [[COPY22]](s32) + ; GFX900-NEXT: [[PRED_COPY22:%[0-9]+]]:_(s32) = PRED_COPY $vgpr13 + ; GFX900-NEXT: [[TRUNC26:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY22]](s32) ; GFX900-NEXT: [[TRUNC27:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC26]](s16) - ; GFX900-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; GFX900-NEXT: [[TRUNC28:%[0-9]+]]:_(s16) = G_TRUNC [[COPY23]](s32) + 
; GFX900-NEXT: [[PRED_COPY23:%[0-9]+]]:_(s32) = PRED_COPY $vgpr14 + ; GFX900-NEXT: [[TRUNC28:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY23]](s32) ; GFX900-NEXT: [[TRUNC29:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC28]](s16) - ; GFX900-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; GFX900-NEXT: [[TRUNC30:%[0-9]+]]:_(s16) = G_TRUNC [[COPY24]](s32) + ; GFX900-NEXT: [[PRED_COPY24:%[0-9]+]]:_(s32) = PRED_COPY $vgpr15 + ; GFX900-NEXT: [[TRUNC30:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY24]](s32) ; GFX900-NEXT: [[TRUNC31:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC30]](s16) - ; GFX900-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; GFX900-NEXT: [[TRUNC32:%[0-9]+]]:_(s16) = G_TRUNC [[COPY25]](s32) + ; GFX900-NEXT: [[PRED_COPY25:%[0-9]+]]:_(s32) = PRED_COPY $vgpr16 + ; GFX900-NEXT: [[TRUNC32:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY25]](s32) ; GFX900-NEXT: [[TRUNC33:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC32]](s16) ; GFX900-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX900-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32) ; GFX900-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GFX900-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v32i32 - ; GFX900-NEXT: [[COPY26:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GFX900-NEXT: [[COPY27:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GFX900-NEXT: [[COPY28:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; GFX900-NEXT: [[COPY29:%[0-9]+]]:_(s64) = COPY [[COPY5]] - ; GFX900-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GFX900-NEXT: [[COPY31:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GFX900-NEXT: [[COPY32:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; GFX900-NEXT: [[COPY33:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; GFX900-NEXT: [[COPY34:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GFX900-NEXT: [[PRED_COPY26:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]] + ; GFX900-NEXT: [[PRED_COPY27:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; GFX900-NEXT: [[PRED_COPY28:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY6]] + ; GFX900-NEXT: [[PRED_COPY29:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY5]] + ; GFX900-NEXT: [[PRED_COPY30:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; GFX900-NEXT: [[PRED_COPY31:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; GFX900-NEXT: [[PRED_COPY32:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]] + ; GFX900-NEXT: [[PRED_COPY33:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]] + ; GFX900-NEXT: [[PRED_COPY34:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; GFX900-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = 
G_UNMERGE_VALUES [[BUILD_VECTOR]](<32 x s32>) ; GFX900-NEXT: [[AMDGPU_WAVE_ADDRESS:%[0-9]+]]:_(p5) = G_AMDGPU_WAVE_ADDRESS $sgpr32 ; GFX900-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX900-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C1]](s32) ; GFX900-NEXT: G_STORE [[UV31]](s32), [[PTR_ADD]](p5) :: (store (s32) into stack, align 16, addrspace 5) - ; GFX900-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX900-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX900-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX900-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; GFX900-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; GFX900-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; GFX900-NEXT: $vgpr6 = COPY [[UV6]](s32) - ; GFX900-NEXT: $vgpr7 = COPY [[UV7]](s32) - ; GFX900-NEXT: $vgpr8 = COPY [[UV8]](s32) - ; GFX900-NEXT: $vgpr9 = COPY [[UV9]](s32) - ; GFX900-NEXT: $vgpr10 = COPY [[UV10]](s32) - ; GFX900-NEXT: $vgpr11 = COPY [[UV11]](s32) - ; GFX900-NEXT: $vgpr12 = COPY [[UV12]](s32) - ; GFX900-NEXT: $vgpr13 = COPY [[UV13]](s32) - ; GFX900-NEXT: $vgpr14 = COPY [[UV14]](s32) - ; GFX900-NEXT: $vgpr15 = COPY [[UV15]](s32) - ; GFX900-NEXT: $vgpr16 = COPY [[UV16]](s32) - ; GFX900-NEXT: $vgpr17 = COPY [[UV17]](s32) - ; GFX900-NEXT: $vgpr18 = COPY [[UV18]](s32) - ; GFX900-NEXT: $vgpr19 = COPY [[UV19]](s32) - ; GFX900-NEXT: $vgpr20 = COPY [[UV20]](s32) - ; GFX900-NEXT: $vgpr21 = COPY [[UV21]](s32) - ; GFX900-NEXT: $vgpr22 = COPY [[UV22]](s32) - ; GFX900-NEXT: $vgpr23 = COPY [[UV23]](s32) - ; GFX900-NEXT: $vgpr24 = COPY [[UV24]](s32) - ; GFX900-NEXT: $vgpr25 = COPY [[UV25]](s32) - ; GFX900-NEXT: $vgpr26 = COPY [[UV26]](s32) - ; GFX900-NEXT: $vgpr27 = COPY [[UV27]](s32) - ; GFX900-NEXT: $vgpr28 = COPY [[UV28]](s32) - ; GFX900-NEXT: $vgpr29 = COPY [[UV29]](s32) - ; GFX900-NEXT: $vgpr30 = COPY [[UV30]](s32) - ; GFX900-NEXT: [[COPY35:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GFX900-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY35]](<4 x s32>) - ; GFX900-NEXT: $sgpr4_sgpr5 = COPY [[COPY26]](p4) - ; GFX900-NEXT: $sgpr6_sgpr7 = COPY [[COPY27]](p4) - ; GFX900-NEXT: $sgpr8_sgpr9 = COPY [[COPY28]](p4) - ; GFX900-NEXT: $sgpr10_sgpr11 = COPY [[COPY29]](s64) - ; GFX900-NEXT: $sgpr12 = COPY [[COPY30]](s32) - ; GFX900-NEXT: $sgpr13 = COPY [[COPY31]](s32) - ; GFX900-NEXT: $sgpr14 = COPY [[COPY32]](s32) - ; GFX900-NEXT: $sgpr15 = COPY [[COPY33]](s32) - ; GFX900-NEXT: $vgpr31 = COPY [[COPY34]](s32) + ; GFX900-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX900-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX900-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX900-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) + ; GFX900-NEXT: $vgpr4 = PRED_COPY [[UV4]](s32) + ; GFX900-NEXT: $vgpr5 = PRED_COPY [[UV5]](s32) + ; GFX900-NEXT: $vgpr6 = PRED_COPY [[UV6]](s32) + ; GFX900-NEXT: $vgpr7 = PRED_COPY [[UV7]](s32) + ; GFX900-NEXT: $vgpr8 = PRED_COPY [[UV8]](s32) + ; GFX900-NEXT: $vgpr9 = PRED_COPY [[UV9]](s32) + ; GFX900-NEXT: $vgpr10 = PRED_COPY [[UV10]](s32) + ; GFX900-NEXT: $vgpr11 = PRED_COPY [[UV11]](s32) + ; GFX900-NEXT: $vgpr12 = PRED_COPY [[UV12]](s32) + ; GFX900-NEXT: $vgpr13 = PRED_COPY [[UV13]](s32) + ; GFX900-NEXT: $vgpr14 = PRED_COPY [[UV14]](s32) + ; GFX900-NEXT: $vgpr15 = PRED_COPY [[UV15]](s32) + ; GFX900-NEXT: $vgpr16 = PRED_COPY [[UV16]](s32) + ; GFX900-NEXT: $vgpr17 = PRED_COPY [[UV17]](s32) + ; GFX900-NEXT: $vgpr18 = PRED_COPY [[UV18]](s32) + ; GFX900-NEXT: $vgpr19 = PRED_COPY [[UV19]](s32) + ; GFX900-NEXT: $vgpr20 = PRED_COPY [[UV20]](s32) + ; GFX900-NEXT: $vgpr21 = PRED_COPY [[UV21]](s32) + ; GFX900-NEXT: $vgpr22 = PRED_COPY [[UV22]](s32) + ; 
GFX900-NEXT: $vgpr23 = PRED_COPY [[UV23]](s32) + ; GFX900-NEXT: $vgpr24 = PRED_COPY [[UV24]](s32) + ; GFX900-NEXT: $vgpr25 = PRED_COPY [[UV25]](s32) + ; GFX900-NEXT: $vgpr26 = PRED_COPY [[UV26]](s32) + ; GFX900-NEXT: $vgpr27 = PRED_COPY [[UV27]](s32) + ; GFX900-NEXT: $vgpr28 = PRED_COPY [[UV28]](s32) + ; GFX900-NEXT: $vgpr29 = PRED_COPY [[UV29]](s32) + ; GFX900-NEXT: $vgpr30 = PRED_COPY [[UV30]](s32) + ; GFX900-NEXT: [[PRED_COPY35:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX900-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY35]](<4 x s32>) + ; GFX900-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY26]](p4) + ; GFX900-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY27]](p4) + ; GFX900-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PRED_COPY28]](p4) + ; GFX900-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY29]](s64) + ; GFX900-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY30]](s32) + ; GFX900-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY31]](s32) + ; GFX900-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY32]](s32) + ; GFX900-NEXT: $sgpr15 = PRED_COPY [[PRED_COPY33]](s32) + ; GFX900-NEXT: $vgpr31 = PRED_COPY [[PRED_COPY34]](s32) ; GFX900-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v32i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; GFX900-NEXT: ADJCALLSTACKDOWN 0, 4, implicit-def $scc ; GFX900-NEXT: SI_RETURN @@ -517,126 +517,126 @@ ; GFX908: bb.1 (%ir-block.1): ; GFX908-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GFX908-NEXT: {{ $}} - ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 - ; GFX908-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GFX908-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GFX908-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr13 - ; GFX908-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12 - ; GFX908-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GFX908-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 - ; GFX908-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GFX908-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GFX908-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX908-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) + ; GFX908-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr31 + ; GFX908-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr15 + ; GFX908-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; GFX908-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; GFX908-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; GFX908-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; GFX908-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY 
$sgpr8_sgpr9 + ; GFX908-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr6_sgpr7 + ; GFX908-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; GFX908-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX908-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY9]](s32) ; GFX908-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC]](s16) - ; GFX908-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX908-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32) + ; GFX908-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX908-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY10]](s32) ; GFX908-NEXT: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC2]](s16) - ; GFX908-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX908-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY11]](s32) + ; GFX908-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX908-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY11]](s32) ; GFX908-NEXT: [[TRUNC5:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC4]](s16) - ; GFX908-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX908-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) + ; GFX908-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX908-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY12]](s32) ; GFX908-NEXT: [[TRUNC7:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC6]](s16) - ; GFX908-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX908-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) + ; GFX908-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; GFX908-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) ; GFX908-NEXT: [[TRUNC9:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC8]](s16) - ; GFX908-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX908-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) + ; GFX908-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; GFX908-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) ; GFX908-NEXT: [[TRUNC11:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC10]](s16) - ; GFX908-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX908-NEXT: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) + ; GFX908-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr6 + ; GFX908-NEXT: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY15]](s32) ; GFX908-NEXT: [[TRUNC13:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC12]](s16) - ; GFX908-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX908-NEXT: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) + ; GFX908-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY $vgpr7 + ; GFX908-NEXT: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY16]](s32) ; GFX908-NEXT: [[TRUNC15:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC14]](s16) - ; GFX908-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX908-NEXT: [[TRUNC16:%[0-9]+]]:_(s16) = G_TRUNC [[COPY17]](s32) + ; GFX908-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY $vgpr8 + ; GFX908-NEXT: [[TRUNC16:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY17]](s32) ; GFX908-NEXT: [[TRUNC17:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC16]](s16) - ; GFX908-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; GFX908-NEXT: [[TRUNC18:%[0-9]+]]:_(s16) = G_TRUNC [[COPY18]](s32) + ; GFX908-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY $vgpr9 + ; GFX908-NEXT: [[TRUNC18:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY18]](s32) ; GFX908-NEXT: [[TRUNC19:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC18]](s16) - ; GFX908-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; GFX908-NEXT: [[TRUNC20:%[0-9]+]]:_(s16) = G_TRUNC [[COPY19]](s32) + ; GFX908-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) 
= PRED_COPY $vgpr10 + ; GFX908-NEXT: [[TRUNC20:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY19]](s32) ; GFX908-NEXT: [[TRUNC21:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC20]](s16) - ; GFX908-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; GFX908-NEXT: [[TRUNC22:%[0-9]+]]:_(s16) = G_TRUNC [[COPY20]](s32) + ; GFX908-NEXT: [[PRED_COPY20:%[0-9]+]]:_(s32) = PRED_COPY $vgpr11 + ; GFX908-NEXT: [[TRUNC22:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY20]](s32) ; GFX908-NEXT: [[TRUNC23:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC22]](s16) - ; GFX908-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; GFX908-NEXT: [[TRUNC24:%[0-9]+]]:_(s16) = G_TRUNC [[COPY21]](s32) + ; GFX908-NEXT: [[PRED_COPY21:%[0-9]+]]:_(s32) = PRED_COPY $vgpr12 + ; GFX908-NEXT: [[TRUNC24:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY21]](s32) ; GFX908-NEXT: [[TRUNC25:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC24]](s16) - ; GFX908-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; GFX908-NEXT: [[TRUNC26:%[0-9]+]]:_(s16) = G_TRUNC [[COPY22]](s32) + ; GFX908-NEXT: [[PRED_COPY22:%[0-9]+]]:_(s32) = PRED_COPY $vgpr13 + ; GFX908-NEXT: [[TRUNC26:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY22]](s32) ; GFX908-NEXT: [[TRUNC27:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC26]](s16) - ; GFX908-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; GFX908-NEXT: [[TRUNC28:%[0-9]+]]:_(s16) = G_TRUNC [[COPY23]](s32) + ; GFX908-NEXT: [[PRED_COPY23:%[0-9]+]]:_(s32) = PRED_COPY $vgpr14 + ; GFX908-NEXT: [[TRUNC28:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY23]](s32) ; GFX908-NEXT: [[TRUNC29:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC28]](s16) - ; GFX908-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; GFX908-NEXT: [[TRUNC30:%[0-9]+]]:_(s16) = G_TRUNC [[COPY24]](s32) + ; GFX908-NEXT: [[PRED_COPY24:%[0-9]+]]:_(s32) = PRED_COPY $vgpr15 + ; GFX908-NEXT: [[TRUNC30:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY24]](s32) ; GFX908-NEXT: [[TRUNC31:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC30]](s16) - ; GFX908-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; GFX908-NEXT: [[TRUNC32:%[0-9]+]]:_(s16) = G_TRUNC [[COPY25]](s32) + ; GFX908-NEXT: [[PRED_COPY25:%[0-9]+]]:_(s32) = PRED_COPY $vgpr16 + ; GFX908-NEXT: [[TRUNC32:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY25]](s32) ; GFX908-NEXT: [[TRUNC33:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC32]](s16) ; GFX908-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX908-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32) ; GFX908-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GFX908-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v32i32 - ; GFX908-NEXT: [[COPY26:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GFX908-NEXT: [[COPY27:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GFX908-NEXT: [[COPY28:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; GFX908-NEXT: [[COPY29:%[0-9]+]]:_(s64) = COPY [[COPY5]] - ; GFX908-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GFX908-NEXT: [[COPY31:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GFX908-NEXT: [[COPY32:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; GFX908-NEXT: [[COPY33:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; GFX908-NEXT: [[COPY34:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GFX908-NEXT: [[PRED_COPY26:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]] + ; GFX908-NEXT: [[PRED_COPY27:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; GFX908-NEXT: 
[[PRED_COPY28:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY6]] + ; GFX908-NEXT: [[PRED_COPY29:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY5]] + ; GFX908-NEXT: [[PRED_COPY30:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; GFX908-NEXT: [[PRED_COPY31:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; GFX908-NEXT: [[PRED_COPY32:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]] + ; GFX908-NEXT: [[PRED_COPY33:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]] + ; GFX908-NEXT: [[PRED_COPY34:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; GFX908-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<32 x s32>) ; GFX908-NEXT: [[AMDGPU_WAVE_ADDRESS:%[0-9]+]]:_(p5) = G_AMDGPU_WAVE_ADDRESS $sgpr32 ; GFX908-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX908-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C1]](s32) ; GFX908-NEXT: G_STORE [[UV31]](s32), [[PTR_ADD]](p5) :: (store (s32) into stack, align 16, addrspace 5) - ; GFX908-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX908-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX908-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX908-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; GFX908-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; GFX908-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; GFX908-NEXT: $vgpr6 = COPY [[UV6]](s32) - ; GFX908-NEXT: $vgpr7 = COPY [[UV7]](s32) - ; GFX908-NEXT: $vgpr8 = COPY [[UV8]](s32) - ; GFX908-NEXT: $vgpr9 = COPY [[UV9]](s32) - ; GFX908-NEXT: $vgpr10 = COPY [[UV10]](s32) - ; GFX908-NEXT: $vgpr11 = COPY [[UV11]](s32) - ; GFX908-NEXT: $vgpr12 = COPY [[UV12]](s32) - ; GFX908-NEXT: $vgpr13 = COPY [[UV13]](s32) - ; GFX908-NEXT: $vgpr14 = COPY [[UV14]](s32) - ; GFX908-NEXT: $vgpr15 = COPY [[UV15]](s32) - ; GFX908-NEXT: $vgpr16 = COPY [[UV16]](s32) - ; GFX908-NEXT: $vgpr17 = COPY [[UV17]](s32) - ; GFX908-NEXT: $vgpr18 = COPY [[UV18]](s32) - ; GFX908-NEXT: $vgpr19 = COPY [[UV19]](s32) - ; GFX908-NEXT: $vgpr20 = COPY [[UV20]](s32) - ; GFX908-NEXT: $vgpr21 = COPY [[UV21]](s32) - ; GFX908-NEXT: $vgpr22 = COPY [[UV22]](s32) - ; GFX908-NEXT: $vgpr23 = COPY [[UV23]](s32) - ; GFX908-NEXT: $vgpr24 = COPY [[UV24]](s32) - ; GFX908-NEXT: $vgpr25 = COPY [[UV25]](s32) - ; GFX908-NEXT: $vgpr26 = COPY [[UV26]](s32) - ; GFX908-NEXT: $vgpr27 = COPY [[UV27]](s32) - ; GFX908-NEXT: $vgpr28 = COPY [[UV28]](s32) - ; GFX908-NEXT: $vgpr29 = COPY [[UV29]](s32) - ; GFX908-NEXT: $vgpr30 = COPY [[UV30]](s32) - ; GFX908-NEXT: [[COPY35:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GFX908-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY35]](<4 x s32>) - ; GFX908-NEXT: $sgpr4_sgpr5 = COPY [[COPY26]](p4) - ; GFX908-NEXT: $sgpr6_sgpr7 = COPY [[COPY27]](p4) - ; GFX908-NEXT: $sgpr8_sgpr9 = COPY [[COPY28]](p4) - ; GFX908-NEXT: $sgpr10_sgpr11 = COPY [[COPY29]](s64) - ; GFX908-NEXT: $sgpr12 = COPY [[COPY30]](s32) - ; GFX908-NEXT: $sgpr13 = COPY 
[[COPY31]](s32) - ; GFX908-NEXT: $sgpr14 = COPY [[COPY32]](s32) - ; GFX908-NEXT: $sgpr15 = COPY [[COPY33]](s32) - ; GFX908-NEXT: $vgpr31 = COPY [[COPY34]](s32) + ; GFX908-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX908-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX908-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX908-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) + ; GFX908-NEXT: $vgpr4 = PRED_COPY [[UV4]](s32) + ; GFX908-NEXT: $vgpr5 = PRED_COPY [[UV5]](s32) + ; GFX908-NEXT: $vgpr6 = PRED_COPY [[UV6]](s32) + ; GFX908-NEXT: $vgpr7 = PRED_COPY [[UV7]](s32) + ; GFX908-NEXT: $vgpr8 = PRED_COPY [[UV8]](s32) + ; GFX908-NEXT: $vgpr9 = PRED_COPY [[UV9]](s32) + ; GFX908-NEXT: $vgpr10 = PRED_COPY [[UV10]](s32) + ; GFX908-NEXT: $vgpr11 = PRED_COPY [[UV11]](s32) + ; GFX908-NEXT: $vgpr12 = PRED_COPY [[UV12]](s32) + ; GFX908-NEXT: $vgpr13 = PRED_COPY [[UV13]](s32) + ; GFX908-NEXT: $vgpr14 = PRED_COPY [[UV14]](s32) + ; GFX908-NEXT: $vgpr15 = PRED_COPY [[UV15]](s32) + ; GFX908-NEXT: $vgpr16 = PRED_COPY [[UV16]](s32) + ; GFX908-NEXT: $vgpr17 = PRED_COPY [[UV17]](s32) + ; GFX908-NEXT: $vgpr18 = PRED_COPY [[UV18]](s32) + ; GFX908-NEXT: $vgpr19 = PRED_COPY [[UV19]](s32) + ; GFX908-NEXT: $vgpr20 = PRED_COPY [[UV20]](s32) + ; GFX908-NEXT: $vgpr21 = PRED_COPY [[UV21]](s32) + ; GFX908-NEXT: $vgpr22 = PRED_COPY [[UV22]](s32) + ; GFX908-NEXT: $vgpr23 = PRED_COPY [[UV23]](s32) + ; GFX908-NEXT: $vgpr24 = PRED_COPY [[UV24]](s32) + ; GFX908-NEXT: $vgpr25 = PRED_COPY [[UV25]](s32) + ; GFX908-NEXT: $vgpr26 = PRED_COPY [[UV26]](s32) + ; GFX908-NEXT: $vgpr27 = PRED_COPY [[UV27]](s32) + ; GFX908-NEXT: $vgpr28 = PRED_COPY [[UV28]](s32) + ; GFX908-NEXT: $vgpr29 = PRED_COPY [[UV29]](s32) + ; GFX908-NEXT: $vgpr30 = PRED_COPY [[UV30]](s32) + ; GFX908-NEXT: [[PRED_COPY35:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX908-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY35]](<4 x s32>) + ; GFX908-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY26]](p4) + ; GFX908-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY27]](p4) + ; GFX908-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PRED_COPY28]](p4) + ; GFX908-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY29]](s64) + ; GFX908-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY30]](s32) + ; GFX908-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY31]](s32) + ; GFX908-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY32]](s32) + ; GFX908-NEXT: $sgpr15 = PRED_COPY [[PRED_COPY33]](s32) + ; GFX908-NEXT: $vgpr31 = PRED_COPY [[PRED_COPY34]](s32) ; GFX908-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v32i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; GFX908-NEXT: ADJCALLSTACKDOWN 0, 4, implicit-def $scc ; GFX908-NEXT: SI_RETURN @@ -649,40 +649,40 @@ ; GFX900: bb.1 (%ir-block.0): ; GFX900-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; 
GFX900-NEXT: {{ $}} - ; GFX900-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GFX900-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GFX900-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GFX900-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GFX900-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GFX900-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GFX900-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GFX900-NEXT: [[COPY7:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GFX900-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; GFX900-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr16 + ; GFX900-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr15 + ; GFX900-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; GFX900-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; GFX900-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr6_sgpr7 + ; GFX900-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; GFX900-NEXT: [[PRED_COPY7:%[0-9]+]]:_(p4) = PRED_COPY $sgpr8_sgpr9 ; GFX900-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 ; GFX900-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GFX900-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i32 - ; GFX900-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; GFX900-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY5]] - ; GFX900-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4) + ; GFX900-NEXT: [[PRED_COPY8:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY6]] + ; GFX900-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY5]] + ; GFX900-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]](p4) ; GFX900-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX900-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C1]](s64) - ; GFX900-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY4]] - ; GFX900-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GFX900-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; GFX900-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY1]] + ; GFX900-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY10]], [[C1]](s64) + ; GFX900-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY4]] + ; GFX900-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; GFX900-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]] + ; GFX900-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]] ; GFX900-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX900-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; GFX900-NEXT: $vgpr0 = COPY [[C]](s32) - ; GFX900-NEXT: [[COPY16:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GFX900-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY16]](<4 x s32>) - ; GFX900-NEXT: $sgpr4_sgpr5 = COPY [[COPY8]](p4) - ; GFX900-NEXT: $sgpr6_sgpr7 = COPY [[COPY9]](p4) - ; GFX900-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GFX900-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) - ; GFX900-NEXT: $sgpr12 = COPY [[COPY12]](s32) - ; GFX900-NEXT: $sgpr13 = COPY [[COPY13]](s32) - ; GFX900-NEXT: $sgpr14 = COPY [[COPY14]](s32) - ; GFX900-NEXT: $sgpr15 = COPY [[DEF]](s32) - ; GFX900-NEXT: $vgpr31 = COPY [[COPY15]](s32) + ; GFX900-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) + ; GFX900-NEXT: $vgpr0 = PRED_COPY [[C]](s32) + ; GFX900-NEXT: [[PRED_COPY16:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; GFX900-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY16]](<4 x s32>) + ; GFX900-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY8]](p4) + ; GFX900-NEXT: 
$sgpr6_sgpr7 = PRED_COPY [[PRED_COPY9]](p4) + ; GFX900-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; GFX900-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY11]](s64) + ; GFX900-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY12]](s32) + ; GFX900-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY13]](s32) + ; GFX900-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY14]](s32) + ; GFX900-NEXT: $sgpr15 = PRED_COPY [[DEF]](s32) + ; GFX900-NEXT: $vgpr31 = PRED_COPY [[PRED_COPY15]](s32) ; GFX900-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; GFX900-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GFX900-NEXT: S_ENDPGM 0 @@ -690,40 +690,40 @@ ; GFX908: bb.1 (%ir-block.0): ; GFX908-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GFX908-NEXT: {{ $}} - ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GFX908-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GFX908-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GFX908-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GFX908-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GFX908-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GFX908-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GFX908-NEXT: [[COPY7:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GFX908-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; GFX908-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr16 + ; GFX908-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr15 + ; GFX908-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; GFX908-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; GFX908-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr6_sgpr7 + ; GFX908-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; GFX908-NEXT: [[PRED_COPY7:%[0-9]+]]:_(p4) = PRED_COPY $sgpr8_sgpr9 ; GFX908-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 ; GFX908-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GFX908-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i32 - ; GFX908-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; GFX908-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY5]] - ; GFX908-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4) + ; GFX908-NEXT: [[PRED_COPY8:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY6]] + ; GFX908-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY5]] + ; GFX908-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]](p4) ; GFX908-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX908-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C1]](s64) - ; GFX908-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY4]] - ; GFX908-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GFX908-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; GFX908-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY1]] + ; GFX908-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY10]], [[C1]](s64) + ; GFX908-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY4]] + ; GFX908-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; GFX908-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]] + ; GFX908-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]] ; GFX908-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX908-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY 
[[COPY]](s32) - ; GFX908-NEXT: $vgpr0 = COPY [[C]](s32) - ; GFX908-NEXT: [[COPY16:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GFX908-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY16]](<4 x s32>) - ; GFX908-NEXT: $sgpr4_sgpr5 = COPY [[COPY8]](p4) - ; GFX908-NEXT: $sgpr6_sgpr7 = COPY [[COPY9]](p4) - ; GFX908-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GFX908-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) - ; GFX908-NEXT: $sgpr12 = COPY [[COPY12]](s32) - ; GFX908-NEXT: $sgpr13 = COPY [[COPY13]](s32) - ; GFX908-NEXT: $sgpr14 = COPY [[COPY14]](s32) - ; GFX908-NEXT: $sgpr15 = COPY [[DEF]](s32) - ; GFX908-NEXT: $vgpr31 = COPY [[COPY15]](s32) + ; GFX908-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) + ; GFX908-NEXT: $vgpr0 = PRED_COPY [[C]](s32) + ; GFX908-NEXT: [[PRED_COPY16:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; GFX908-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY16]](<4 x s32>) + ; GFX908-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY8]](p4) + ; GFX908-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY9]](p4) + ; GFX908-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; GFX908-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY11]](s64) + ; GFX908-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY12]](s32) + ; GFX908-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY13]](s32) + ; GFX908-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY14]](s32) + ; GFX908-NEXT: $sgpr15 = PRED_COPY [[DEF]](s32) + ; GFX908-NEXT: $vgpr31 = PRED_COPY [[PRED_COPY15]](s32) ; GFX908-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; GFX908-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GFX908-NEXT: S_ENDPGM 0 @@ -736,44 +736,44 @@ ; GFX900: bb.1 (%ir-block.0): ; GFX900-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr1, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GFX900-NEXT: {{ $}} - ; GFX900-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; GFX900-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GFX900-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GFX900-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GFX900-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GFX900-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GFX900-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GFX900-NEXT: [[COPY7:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GFX900-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; GFX900-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr16 + ; GFX900-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr15 + ; GFX900-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; GFX900-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; GFX900-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr6_sgpr7 + ; GFX900-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; GFX900-NEXT: [[PRED_COPY7:%[0-9]+]]:_(p4) = PRED_COPY $sgpr8_sgpr9 ; GFX900-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 ; GFX900-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GFX900-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i32 - ; GFX900-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; GFX900-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY5]] - ; GFX900-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4) + ; GFX900-NEXT: [[PRED_COPY8:%[0-9]+]]:_(p4) = PRED_COPY 
[[PRED_COPY6]] + ; GFX900-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY5]] + ; GFX900-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]](p4) ; GFX900-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX900-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C1]](s64) - ; GFX900-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY4]] - ; GFX900-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GFX900-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; GFX900-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY1]] + ; GFX900-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY10]], [[C1]](s64) + ; GFX900-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY4]] + ; GFX900-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; GFX900-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]] + ; GFX900-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]] ; GFX900-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX900-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX900-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GFX900-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; GFX900-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GFX900-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY15]], [[C3]](s32) + ; GFX900-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY15]], [[C3]](s32) ; GFX900-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL]] - ; GFX900-NEXT: $vgpr0 = COPY [[C]](s32) - ; GFX900-NEXT: [[COPY16:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GFX900-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY16]](<4 x s32>) - ; GFX900-NEXT: $sgpr4_sgpr5 = COPY [[COPY8]](p4) - ; GFX900-NEXT: $sgpr6_sgpr7 = COPY [[COPY9]](p4) - ; GFX900-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GFX900-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) - ; GFX900-NEXT: $sgpr12 = COPY [[COPY12]](s32) - ; GFX900-NEXT: $sgpr13 = COPY [[COPY13]](s32) - ; GFX900-NEXT: $sgpr14 = COPY [[COPY14]](s32) - ; GFX900-NEXT: $sgpr15 = COPY [[DEF]](s32) - ; GFX900-NEXT: $vgpr31 = COPY [[OR]](s32) + ; GFX900-NEXT: $vgpr0 = PRED_COPY [[C]](s32) + ; GFX900-NEXT: [[PRED_COPY16:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; GFX900-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY16]](<4 x s32>) + ; GFX900-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY8]](p4) + ; GFX900-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY9]](p4) + ; GFX900-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; GFX900-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY11]](s64) + ; GFX900-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY12]](s32) + ; GFX900-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY13]](s32) + ; GFX900-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY14]](s32) + ; GFX900-NEXT: $sgpr15 = PRED_COPY [[DEF]](s32) + ; GFX900-NEXT: $vgpr31 = PRED_COPY [[OR]](s32) ; GFX900-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; GFX900-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GFX900-NEXT: S_ENDPGM 0 @@ -781,44 +781,44 @@ ; GFX908: bb.1 (%ir-block.0): ; GFX908-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr1, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GFX908-NEXT: {{ $}} - ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; GFX908-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GFX908-NEXT: 
[[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GFX908-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GFX908-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GFX908-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GFX908-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GFX908-NEXT: [[COPY7:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GFX908-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; GFX908-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr16 + ; GFX908-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr15 + ; GFX908-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; GFX908-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; GFX908-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr6_sgpr7 + ; GFX908-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; GFX908-NEXT: [[PRED_COPY7:%[0-9]+]]:_(p4) = PRED_COPY $sgpr8_sgpr9 ; GFX908-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 ; GFX908-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GFX908-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i32 - ; GFX908-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; GFX908-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY5]] - ; GFX908-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4) + ; GFX908-NEXT: [[PRED_COPY8:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY6]] + ; GFX908-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY5]] + ; GFX908-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]](p4) ; GFX908-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX908-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C1]](s64) - ; GFX908-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY4]] - ; GFX908-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GFX908-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; GFX908-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY1]] + ; GFX908-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY10]], [[C1]](s64) + ; GFX908-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY4]] + ; GFX908-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; GFX908-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]] + ; GFX908-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]] ; GFX908-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX908-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX908-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GFX908-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; GFX908-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GFX908-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY15]], [[C3]](s32) + ; GFX908-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY15]], [[C3]](s32) ; GFX908-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL]] - ; GFX908-NEXT: $vgpr0 = COPY [[C]](s32) - ; GFX908-NEXT: [[COPY16:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GFX908-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY16]](<4 x s32>) - ; GFX908-NEXT: $sgpr4_sgpr5 = COPY [[COPY8]](p4) - ; GFX908-NEXT: $sgpr6_sgpr7 = COPY [[COPY9]](p4) - ; GFX908-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GFX908-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) - ; GFX908-NEXT: $sgpr12 = COPY [[COPY12]](s32) - ; GFX908-NEXT: $sgpr13 = COPY [[COPY13]](s32) - ; GFX908-NEXT: $sgpr14 = COPY [[COPY14]](s32) - ; GFX908-NEXT: $sgpr15 = COPY [[DEF]](s32) - ; GFX908-NEXT: $vgpr31 = COPY [[OR]](s32) + ; GFX908-NEXT: $vgpr0 = PRED_COPY [[C]](s32) + ; GFX908-NEXT: [[PRED_COPY16:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; 
GFX908-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY16]](<4 x s32>) + ; GFX908-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY8]](p4) + ; GFX908-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY9]](p4) + ; GFX908-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; GFX908-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY11]](s64) + ; GFX908-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY12]](s32) + ; GFX908-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY13]](s32) + ; GFX908-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY14]](s32) + ; GFX908-NEXT: $sgpr15 = PRED_COPY [[DEF]](s32) + ; GFX908-NEXT: $vgpr31 = PRED_COPY [[OR]](s32) ; GFX908-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; GFX908-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GFX908-NEXT: S_ENDPGM 0 @@ -831,44 +831,44 @@ ; GFX900: bb.1 (%ir-block.0): ; GFX900-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GFX900-NEXT: {{ $}} - ; GFX900-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; GFX900-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GFX900-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GFX900-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GFX900-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GFX900-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GFX900-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GFX900-NEXT: [[COPY7:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GFX900-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; GFX900-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr16 + ; GFX900-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr15 + ; GFX900-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; GFX900-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; GFX900-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr6_sgpr7 + ; GFX900-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; GFX900-NEXT: [[PRED_COPY7:%[0-9]+]]:_(p4) = PRED_COPY $sgpr8_sgpr9 ; GFX900-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 ; GFX900-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GFX900-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i32 - ; GFX900-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; GFX900-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY5]] - ; GFX900-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4) + ; GFX900-NEXT: [[PRED_COPY8:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY6]] + ; GFX900-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY5]] + ; GFX900-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]](p4) ; GFX900-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX900-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C1]](s64) - ; GFX900-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY4]] - ; GFX900-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GFX900-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; GFX900-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY1]] + ; GFX900-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY10]], [[C1]](s64) + ; GFX900-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY4]] + ; GFX900-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; GFX900-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]] + ; GFX900-NEXT: 
[[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]] ; GFX900-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX900-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX900-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GFX900-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; GFX900-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GFX900-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY15]], [[C3]](s32) + ; GFX900-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY15]], [[C3]](s32) ; GFX900-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL]] - ; GFX900-NEXT: $vgpr0 = COPY [[C]](s32) - ; GFX900-NEXT: [[COPY16:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GFX900-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY16]](<4 x s32>) - ; GFX900-NEXT: $sgpr4_sgpr5 = COPY [[COPY8]](p4) - ; GFX900-NEXT: $sgpr6_sgpr7 = COPY [[COPY9]](p4) - ; GFX900-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GFX900-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) - ; GFX900-NEXT: $sgpr12 = COPY [[COPY12]](s32) - ; GFX900-NEXT: $sgpr13 = COPY [[COPY13]](s32) - ; GFX900-NEXT: $sgpr14 = COPY [[COPY14]](s32) - ; GFX900-NEXT: $sgpr15 = COPY [[DEF]](s32) - ; GFX900-NEXT: $vgpr31 = COPY [[OR]](s32) + ; GFX900-NEXT: $vgpr0 = PRED_COPY [[C]](s32) + ; GFX900-NEXT: [[PRED_COPY16:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; GFX900-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY16]](<4 x s32>) + ; GFX900-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY8]](p4) + ; GFX900-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY9]](p4) + ; GFX900-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; GFX900-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY11]](s64) + ; GFX900-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY12]](s32) + ; GFX900-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY13]](s32) + ; GFX900-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY14]](s32) + ; GFX900-NEXT: $sgpr15 = PRED_COPY [[DEF]](s32) + ; GFX900-NEXT: $vgpr31 = PRED_COPY [[OR]](s32) ; GFX900-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; GFX900-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GFX900-NEXT: S_ENDPGM 0 @@ -876,44 +876,44 @@ ; GFX908: bb.1 (%ir-block.0): ; GFX908-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GFX908-NEXT: {{ $}} - ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; GFX908-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GFX908-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GFX908-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GFX908-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GFX908-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GFX908-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GFX908-NEXT: [[COPY7:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GFX908-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; GFX908-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr16 + ; GFX908-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr15 + ; GFX908-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; GFX908-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; GFX908-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr6_sgpr7 + ; GFX908-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; GFX908-NEXT: 
[[PRED_COPY7:%[0-9]+]]:_(p4) = PRED_COPY $sgpr8_sgpr9 ; GFX908-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 ; GFX908-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GFX908-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i32 - ; GFX908-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; GFX908-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY5]] - ; GFX908-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4) + ; GFX908-NEXT: [[PRED_COPY8:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY6]] + ; GFX908-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY5]] + ; GFX908-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]](p4) ; GFX908-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX908-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C1]](s64) - ; GFX908-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY4]] - ; GFX908-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GFX908-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; GFX908-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY1]] + ; GFX908-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY10]], [[C1]](s64) + ; GFX908-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY4]] + ; GFX908-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; GFX908-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]] + ; GFX908-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]] ; GFX908-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX908-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX908-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GFX908-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; GFX908-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GFX908-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY15]], [[C3]](s32) + ; GFX908-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY15]], [[C3]](s32) ; GFX908-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL]] - ; GFX908-NEXT: $vgpr0 = COPY [[C]](s32) - ; GFX908-NEXT: [[COPY16:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GFX908-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY16]](<4 x s32>) - ; GFX908-NEXT: $sgpr4_sgpr5 = COPY [[COPY8]](p4) - ; GFX908-NEXT: $sgpr6_sgpr7 = COPY [[COPY9]](p4) - ; GFX908-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GFX908-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) - ; GFX908-NEXT: $sgpr12 = COPY [[COPY12]](s32) - ; GFX908-NEXT: $sgpr13 = COPY [[COPY13]](s32) - ; GFX908-NEXT: $sgpr14 = COPY [[COPY14]](s32) - ; GFX908-NEXT: $sgpr15 = COPY [[DEF]](s32) - ; GFX908-NEXT: $vgpr31 = COPY [[OR]](s32) + ; GFX908-NEXT: $vgpr0 = PRED_COPY [[C]](s32) + ; GFX908-NEXT: [[PRED_COPY16:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; GFX908-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY16]](<4 x s32>) + ; GFX908-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY8]](p4) + ; GFX908-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY9]](p4) + ; GFX908-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; GFX908-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY11]](s64) + ; GFX908-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY12]](s32) + ; GFX908-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY13]](s32) + ; GFX908-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY14]](s32) + ; GFX908-NEXT: $sgpr15 = PRED_COPY [[DEF]](s32) + ; GFX908-NEXT: $vgpr31 = PRED_COPY [[OR]](s32) ; GFX908-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit 
$sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; GFX908-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GFX908-NEXT: S_ENDPGM 0 @@ -926,45 +926,45 @@ ; GFX900: bb.1 (%ir-block.0): ; GFX900-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GFX900-NEXT: {{ $}} - ; GFX900-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; GFX900-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GFX900-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GFX900-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GFX900-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GFX900-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GFX900-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GFX900-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GFX900-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GFX900-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; GFX900-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; GFX900-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr16 + ; GFX900-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr15 + ; GFX900-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; GFX900-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; GFX900-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr6_sgpr7 + ; GFX900-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; GFX900-NEXT: [[PRED_COPY8:%[0-9]+]]:_(p4) = PRED_COPY $sgpr8_sgpr9 ; GFX900-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 ; GFX900-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GFX900-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i32 - ; GFX900-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GFX900-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; GFX900-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4) + ; GFX900-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; GFX900-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY6]] + ; GFX900-NEXT: [[PRED_COPY11:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]](p4) ; GFX900-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX900-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY11]], [[C1]](s64) - ; GFX900-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY5]] - ; GFX900-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GFX900-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GFX900-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]] + ; GFX900-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY11]], [[C1]](s64) + ; GFX900-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY5]] + ; GFX900-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; GFX900-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; GFX900-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]] ; GFX900-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX900-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; GFX900-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GFX900-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) + ; GFX900-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; GFX900-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GFX900-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32) - ; GFX900-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY16]], [[SHL]] - ; GFX900-NEXT: $vgpr0 = COPY [[C]](s32) - ; GFX900-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GFX900-NEXT: 
$sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) - ; GFX900-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) - ; GFX900-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4) - ; GFX900-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GFX900-NEXT: $sgpr10_sgpr11 = COPY [[COPY12]](s64) - ; GFX900-NEXT: $sgpr12 = COPY [[COPY13]](s32) - ; GFX900-NEXT: $sgpr13 = COPY [[COPY14]](s32) - ; GFX900-NEXT: $sgpr14 = COPY [[COPY15]](s32) - ; GFX900-NEXT: $sgpr15 = COPY [[DEF]](s32) - ; GFX900-NEXT: $vgpr31 = COPY [[OR]](s32) + ; GFX900-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY17]], [[C2]](s32) + ; GFX900-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY16]], [[SHL]] + ; GFX900-NEXT: $vgpr0 = PRED_COPY [[C]](s32) + ; GFX900-NEXT: [[PRED_COPY18:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; GFX900-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY18]](<4 x s32>) + ; GFX900-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY9]](p4) + ; GFX900-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY10]](p4) + ; GFX900-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; GFX900-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY12]](s64) + ; GFX900-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY13]](s32) + ; GFX900-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY14]](s32) + ; GFX900-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY15]](s32) + ; GFX900-NEXT: $sgpr15 = PRED_COPY [[DEF]](s32) + ; GFX900-NEXT: $vgpr31 = PRED_COPY [[OR]](s32) ; GFX900-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; GFX900-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GFX900-NEXT: S_ENDPGM 0 @@ -972,45 +972,45 @@ ; GFX908: bb.1 (%ir-block.0): ; GFX908-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GFX908-NEXT: {{ $}} - ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GFX908-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GFX908-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GFX908-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GFX908-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GFX908-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GFX908-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GFX908-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GFX908-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; GFX908-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; GFX908-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr16 + ; GFX908-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr15 + ; GFX908-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; GFX908-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; GFX908-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr6_sgpr7 + ; GFX908-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; GFX908-NEXT: [[PRED_COPY8:%[0-9]+]]:_(p4) = PRED_COPY $sgpr8_sgpr9 ; GFX908-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 ; GFX908-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GFX908-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i32 - ; GFX908-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GFX908-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; GFX908-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4) + ; GFX908-NEXT: 
[[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; GFX908-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY6]] + ; GFX908-NEXT: [[PRED_COPY11:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]](p4) ; GFX908-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX908-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY11]], [[C1]](s64) - ; GFX908-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY5]] - ; GFX908-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GFX908-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GFX908-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]] + ; GFX908-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY11]], [[C1]](s64) + ; GFX908-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY5]] + ; GFX908-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; GFX908-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; GFX908-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]] ; GFX908-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX908-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; GFX908-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GFX908-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) + ; GFX908-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; GFX908-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GFX908-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32) - ; GFX908-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY16]], [[SHL]] - ; GFX908-NEXT: $vgpr0 = COPY [[C]](s32) - ; GFX908-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GFX908-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) - ; GFX908-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) - ; GFX908-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4) - ; GFX908-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GFX908-NEXT: $sgpr10_sgpr11 = COPY [[COPY12]](s64) - ; GFX908-NEXT: $sgpr12 = COPY [[COPY13]](s32) - ; GFX908-NEXT: $sgpr13 = COPY [[COPY14]](s32) - ; GFX908-NEXT: $sgpr14 = COPY [[COPY15]](s32) - ; GFX908-NEXT: $sgpr15 = COPY [[DEF]](s32) - ; GFX908-NEXT: $vgpr31 = COPY [[OR]](s32) + ; GFX908-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY17]], [[C2]](s32) + ; GFX908-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY16]], [[SHL]] + ; GFX908-NEXT: $vgpr0 = PRED_COPY [[C]](s32) + ; GFX908-NEXT: [[PRED_COPY18:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; GFX908-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY18]](<4 x s32>) + ; GFX908-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY9]](p4) + ; GFX908-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY10]](p4) + ; GFX908-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; GFX908-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY12]](s64) + ; GFX908-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY13]](s32) + ; GFX908-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY14]](s32) + ; GFX908-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY15]](s32) + ; GFX908-NEXT: $sgpr15 = PRED_COPY [[DEF]](s32) + ; GFX908-NEXT: $vgpr31 = PRED_COPY [[OR]](s32) ; GFX908-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; GFX908-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GFX908-NEXT: S_ENDPGM 0 @@ -1023,49 +1023,49 @@ ; GFX900: bb.1 (%ir-block.0): ; GFX900-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr1, $vgpr2, 
$sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GFX900-NEXT: {{ $}} - ; GFX900-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; GFX900-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; GFX900-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GFX900-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GFX900-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GFX900-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GFX900-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GFX900-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GFX900-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GFX900-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; GFX900-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; GFX900-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr16 + ; GFX900-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr15 + ; GFX900-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; GFX900-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; GFX900-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr6_sgpr7 + ; GFX900-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; GFX900-NEXT: [[PRED_COPY8:%[0-9]+]]:_(p4) = PRED_COPY $sgpr8_sgpr9 ; GFX900-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 ; GFX900-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GFX900-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i32 - ; GFX900-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GFX900-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; GFX900-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4) + ; GFX900-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; GFX900-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY6]] + ; GFX900-NEXT: [[PRED_COPY11:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]](p4) ; GFX900-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX900-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY11]], [[C1]](s64) - ; GFX900-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY5]] - ; GFX900-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GFX900-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GFX900-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]] + ; GFX900-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY11]], [[C1]](s64) + ; GFX900-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY5]] + ; GFX900-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; GFX900-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; GFX900-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]] ; GFX900-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX900-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX900-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GFX900-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; GFX900-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GFX900-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C3]](s32) + ; GFX900-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY16]], [[C3]](s32) ; GFX900-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL]] - ; GFX900-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GFX900-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; GFX900-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GFX900-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C4]](s32) + ; GFX900-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY17]], [[C4]](s32) ; GFX900-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - 
; GFX900-NEXT: $vgpr0 = COPY [[C]](s32) - ; GFX900-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GFX900-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) - ; GFX900-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) - ; GFX900-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4) - ; GFX900-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GFX900-NEXT: $sgpr10_sgpr11 = COPY [[COPY12]](s64) - ; GFX900-NEXT: $sgpr12 = COPY [[COPY13]](s32) - ; GFX900-NEXT: $sgpr13 = COPY [[COPY14]](s32) - ; GFX900-NEXT: $sgpr14 = COPY [[COPY15]](s32) - ; GFX900-NEXT: $sgpr15 = COPY [[DEF]](s32) - ; GFX900-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; GFX900-NEXT: $vgpr0 = PRED_COPY [[C]](s32) + ; GFX900-NEXT: [[PRED_COPY18:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; GFX900-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY18]](<4 x s32>) + ; GFX900-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY9]](p4) + ; GFX900-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY10]](p4) + ; GFX900-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; GFX900-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY12]](s64) + ; GFX900-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY13]](s32) + ; GFX900-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY14]](s32) + ; GFX900-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY15]](s32) + ; GFX900-NEXT: $sgpr15 = PRED_COPY [[DEF]](s32) + ; GFX900-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; GFX900-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; GFX900-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GFX900-NEXT: S_ENDPGM 0 @@ -1073,49 +1073,49 @@ ; GFX908: bb.1 (%ir-block.0): ; GFX908-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GFX908-NEXT: {{ $}} - ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; GFX908-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GFX908-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GFX908-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GFX908-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GFX908-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GFX908-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GFX908-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GFX908-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; GFX908-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; GFX908-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr16 + ; GFX908-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr15 + ; GFX908-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; GFX908-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; GFX908-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr6_sgpr7 + ; GFX908-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; GFX908-NEXT: [[PRED_COPY8:%[0-9]+]]:_(p4) = PRED_COPY $sgpr8_sgpr9 ; GFX908-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 ; GFX908-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GFX908-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i32 - ; GFX908-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GFX908-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; GFX908-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4) + ; GFX908-NEXT: 
[[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; GFX908-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY6]] + ; GFX908-NEXT: [[PRED_COPY11:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]](p4) ; GFX908-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX908-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY11]], [[C1]](s64) - ; GFX908-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY5]] - ; GFX908-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GFX908-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GFX908-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]] + ; GFX908-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY11]], [[C1]](s64) + ; GFX908-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY5]] + ; GFX908-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; GFX908-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; GFX908-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]] ; GFX908-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX908-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX908-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GFX908-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; GFX908-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GFX908-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C3]](s32) + ; GFX908-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY16]], [[C3]](s32) ; GFX908-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL]] - ; GFX908-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GFX908-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; GFX908-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GFX908-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C4]](s32) + ; GFX908-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY17]], [[C4]](s32) ; GFX908-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GFX908-NEXT: $vgpr0 = COPY [[C]](s32) - ; GFX908-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GFX908-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) - ; GFX908-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) - ; GFX908-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4) - ; GFX908-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GFX908-NEXT: $sgpr10_sgpr11 = COPY [[COPY12]](s64) - ; GFX908-NEXT: $sgpr12 = COPY [[COPY13]](s32) - ; GFX908-NEXT: $sgpr13 = COPY [[COPY14]](s32) - ; GFX908-NEXT: $sgpr14 = COPY [[COPY15]](s32) - ; GFX908-NEXT: $sgpr15 = COPY [[DEF]](s32) - ; GFX908-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; GFX908-NEXT: $vgpr0 = PRED_COPY [[C]](s32) + ; GFX908-NEXT: [[PRED_COPY18:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; GFX908-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY18]](<4 x s32>) + ; GFX908-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY9]](p4) + ; GFX908-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY10]](p4) + ; GFX908-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; GFX908-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY12]](s64) + ; GFX908-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY13]](s32) + ; GFX908-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY14]](s32) + ; GFX908-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY15]](s32) + ; GFX908-NEXT: $sgpr15 = PRED_COPY [[DEF]](s32) + ; GFX908-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; GFX908-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit 
$sgpr14, implicit $sgpr15, implicit $vgpr31 ; GFX908-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GFX908-NEXT: S_ENDPGM 0 @@ -1128,45 +1128,45 @@ ; GFX900: bb.1 (%ir-block.0): ; GFX900-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GFX900-NEXT: {{ $}} - ; GFX900-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; GFX900-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GFX900-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GFX900-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GFX900-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GFX900-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GFX900-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GFX900-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GFX900-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GFX900-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; GFX900-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; GFX900-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr16 + ; GFX900-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr15 + ; GFX900-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; GFX900-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; GFX900-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr6_sgpr7 + ; GFX900-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; GFX900-NEXT: [[PRED_COPY8:%[0-9]+]]:_(p4) = PRED_COPY $sgpr8_sgpr9 ; GFX900-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 ; GFX900-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GFX900-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i32 - ; GFX900-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GFX900-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; GFX900-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4) + ; GFX900-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; GFX900-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY6]] + ; GFX900-NEXT: [[PRED_COPY11:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]](p4) ; GFX900-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX900-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY11]], [[C1]](s64) - ; GFX900-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY5]] - ; GFX900-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GFX900-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GFX900-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]] + ; GFX900-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY11]], [[C1]](s64) + ; GFX900-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY5]] + ; GFX900-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; GFX900-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; GFX900-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]] ; GFX900-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX900-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; GFX900-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GFX900-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) + ; GFX900-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; GFX900-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GFX900-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32) - ; GFX900-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY16]], [[SHL]] - ; GFX900-NEXT: $vgpr0 = COPY [[C]](s32) - ; GFX900-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GFX900-NEXT: 
$sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) - ; GFX900-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) - ; GFX900-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4) - ; GFX900-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GFX900-NEXT: $sgpr10_sgpr11 = COPY [[COPY12]](s64) - ; GFX900-NEXT: $sgpr12 = COPY [[COPY13]](s32) - ; GFX900-NEXT: $sgpr13 = COPY [[COPY14]](s32) - ; GFX900-NEXT: $sgpr14 = COPY [[COPY15]](s32) - ; GFX900-NEXT: $sgpr15 = COPY [[DEF]](s32) - ; GFX900-NEXT: $vgpr31 = COPY [[OR]](s32) + ; GFX900-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY17]], [[C2]](s32) + ; GFX900-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY16]], [[SHL]] + ; GFX900-NEXT: $vgpr0 = PRED_COPY [[C]](s32) + ; GFX900-NEXT: [[PRED_COPY18:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; GFX900-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY18]](<4 x s32>) + ; GFX900-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY9]](p4) + ; GFX900-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY10]](p4) + ; GFX900-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; GFX900-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY12]](s64) + ; GFX900-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY13]](s32) + ; GFX900-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY14]](s32) + ; GFX900-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY15]](s32) + ; GFX900-NEXT: $sgpr15 = PRED_COPY [[DEF]](s32) + ; GFX900-NEXT: $vgpr31 = PRED_COPY [[OR]](s32) ; GFX900-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; GFX900-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GFX900-NEXT: S_ENDPGM 0 @@ -1174,45 +1174,45 @@ ; GFX908: bb.1 (%ir-block.0): ; GFX908-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GFX908-NEXT: {{ $}} - ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GFX908-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GFX908-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GFX908-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GFX908-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GFX908-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GFX908-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GFX908-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GFX908-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; GFX908-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; GFX908-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr16 + ; GFX908-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr15 + ; GFX908-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; GFX908-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; GFX908-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr6_sgpr7 + ; GFX908-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; GFX908-NEXT: [[PRED_COPY8:%[0-9]+]]:_(p4) = PRED_COPY $sgpr8_sgpr9 ; GFX908-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 ; GFX908-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GFX908-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i32 - ; GFX908-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GFX908-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; GFX908-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4) + ; 
GFX908-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; GFX908-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY6]] + ; GFX908-NEXT: [[PRED_COPY11:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]](p4) ; GFX908-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX908-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY11]], [[C1]](s64) - ; GFX908-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY5]] - ; GFX908-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GFX908-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GFX908-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]] + ; GFX908-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY11]], [[C1]](s64) + ; GFX908-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY5]] + ; GFX908-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; GFX908-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; GFX908-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]] ; GFX908-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX908-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; GFX908-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GFX908-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) + ; GFX908-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; GFX908-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GFX908-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32) - ; GFX908-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY16]], [[SHL]] - ; GFX908-NEXT: $vgpr0 = COPY [[C]](s32) - ; GFX908-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GFX908-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) - ; GFX908-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) - ; GFX908-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4) - ; GFX908-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GFX908-NEXT: $sgpr10_sgpr11 = COPY [[COPY12]](s64) - ; GFX908-NEXT: $sgpr12 = COPY [[COPY13]](s32) - ; GFX908-NEXT: $sgpr13 = COPY [[COPY14]](s32) - ; GFX908-NEXT: $sgpr14 = COPY [[COPY15]](s32) - ; GFX908-NEXT: $sgpr15 = COPY [[DEF]](s32) - ; GFX908-NEXT: $vgpr31 = COPY [[OR]](s32) + ; GFX908-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY17]], [[C2]](s32) + ; GFX908-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY16]], [[SHL]] + ; GFX908-NEXT: $vgpr0 = PRED_COPY [[C]](s32) + ; GFX908-NEXT: [[PRED_COPY18:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; GFX908-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY18]](<4 x s32>) + ; GFX908-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY9]](p4) + ; GFX908-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY10]](p4) + ; GFX908-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; GFX908-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY12]](s64) + ; GFX908-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY13]](s32) + ; GFX908-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY14]](s32) + ; GFX908-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY15]](s32) + ; GFX908-NEXT: $sgpr15 = PRED_COPY [[DEF]](s32) + ; GFX908-NEXT: $vgpr31 = PRED_COPY [[OR]](s32) ; GFX908-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; GFX908-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GFX908-NEXT: S_ENDPGM 0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-non-fixed.ll 
b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-non-fixed.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-non-fixed.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-non-fixed.ll @@ -13,8 +13,8 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_gfx_void_func_void - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY]](<4 x s32>) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY]](<4 x s32>) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_gfx_void_func_void, csr_amdgpu_si_gfx, implicit $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: SI_RETURN @@ -27,13 +27,13 @@ ; CHECK: bb.1 (%ir-block.1): ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_gfx_void_func_i32 - ; CHECK-NEXT: $vgpr0 = COPY [[C]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY1]](<4 x s32>) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY1]](<4 x s32>) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_gfx_void_func_i32, csr_amdgpu_si_gfx, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: SI_RETURN @@ -46,13 +46,13 @@ ; CHECK: bb.1 (%ir-block.1): ; CHECK-NEXT: liveins: $sgpr4 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_gfx_void_func_i32_inreg - ; CHECK-NEXT: $sgpr4 = COPY [[C]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY1]](<4 x s32>) + ; CHECK-NEXT: $sgpr4 = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY1]](<4 x s32>) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_gfx_void_func_i32_inreg, csr_amdgpu_si_gfx, implicit $sgpr4, implicit $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: SI_RETURN @@ -73,10 +73,10 @@ ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_gfx_void_func_struct_i8_i32 ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[LOAD1]](s8) ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT]](s16) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT1]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[LOAD2]](s32) - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY]](<4 x s32>) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[ANYEXT1]](s32) + ; 
CHECK-NEXT: $vgpr1 = PRED_COPY [[LOAD2]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY]](<4 x s32>) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_gfx_void_func_struct_i8_i32, csr_amdgpu_si_gfx, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: SI_RETURN @@ -99,10 +99,10 @@ ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_gfx_void_func_struct_i8_i32_inreg ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[LOAD1]](s8) ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT]](s16) - ; CHECK-NEXT: $sgpr4 = COPY [[ANYEXT1]](s32) - ; CHECK-NEXT: $sgpr5 = COPY [[LOAD2]](s32) - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY]](<4 x s32>) + ; CHECK-NEXT: $sgpr4 = PRED_COPY [[ANYEXT1]](s32) + ; CHECK-NEXT: $sgpr5 = PRED_COPY [[LOAD2]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY]](<4 x s32>) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_gfx_void_func_struct_i8_i32_inreg, csr_amdgpu_si_gfx, implicit $sgpr4, implicit $sgpr5, implicit $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: SI_RETURN diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll @@ -72,56 +72,56 @@ ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr16 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr15 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr6_sgpr7 + ; GCN-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY $sgpr8_sgpr9 ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 ; GCN-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr) ; GCN-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[INT]](p4) :: (dereferenceable invariant load (p1) from %ir.out.kernarg.offset1, align 16, addrspace 4) ; 
GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_i32_func_i32 - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; GCN-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]] + ; GCN-NEXT: [[PRED_COPY11:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; GCN-NEXT: [[PRED_COPY12:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY9]](p4) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY12]], [[C1]](s64) + ; GCN-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; GCN-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; GCN-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; GCN-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] ; GCN-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GCN-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; GCN-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY18]], [[C2]](s32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY17]], [[SHL]] + ; GCN-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; GCN-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32) + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY19]], [[C3]](s32) ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GCN-NEXT: $vgpr0 = COPY [[C]](s32) - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; GCN-NEXT: $vgpr0 = PRED_COPY [[C]](s32) + ; GCN-NEXT: [[PRED_COPY20:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY20]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY10]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY11]](p4) + ; GCN-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; GCN-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY13]](s64) + ; GCN-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY14]](s32) + ; GCN-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY15]](s32) + ; GCN-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY16]](s32) + ; GCN-NEXT: $sgpr15 = PRED_COPY [[DEF]](s32) + ; GCN-NEXT: 
$vgpr31 = PRED_COPY [[OR1]](s32) ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_i32_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY21:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN-NEXT: G_STORE [[COPY21]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out.load, addrspace 1) + ; GCN-NEXT: G_STORE [[PRED_COPY21]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out.load, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 %val = call i32 @external_i32_func_i32(i32 42) store volatile i32 %val, ptr addrspace(1) %out @@ -133,19 +133,19 @@ ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[PRED_COPY]](s32), [[PRED_COPY1]](s32) ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_gfx_i32_func_i32 - ; GCN-NEXT: $vgpr0 = COPY [[C]](s32) - ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY2]](<4 x s32>) + ; GCN-NEXT: $vgpr0 = PRED_COPY [[C]](s32) + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY2]](<4 x s32>) ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_gfx_i32_func_i32, csr_amdgpu_si_gfx, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN-NEXT: G_STORE [[COPY3]](s32), [[MV]](p1) :: (volatile store (s32) into %ir.out, addrspace 1) + ; GCN-NEXT: G_STORE [[PRED_COPY3]](s32), [[MV]](p1) :: (volatile store (s32) into %ir.out, addrspace 1) ; GCN-NEXT: SI_RETURN %val = call amdgpu_gfx i32 @external_gfx_i32_func_i32(i32 42) store volatile i32 %val, ptr addrspace(1) %out @@ -157,52 +157,52 @@ ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY 
$vgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr16 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr15 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr6_sgpr7 + ; GCN-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY $sgpr8_sgpr9 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_i1_func_void - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; GCN-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]] + ; GCN-NEXT: [[PRED_COPY11:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; GCN-NEXT: [[PRED_COPY12:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY9]](p4) ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY12]], [[C]](s64) + ; GCN-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; GCN-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; GCN-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; GCN-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GCN-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; GCN-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY18]], [[C1]](s32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY17]], [[SHL]] + ; GCN-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY19]], [[C2]](s32) ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; GCN-NEXT: [[PRED_COPY20:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; GCN-NEXT: 
$sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY20]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY10]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY11]](p4) + ; GCN-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; GCN-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY13]](s64) + ; GCN-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY14]](s32) + ; GCN-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY15]](s32) + ; GCN-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY16]](s32) + ; GCN-NEXT: $sgpr15 = PRED_COPY [[DEF1]](s32) + ; GCN-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_i1_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY21]](s32) + ; GCN-NEXT: [[PRED_COPY21:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[PRED_COPY21]](s32) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN-NEXT: G_STORE [[TRUNC]](s1), [[DEF]](p1) :: (volatile store (s1) into `ptr addrspace(1) undef`, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 @@ -217,11 +217,11 @@ ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_gfx_i1_func_void - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY]](<4 x s32>) + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY]](<4 x s32>) ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_gfx_i1_func_void, csr_amdgpu_si_gfx, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY1]](s32) + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[PRED_COPY1]](s32) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN-NEXT: G_STORE [[TRUNC]](s1), [[DEF]](p1) :: (volatile store (s1) into `ptr addrspace(1) undef`, addrspace 1) ; GCN-NEXT: SI_RETURN @@ -235,52 +235,52 @@ ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr16 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr15 + ; GCN-NEXT: 
[[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr6_sgpr7 + ; GCN-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY $sgpr8_sgpr9 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_i1_zeroext_func_void - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; GCN-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]] + ; GCN-NEXT: [[PRED_COPY11:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; GCN-NEXT: [[PRED_COPY12:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY9]](p4) ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY12]], [[C]](s64) + ; GCN-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; GCN-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; GCN-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; GCN-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GCN-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; GCN-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY18]], [[C1]](s32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY17]], [[SHL]] + ; GCN-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY19]], [[C2]](s32) ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; GCN-NEXT: [[PRED_COPY20:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY20]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY10]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY11]](p4) + ; GCN-NEXT: $sgpr8_sgpr9 = PRED_COPY 
[[PTR_ADD]](p4) + ; GCN-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY13]](s64) + ; GCN-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY14]](s32) + ; GCN-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY15]](s32) + ; GCN-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY16]](s32) + ; GCN-NEXT: $sgpr15 = PRED_COPY [[DEF1]](s32) + ; GCN-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_i1_zeroext_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[COPY21]], 1 + ; GCN-NEXT: [[PRED_COPY21:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[PRED_COPY21]], 1 ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[ASSERT_ZEXT]](s32) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[TRUNC]](s1) @@ -297,52 +297,52 @@ ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr16 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr15 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr6_sgpr7 + ; GCN-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY $sgpr8_sgpr9 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_i1_signext_func_void - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; GCN-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]] + ; GCN-NEXT: [[PRED_COPY11:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; GCN-NEXT: [[PRED_COPY12:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY9]](p4) ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = 
G_PTR_ADD [[PRED_COPY12]], [[C]](s64) + ; GCN-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; GCN-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; GCN-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; GCN-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GCN-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; GCN-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY18]], [[C1]](s32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY17]], [[SHL]] + ; GCN-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY19]], [[C2]](s32) ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; GCN-NEXT: [[PRED_COPY20:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY20]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY10]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY11]](p4) + ; GCN-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; GCN-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY13]](s64) + ; GCN-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY14]](s32) + ; GCN-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY15]](s32) + ; GCN-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY16]](s32) + ; GCN-NEXT: $sgpr15 = PRED_COPY [[DEF1]](s32) + ; GCN-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_i1_signext_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[ASSERT_SEXT:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[COPY21]], 1 + ; GCN-NEXT: [[PRED_COPY21:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[ASSERT_SEXT:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[PRED_COPY21]], 1 ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[ASSERT_SEXT]](s32) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC]](s1) @@ -359,52 +359,52 @@ ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, 
$sgpr10_sgpr11 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr16 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr15 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr6_sgpr7 + ; GCN-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY $sgpr8_sgpr9 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_i8_func_void - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; GCN-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]] + ; GCN-NEXT: [[PRED_COPY11:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; GCN-NEXT: [[PRED_COPY12:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY9]](p4) ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY12]], [[C]](s64) + ; GCN-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; GCN-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; GCN-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; GCN-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GCN-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; GCN-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY18]], [[C1]](s32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY17]], [[SHL]] + ; GCN-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL 
[[PRED_COPY19]], [[C2]](s32) ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; GCN-NEXT: [[PRED_COPY20:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY20]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY10]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY11]](p4) + ; GCN-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; GCN-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY13]](s64) + ; GCN-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY14]](s32) + ; GCN-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY15]](s32) + ; GCN-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY16]](s32) + ; GCN-NEXT: $sgpr15 = PRED_COPY [[DEF1]](s32) + ; GCN-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_i8_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY21]](s32) + ; GCN-NEXT: [[PRED_COPY21:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY21]](s32) ; GCN-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC]](s16) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN-NEXT: G_STORE [[TRUNC1]](s8), [[DEF]](p1) :: (volatile store (s8) into `ptr addrspace(1) undef`, addrspace 1) @@ -420,11 +420,11 @@ ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_gfx_i8_func_void - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY]](<4 x s32>) + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY]](<4 x s32>) ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_gfx_i8_func_void, csr_amdgpu_si_gfx, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY1]](s32) ; GCN-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC]](s16) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN-NEXT: G_STORE [[TRUNC1]](s8), [[DEF]](p1) :: (volatile store (s8) into `ptr addrspace(1) undef`, addrspace 1) @@ -439,52 +439,52 @@ ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = 
COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr16 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr15 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr6_sgpr7 + ; GCN-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY $sgpr8_sgpr9 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_i8_zeroext_func_void - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; GCN-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]] + ; GCN-NEXT: [[PRED_COPY11:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; GCN-NEXT: [[PRED_COPY12:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY9]](p4) ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY12]], [[C]](s64) + ; GCN-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; GCN-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; GCN-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; GCN-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GCN-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; GCN-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY18]], [[C1]](s32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY17]], [[SHL]] + ; GCN-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY19]], [[C2]](s32) ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = 
COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; GCN-NEXT: [[PRED_COPY20:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY20]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY10]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY11]](p4) + ; GCN-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; GCN-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY13]](s64) + ; GCN-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY14]](s32) + ; GCN-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY15]](s32) + ; GCN-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY16]](s32) + ; GCN-NEXT: $sgpr15 = PRED_COPY [[DEF1]](s32) + ; GCN-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_i8_zeroext_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[COPY21]], 8 + ; GCN-NEXT: [[PRED_COPY21:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[PRED_COPY21]], 8 ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[ASSERT_ZEXT]](s32) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[TRUNC]](s8) @@ -501,52 +501,52 @@ ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr16 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr15 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr6_sgpr7 + ; GCN-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY $sgpr8_sgpr9 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = 
G_GLOBAL_VALUE @external_i8_signext_func_void - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; GCN-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]] + ; GCN-NEXT: [[PRED_COPY11:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; GCN-NEXT: [[PRED_COPY12:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY9]](p4) ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY12]], [[C]](s64) + ; GCN-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; GCN-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; GCN-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; GCN-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GCN-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; GCN-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY18]], [[C1]](s32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY17]], [[SHL]] + ; GCN-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY19]], [[C2]](s32) ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; GCN-NEXT: [[PRED_COPY20:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY20]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY10]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY11]](p4) + ; GCN-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; GCN-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY13]](s64) + ; GCN-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY14]](s32) + ; GCN-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY15]](s32) + ; GCN-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY16]](s32) + ; GCN-NEXT: $sgpr15 = PRED_COPY [[DEF1]](s32) + ; GCN-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_i8_signext_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, 
implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[ASSERT_SEXT:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[COPY21]], 8 + ; GCN-NEXT: [[PRED_COPY21:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[ASSERT_SEXT:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[PRED_COPY21]], 8 ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[ASSERT_SEXT]](s32) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC]](s8) @@ -563,52 +563,52 @@ ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr16 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr15 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr6_sgpr7 + ; GCN-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY $sgpr8_sgpr9 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_i16_func_void - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; GCN-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]] + ; GCN-NEXT: [[PRED_COPY11:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; GCN-NEXT: [[PRED_COPY12:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY9]](p4) ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY12]], [[C]](s64) + ; GCN-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; GCN-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; GCN-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; GCN-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: 
[[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GCN-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; GCN-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY18]], [[C1]](s32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY17]], [[SHL]] + ; GCN-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY19]], [[C2]](s32) ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; GCN-NEXT: [[PRED_COPY20:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY20]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY10]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY11]](p4) + ; GCN-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; GCN-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY13]](s64) + ; GCN-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY14]](s32) + ; GCN-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY15]](s32) + ; GCN-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY16]](s32) + ; GCN-NEXT: $sgpr15 = PRED_COPY [[DEF1]](s32) + ; GCN-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_i16_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY21]](s32) + ; GCN-NEXT: [[PRED_COPY21:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY21]](s32) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN-NEXT: G_STORE [[TRUNC]](s16), [[DEF]](p1) :: (volatile store (s16) into `ptr addrspace(1) undef`, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 @@ -622,52 +622,52 @@ ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = 
COPY $sgpr6_sgpr7 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr16 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr15 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr6_sgpr7 + ; GCN-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY $sgpr8_sgpr9 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_i16_zeroext_func_void - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; GCN-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]] + ; GCN-NEXT: [[PRED_COPY11:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; GCN-NEXT: [[PRED_COPY12:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY9]](p4) ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY12]], [[C]](s64) + ; GCN-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; GCN-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; GCN-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; GCN-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GCN-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; GCN-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY18]], [[C1]](s32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY17]], [[SHL]] + ; GCN-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY19]], [[C2]](s32) ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; 
GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; GCN-NEXT: [[PRED_COPY20:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY20]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY10]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY11]](p4) + ; GCN-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; GCN-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY13]](s64) + ; GCN-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY14]](s32) + ; GCN-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY15]](s32) + ; GCN-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY16]](s32) + ; GCN-NEXT: $sgpr15 = PRED_COPY [[DEF1]](s32) + ; GCN-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_i16_zeroext_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[COPY21]], 16 + ; GCN-NEXT: [[PRED_COPY21:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[PRED_COPY21]], 16 ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[ASSERT_ZEXT]](s32) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[TRUNC]](s16) @@ -684,52 +684,52 @@ ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr16 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr15 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr6_sgpr7 + ; GCN-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY $sgpr8_sgpr9 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_i16_signext_func_void - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; GCN-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]] + ; GCN-NEXT: [[PRED_COPY11:%[0-9]+]]:_(p4) = 
PRED_COPY [[PRED_COPY7]] + ; GCN-NEXT: [[PRED_COPY12:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY9]](p4) ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY12]], [[C]](s64) + ; GCN-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; GCN-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; GCN-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; GCN-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GCN-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; GCN-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY18]], [[C1]](s32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY17]], [[SHL]] + ; GCN-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY19]], [[C2]](s32) ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; GCN-NEXT: [[PRED_COPY20:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY20]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY10]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY11]](p4) + ; GCN-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; GCN-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY13]](s64) + ; GCN-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY14]](s32) + ; GCN-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY15]](s32) + ; GCN-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY16]](s32) + ; GCN-NEXT: $sgpr15 = PRED_COPY [[DEF1]](s32) + ; GCN-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_i16_signext_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[ASSERT_SEXT:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[COPY21]], 16 
+ ; GCN-NEXT: [[PRED_COPY21:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[ASSERT_SEXT:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[PRED_COPY21]], 16 ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[ASSERT_SEXT]](s32) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC]](s16) @@ -746,53 +746,53 @@ ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr16 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr15 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr6_sgpr7 + ; GCN-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY $sgpr8_sgpr9 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_i32_func_void - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; GCN-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]] + ; GCN-NEXT: [[PRED_COPY11:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; GCN-NEXT: [[PRED_COPY12:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY9]](p4) ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY12]], [[C]](s64) + ; GCN-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; GCN-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; GCN-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; GCN-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GCN-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; GCN-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; GCN-NEXT: 
[[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY18]], [[C1]](s32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY17]], [[SHL]] + ; GCN-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY19]], [[C2]](s32) ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; GCN-NEXT: [[PRED_COPY20:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY20]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY10]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY11]](p4) + ; GCN-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; GCN-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY13]](s64) + ; GCN-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY14]](s32) + ; GCN-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY15]](s32) + ; GCN-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY16]](s32) + ; GCN-NEXT: $sgpr15 = PRED_COPY [[DEF1]](s32) + ; GCN-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_i32_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY21:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN-NEXT: G_STORE [[COPY21]](s32), [[DEF]](p1) :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1) + ; GCN-NEXT: G_STORE [[PRED_COPY21]](s32), [[DEF]](p1) :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 %val = call i32 @external_i32_func_void() store volatile i32 %val, ptr addrspace(1) undef @@ -805,12 +805,12 @@ ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_gfx_i32_func_void - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY]](<4 x s32>) + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY]](<4 x s32>) ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_gfx_i32_func_void, csr_amdgpu_si_gfx, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN-NEXT: G_STORE [[COPY1]](s32), [[DEF]](p1) :: (volatile store 
(s32) into `ptr addrspace(1) undef`, addrspace 1) + ; GCN-NEXT: G_STORE [[PRED_COPY1]](s32), [[DEF]](p1) :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; GCN-NEXT: SI_RETURN %val = call amdgpu_gfx i32 @external_gfx_i32_func_void() store volatile i32 %val, ptr addrspace(1) undef @@ -822,53 +822,53 @@ ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr16 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr15 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr6_sgpr7 + ; GCN-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY $sgpr8_sgpr9 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_i48_func_void - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; GCN-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]] + ; GCN-NEXT: [[PRED_COPY11:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; GCN-NEXT: [[PRED_COPY12:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY9]](p4) ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY12]], [[C]](s64) + ; GCN-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; GCN-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; GCN-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; GCN-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GCN-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; GCN-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR 
[[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY18]], [[C1]](s32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY17]], [[SHL]] + ; GCN-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY19]], [[C2]](s32) ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; GCN-NEXT: [[PRED_COPY20:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY20]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY10]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY11]](p4) + ; GCN-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; GCN-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY13]](s64) + ; GCN-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY14]](s32) + ; GCN-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY15]](s32) + ; GCN-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY16]](s32) + ; GCN-NEXT: $sgpr15 = PRED_COPY [[DEF1]](s32) + ; GCN-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_i48_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY21]](s32), [[COPY22]](s32) + ; GCN-NEXT: [[PRED_COPY21:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY22:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY21]](s32), [[PRED_COPY22]](s32) ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[MV]](s64) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN-NEXT: G_STORE [[TRUNC]](s48), [[DEF]](p1) :: (volatile store (s48) into `ptr addrspace(1) undef`, align 8, addrspace 1) @@ -883,53 +883,53 @@ ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + 
; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr16 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr15 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr6_sgpr7 + ; GCN-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY $sgpr8_sgpr9 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_i48_zeroext_func_void - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; GCN-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]] + ; GCN-NEXT: [[PRED_COPY11:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; GCN-NEXT: [[PRED_COPY12:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY9]](p4) ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY12]], [[C]](s64) + ; GCN-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; GCN-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; GCN-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; GCN-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GCN-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; GCN-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY18]], [[C1]](s32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY17]], [[SHL]] + ; GCN-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY19]], [[C2]](s32) ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; 
GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; GCN-NEXT: [[PRED_COPY20:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY20]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY10]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY11]](p4) + ; GCN-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; GCN-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY13]](s64) + ; GCN-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY14]](s32) + ; GCN-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY15]](s32) + ; GCN-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY16]](s32) + ; GCN-NEXT: $sgpr15 = PRED_COPY [[DEF1]](s32) + ; GCN-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_i48_zeroext_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY21]](s32), [[COPY22]](s32) + ; GCN-NEXT: [[PRED_COPY21:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY22:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY21]](s32), [[PRED_COPY22]](s32) ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[MV]](s64) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[TRUNC]](s48) @@ -946,53 +946,53 @@ ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr16 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr15 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr6_sgpr7 + ; GCN-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY $sgpr8_sgpr9 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_i48_signext_func_void - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; GCN-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]] + ; GCN-NEXT: 
[[PRED_COPY11:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; GCN-NEXT: [[PRED_COPY12:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY9]](p4) ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY12]], [[C]](s64) + ; GCN-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; GCN-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; GCN-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; GCN-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GCN-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; GCN-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY18]], [[C1]](s32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY17]], [[SHL]] + ; GCN-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY19]], [[C2]](s32) ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; GCN-NEXT: [[PRED_COPY20:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY20]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY10]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY11]](p4) + ; GCN-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; GCN-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY13]](s64) + ; GCN-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY14]](s32) + ; GCN-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY15]](s32) + ; GCN-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY16]](s32) + ; GCN-NEXT: $sgpr15 = PRED_COPY [[DEF1]](s32) + ; GCN-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_i48_signext_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: 
[[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY21]](s32), [[COPY22]](s32) + ; GCN-NEXT: [[PRED_COPY21:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY22:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY21]](s32), [[PRED_COPY22]](s32) ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[MV]](s64) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[TRUNC]](s48) @@ -1009,53 +1009,53 @@ ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr16 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr15 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr6_sgpr7 + ; GCN-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY $sgpr8_sgpr9 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_i64_func_void - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; GCN-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]] + ; GCN-NEXT: [[PRED_COPY11:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; GCN-NEXT: [[PRED_COPY12:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY9]](p4) ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY12]], [[C]](s64) + ; GCN-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; GCN-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; GCN-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; GCN-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GCN-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; GCN-NEXT: 
[[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY18]], [[C1]](s32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY17]], [[SHL]] + ; GCN-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY19]], [[C2]](s32) ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; GCN-NEXT: [[PRED_COPY20:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY20]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY10]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY11]](p4) + ; GCN-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; GCN-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY13]](s64) + ; GCN-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY14]](s32) + ; GCN-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY15]](s32) + ; GCN-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY16]](s32) + ; GCN-NEXT: $sgpr15 = PRED_COPY [[DEF1]](s32) + ; GCN-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_i64_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY21]](s32), [[COPY22]](s32) + ; GCN-NEXT: [[PRED_COPY21:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY22:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY21]](s32), [[PRED_COPY22]](s32) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN-NEXT: G_STORE [[MV]](s64), [[DEF]](p1) :: (volatile store (s64) into `ptr addrspace(1) undef`, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 @@ -1069,53 +1069,53 @@ ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY 
$sgpr10_sgpr11 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr16 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr15 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr6_sgpr7 + ; GCN-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY $sgpr8_sgpr9 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_p1_func_void - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; GCN-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]] + ; GCN-NEXT: [[PRED_COPY11:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; GCN-NEXT: [[PRED_COPY12:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY9]](p4) ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY12]], [[C]](s64) + ; GCN-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; GCN-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; GCN-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; GCN-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GCN-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; GCN-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY18]], [[C1]](s32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY17]], [[SHL]] + ; GCN-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY19]], [[C2]](s32) ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) 
- ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; GCN-NEXT: [[PRED_COPY20:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY20]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY10]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY11]](p4) + ; GCN-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; GCN-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY13]](s64) + ; GCN-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY14]](s32) + ; GCN-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY15]](s32) + ; GCN-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY16]](s32) + ; GCN-NEXT: $sgpr15 = PRED_COPY [[DEF1]](s32) + ; GCN-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_p1_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY21]](s32), [[COPY22]](s32) + ; GCN-NEXT: [[PRED_COPY21:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY22:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[PRED_COPY21]](s32), [[PRED_COPY22]](s32) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN-NEXT: G_STORE [[MV]](p1), [[DEF]](p1) :: (volatile store (p1) into `ptr addrspace(1) undef`, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 @@ -1129,56 +1129,56 @@ ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr16 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr15 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr6_sgpr7 + ; GCN-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY $sgpr8_sgpr9 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_v2p1_func_void - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GCN-NEXT: 
[[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; GCN-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]] + ; GCN-NEXT: [[PRED_COPY11:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; GCN-NEXT: [[PRED_COPY12:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY9]](p4) ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY12]], [[C]](s64) + ; GCN-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; GCN-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; GCN-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; GCN-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GCN-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; GCN-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY18]], [[C1]](s32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY17]], [[SHL]] + ; GCN-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY19]], [[C2]](s32) ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; GCN-NEXT: [[PRED_COPY20:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY20]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY10]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY11]](p4) + ; GCN-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; GCN-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY13]](s64) + ; GCN-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY14]](s32) + ; GCN-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY15]](s32) + ; GCN-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY16]](s32) + ; GCN-NEXT: $sgpr15 = PRED_COPY [[DEF1]](s32) + ; GCN-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v2p1_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit 
$sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GCN-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY21]](s32), [[COPY22]](s32) - ; GCN-NEXT: [[MV1:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY23]](s32), [[COPY24]](s32) + ; GCN-NEXT: [[PRED_COPY21:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY22:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY23:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GCN-NEXT: [[PRED_COPY24:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GCN-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[PRED_COPY21]](s32), [[PRED_COPY22]](s32) + ; GCN-NEXT: [[MV1:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[PRED_COPY23]](s32), [[PRED_COPY24]](s32) ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p1>) = G_BUILD_VECTOR [[MV]](p1), [[MV1]](p1) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN-NEXT: G_STORE [[BUILD_VECTOR]](<2 x p1>), [[DEF]](p1) :: (volatile store (<2 x p1>) into `ptr addrspace(1) undef`, addrspace 1) @@ -1193,53 +1193,53 @@ ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr16 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr15 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr6_sgpr7 + ; GCN-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY $sgpr8_sgpr9 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p3) = G_IMPLICIT_DEF ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_p3_func_void - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; GCN-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]] + ; GCN-NEXT: [[PRED_COPY11:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; GCN-NEXT: [[PRED_COPY12:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY9]](p4) ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: 
[[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY12]], [[C]](s64) + ; GCN-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; GCN-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; GCN-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; GCN-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GCN-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; GCN-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY18]], [[C1]](s32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY17]], [[SHL]] + ; GCN-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY19]], [[C2]](s32) ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; GCN-NEXT: [[PRED_COPY20:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY20]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY10]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY11]](p4) + ; GCN-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; GCN-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY13]](s64) + ; GCN-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY14]](s32) + ; GCN-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY15]](s32) + ; GCN-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY16]](s32) + ; GCN-NEXT: $sgpr15 = PRED_COPY [[DEF1]](s32) + ; GCN-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_p3_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY21:%[0-9]+]]:_(p3) = PRED_COPY $vgpr0 ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN-NEXT: G_STORE [[COPY21]](p3), [[DEF]](p3) :: (volatile store (p3) into `ptr addrspace(3) undef`, addrspace 3) + ; GCN-NEXT: G_STORE [[PRED_COPY21]](p3), [[DEF]](p3) :: (volatile store (p3) into `ptr addrspace(3) undef`, addrspace 3) ; GCN-NEXT: S_ENDPGM 0 %val = call ptr addrspace(3) @external_p3_func_void() store 
volatile ptr addrspace(3) %val, ptr addrspace(3) undef @@ -1251,53 +1251,53 @@ ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr16 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr15 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr6_sgpr7 + ; GCN-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY $sgpr8_sgpr9 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p3) = G_IMPLICIT_DEF ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_v2p3_func_void - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; GCN-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]] + ; GCN-NEXT: [[PRED_COPY11:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; GCN-NEXT: [[PRED_COPY12:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY9]](p4) ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY12]], [[C]](s64) + ; GCN-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; GCN-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; GCN-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; GCN-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GCN-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; GCN-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY18]], [[C1]](s32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY17]], [[SHL]] + ; GCN-NEXT: 
[[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY19]], [[C2]](s32) ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; GCN-NEXT: [[PRED_COPY20:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY20]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY10]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY11]](p4) + ; GCN-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; GCN-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY13]](s64) + ; GCN-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY14]](s32) + ; GCN-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY15]](s32) + ; GCN-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY16]](s32) + ; GCN-NEXT: $sgpr15 = PRED_COPY [[DEF1]](s32) + ; GCN-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v2p3_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(p3) = COPY $vgpr1 - ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[COPY21]](p3), [[COPY22]](p3) + ; GCN-NEXT: [[PRED_COPY21:%[0-9]+]]:_(p3) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY22:%[0-9]+]]:_(p3) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[PRED_COPY21]](p3), [[PRED_COPY22]](p3) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN-NEXT: G_STORE [[BUILD_VECTOR]](<2 x p3>), [[DEF]](p3) :: (volatile store (<2 x p3>) into `ptr addrspace(3) undef`, addrspace 3) ; GCN-NEXT: S_ENDPGM 0 @@ -1311,52 +1311,52 @@ ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: 
[[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr16 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr15 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr6_sgpr7 + ; GCN-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY $sgpr8_sgpr9 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_f16_func_void - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; GCN-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]] + ; GCN-NEXT: [[PRED_COPY11:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; GCN-NEXT: [[PRED_COPY12:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY9]](p4) ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY12]], [[C]](s64) + ; GCN-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; GCN-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; GCN-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; GCN-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GCN-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; GCN-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY18]], [[C1]](s32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY17]], [[SHL]] + ; GCN-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY19]], [[C2]](s32) ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; GCN-NEXT: [[PRED_COPY20:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY20]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = 
PRED_COPY [[PRED_COPY10]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY11]](p4) + ; GCN-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; GCN-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY13]](s64) + ; GCN-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY14]](s32) + ; GCN-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY15]](s32) + ; GCN-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY16]](s32) + ; GCN-NEXT: $sgpr15 = PRED_COPY [[DEF1]](s32) + ; GCN-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_f16_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY21]](s32) + ; GCN-NEXT: [[PRED_COPY21:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY21]](s32) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN-NEXT: G_STORE [[TRUNC]](s16), [[DEF]](p1) :: (volatile store (s16) into `ptr addrspace(1) undef`, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 @@ -1370,53 +1370,53 @@ ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr16 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr15 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr6_sgpr7 + ; GCN-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY $sgpr8_sgpr9 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_f32_func_void - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; GCN-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]] + ; GCN-NEXT: [[PRED_COPY11:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; GCN-NEXT: [[PRED_COPY12:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY9]](p4) ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY 
[[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY12]], [[C]](s64) + ; GCN-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; GCN-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; GCN-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; GCN-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GCN-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; GCN-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY18]], [[C1]](s32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY17]], [[SHL]] + ; GCN-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY19]], [[C2]](s32) ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; GCN-NEXT: [[PRED_COPY20:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY20]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY10]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY11]](p4) + ; GCN-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; GCN-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY13]](s64) + ; GCN-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY14]](s32) + ; GCN-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY15]](s32) + ; GCN-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY16]](s32) + ; GCN-NEXT: $sgpr15 = PRED_COPY [[DEF1]](s32) + ; GCN-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_f32_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY21:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN-NEXT: G_STORE [[COPY21]](s32), [[DEF]](p1) :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1) + ; GCN-NEXT: G_STORE [[PRED_COPY21]](s32), [[DEF]](p1) :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 %val = call float @external_f32_func_void() store volatile float %val, ptr 
addrspace(1) undef @@ -1428,53 +1428,53 @@ ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr16 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr15 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr6_sgpr7 + ; GCN-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY $sgpr8_sgpr9 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_f64_func_void - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; GCN-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]] + ; GCN-NEXT: [[PRED_COPY11:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; GCN-NEXT: [[PRED_COPY12:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY9]](p4) ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY12]], [[C]](s64) + ; GCN-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; GCN-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; GCN-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; GCN-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GCN-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; GCN-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY18]], [[C1]](s32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY17]], [[SHL]] + ; GCN-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY 
[[PRED_COPY]](s32) ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY19]], [[C2]](s32) ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; GCN-NEXT: [[PRED_COPY20:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY20]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY10]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY11]](p4) + ; GCN-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; GCN-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY13]](s64) + ; GCN-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY14]](s32) + ; GCN-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY15]](s32) + ; GCN-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY16]](s32) + ; GCN-NEXT: $sgpr15 = PRED_COPY [[DEF1]](s32) + ; GCN-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_f64_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY21]](s32), [[COPY22]](s32) + ; GCN-NEXT: [[PRED_COPY21:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY22:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY21]](s32), [[PRED_COPY22]](s32) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN-NEXT: G_STORE [[MV]](s64), [[DEF]](p1) :: (volatile store (s64) into `ptr addrspace(1) undef`, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 @@ -1488,56 +1488,56 @@ ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr16 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY 
$sgpr15 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr6_sgpr7 + ; GCN-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY $sgpr8_sgpr9 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_v2f64_func_void - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; GCN-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]] + ; GCN-NEXT: [[PRED_COPY11:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; GCN-NEXT: [[PRED_COPY12:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY9]](p4) ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY12]], [[C]](s64) + ; GCN-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; GCN-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; GCN-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; GCN-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GCN-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; GCN-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY18]], [[C1]](s32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY17]], [[SHL]] + ; GCN-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY19]], [[C2]](s32) ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; GCN-NEXT: [[PRED_COPY20:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY20]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY10]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY11]](p4) + ; GCN-NEXT: 
$sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; GCN-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY13]](s64) + ; GCN-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY14]](s32) + ; GCN-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY15]](s32) + ; GCN-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY16]](s32) + ; GCN-NEXT: $sgpr15 = PRED_COPY [[DEF1]](s32) + ; GCN-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v2f64_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GCN-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY21]](s32), [[COPY22]](s32) - ; GCN-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY23]](s32), [[COPY24]](s32) + ; GCN-NEXT: [[PRED_COPY21:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY22:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY23:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GCN-NEXT: [[PRED_COPY24:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GCN-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY21]](s32), [[PRED_COPY22]](s32) + ; GCN-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY23]](s32), [[PRED_COPY24]](s32) ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN-NEXT: G_STORE [[BUILD_VECTOR]](<2 x s64>), [[DEF]](p1) :: (volatile store (<2 x s64>) into `ptr addrspace(1) undef`, addrspace 1) @@ -1552,53 +1552,53 @@ ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr16 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr15 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr6_sgpr7 + ; GCN-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY $sgpr8_sgpr9 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_v2i32_func_void - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = 
COPY [[COPY8]] - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; GCN-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]] + ; GCN-NEXT: [[PRED_COPY11:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; GCN-NEXT: [[PRED_COPY12:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY9]](p4) ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY12]], [[C]](s64) + ; GCN-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; GCN-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; GCN-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; GCN-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GCN-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; GCN-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY18]], [[C1]](s32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY17]], [[SHL]] + ; GCN-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY19]], [[C2]](s32) ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; GCN-NEXT: [[PRED_COPY20:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY20]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY10]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY11]](p4) + ; GCN-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; GCN-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY13]](s64) + ; GCN-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY14]](s32) + ; GCN-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY15]](s32) + ; GCN-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY16]](s32) + ; GCN-NEXT: $sgpr15 = PRED_COPY [[DEF1]](s32) + ; GCN-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v2i32_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit 
$sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY21]](s32), [[COPY22]](s32) + ; GCN-NEXT: [[PRED_COPY21:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY22:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY21]](s32), [[PRED_COPY22]](s32) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN-NEXT: G_STORE [[BUILD_VECTOR]](<2 x s32>), [[DEF]](p1) :: (volatile store (<2 x s32>) into `ptr addrspace(1) undef`, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 @@ -1612,54 +1612,54 @@ ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr16 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr15 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr6_sgpr7 + ; GCN-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY $sgpr8_sgpr9 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_v3i32_func_void - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; GCN-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]] + ; GCN-NEXT: [[PRED_COPY11:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; GCN-NEXT: [[PRED_COPY12:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY9]](p4) ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY12]], [[C]](s64) + ; GCN-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; GCN-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; GCN-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; GCN-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY 
[[PRED_COPY3]] ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GCN-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; GCN-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY18]], [[C1]](s32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY17]], [[SHL]] + ; GCN-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY19]], [[C2]](s32) ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; GCN-NEXT: [[PRED_COPY20:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY20]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY10]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY11]](p4) + ; GCN-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; GCN-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY13]](s64) + ; GCN-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY14]](s32) + ; GCN-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY15]](s32) + ; GCN-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY16]](s32) + ; GCN-NEXT: $sgpr15 = PRED_COPY [[DEF1]](s32) + ; GCN-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v3i32_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32) + ; GCN-NEXT: [[PRED_COPY21:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY22:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY23:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[PRED_COPY21]](s32), [[PRED_COPY22]](s32), [[PRED_COPY23]](s32) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN-NEXT: G_STORE [[BUILD_VECTOR]](<3 x s32>), [[DEF]](p1) :: (volatile store (<3 x s32>) into `ptr addrspace(1) undef`, align 8, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 @@ -1673,55 +1673,55 @@ ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: 
$sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr16 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr15 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr6_sgpr7 + ; GCN-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY $sgpr8_sgpr9 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_v4i32_func_void - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; GCN-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]] + ; GCN-NEXT: [[PRED_COPY11:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; GCN-NEXT: [[PRED_COPY12:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY9]](p4) ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY12]], [[C]](s64) + ; GCN-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; GCN-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; GCN-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; GCN-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GCN-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; GCN-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY18]], [[C1]](s32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY17]], [[SHL]] + ; GCN-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: 
[[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY19]], [[C2]](s32) ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; GCN-NEXT: [[PRED_COPY20:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY20]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY10]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY11]](p4) + ; GCN-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; GCN-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY13]](s64) + ; GCN-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY14]](s32) + ; GCN-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY15]](s32) + ; GCN-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY16]](s32) + ; GCN-NEXT: $sgpr15 = PRED_COPY [[DEF1]](s32) + ; GCN-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v4i32_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32) + ; GCN-NEXT: [[PRED_COPY21:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY22:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY23:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GCN-NEXT: [[PRED_COPY24:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY21]](s32), [[PRED_COPY22]](s32), [[PRED_COPY23]](s32), [[PRED_COPY24]](s32) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[DEF]](p1) :: (volatile store (<4 x s32>) into `ptr addrspace(1) undef`, align 8, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 @@ -1735,56 +1735,56 @@ ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY 
$sgpr8_sgpr9 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr16 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr15 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr6_sgpr7 + ; GCN-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY $sgpr8_sgpr9 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_v5i32_func_void - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; GCN-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]] + ; GCN-NEXT: [[PRED_COPY11:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; GCN-NEXT: [[PRED_COPY12:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY9]](p4) ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY12]], [[C]](s64) + ; GCN-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; GCN-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; GCN-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; GCN-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GCN-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; GCN-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY18]], [[C1]](s32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY17]], [[SHL]] + ; GCN-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY19]], [[C2]](s32) ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) 
- ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; GCN-NEXT: [[PRED_COPY20:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY20]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY10]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY11]](p4) + ; GCN-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; GCN-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY13]](s64) + ; GCN-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY14]](s32) + ; GCN-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY15]](s32) + ; GCN-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY16]](s32) + ; GCN-NEXT: $sgpr15 = PRED_COPY [[DEF1]](s32) + ; GCN-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v5i32_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3, implicit-def $vgpr4 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GCN-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32) + ; GCN-NEXT: [[PRED_COPY21:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY22:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY23:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GCN-NEXT: [[PRED_COPY24:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GCN-NEXT: [[PRED_COPY25:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[PRED_COPY21]](s32), [[PRED_COPY22]](s32), [[PRED_COPY23]](s32), [[PRED_COPY24]](s32), [[PRED_COPY25]](s32) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN-NEXT: G_STORE [[BUILD_VECTOR]](<5 x s32>), [[DEF]](p1) :: (volatile store (<5 x s32>) into `ptr addrspace(1) undef`, align 8, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 @@ -1798,59 +1798,59 @@ ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr16 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr15 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = 
PRED_COPY $sgpr6_sgpr7 + ; GCN-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY $sgpr8_sgpr9 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_v8i32_func_void - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; GCN-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]] + ; GCN-NEXT: [[PRED_COPY11:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; GCN-NEXT: [[PRED_COPY12:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY9]](p4) ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY12]], [[C]](s64) + ; GCN-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; GCN-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; GCN-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; GCN-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GCN-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; GCN-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY18]], [[C1]](s32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY17]], [[SHL]] + ; GCN-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY19]], [[C2]](s32) ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; GCN-NEXT: [[PRED_COPY20:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY20]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY10]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY11]](p4) + ; GCN-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; GCN-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY13]](s64) + ; GCN-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY14]](s32) + ; GCN-NEXT: $sgpr13 = PRED_COPY 
[[PRED_COPY15]](s32) + ; GCN-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY16]](s32) + ; GCN-NEXT: $sgpr15 = PRED_COPY [[DEF1]](s32) + ; GCN-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v8i32_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3, implicit-def $vgpr4, implicit-def $vgpr5, implicit-def $vgpr6, implicit-def $vgpr7 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GCN-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GCN-NEXT: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GCN-NEXT: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GCN-NEXT: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32) + ; GCN-NEXT: [[PRED_COPY21:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY22:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY23:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GCN-NEXT: [[PRED_COPY24:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GCN-NEXT: [[PRED_COPY25:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; GCN-NEXT: [[PRED_COPY26:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; GCN-NEXT: [[PRED_COPY27:%[0-9]+]]:_(s32) = PRED_COPY $vgpr6 + ; GCN-NEXT: [[PRED_COPY28:%[0-9]+]]:_(s32) = PRED_COPY $vgpr7 + ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY21]](s32), [[PRED_COPY22]](s32), [[PRED_COPY23]](s32), [[PRED_COPY24]](s32), [[PRED_COPY25]](s32), [[PRED_COPY26]](s32), [[PRED_COPY27]](s32), [[PRED_COPY28]](s32) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN-NEXT: G_STORE [[BUILD_VECTOR]](<8 x s32>), [[DEF]](p1) :: (volatile store (<8 x s32>) into `ptr addrspace(1) undef`, align 8, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 @@ -1864,67 +1864,67 @@ ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr16 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr15 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; GCN-NEXT: 
[[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr6_sgpr7 + ; GCN-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY $sgpr8_sgpr9 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_v16i32_func_void - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; GCN-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]] + ; GCN-NEXT: [[PRED_COPY11:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; GCN-NEXT: [[PRED_COPY12:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY9]](p4) ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY12]], [[C]](s64) + ; GCN-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; GCN-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; GCN-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; GCN-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GCN-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; GCN-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY18]], [[C1]](s32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY17]], [[SHL]] + ; GCN-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY19]], [[C2]](s32) ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; GCN-NEXT: [[PRED_COPY20:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY20]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY10]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY11]](p4) + ; GCN-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; GCN-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY13]](s64) + ; GCN-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY14]](s32) + ; 
GCN-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY15]](s32) + ; GCN-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY16]](s32) + ; GCN-NEXT: $sgpr15 = PRED_COPY [[DEF1]](s32) + ; GCN-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v16i32_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3, implicit-def $vgpr4, implicit-def $vgpr5, implicit-def $vgpr6, implicit-def $vgpr7, implicit-def $vgpr8, implicit-def $vgpr9, implicit-def $vgpr10, implicit-def $vgpr11, implicit-def $vgpr12, implicit-def $vgpr13, implicit-def $vgpr14, implicit-def $vgpr15 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GCN-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GCN-NEXT: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GCN-NEXT: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GCN-NEXT: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GCN-NEXT: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GCN-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; GCN-NEXT: [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; GCN-NEXT: [[COPY32:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; GCN-NEXT: [[COPY33:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; GCN-NEXT: [[COPY34:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; GCN-NEXT: [[COPY35:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; GCN-NEXT: [[COPY36:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[COPY31]](s32), [[COPY32]](s32), [[COPY33]](s32), [[COPY34]](s32), [[COPY35]](s32), [[COPY36]](s32) + ; GCN-NEXT: [[PRED_COPY21:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY22:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY23:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GCN-NEXT: [[PRED_COPY24:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GCN-NEXT: [[PRED_COPY25:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; GCN-NEXT: [[PRED_COPY26:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; GCN-NEXT: [[PRED_COPY27:%[0-9]+]]:_(s32) = PRED_COPY $vgpr6 + ; GCN-NEXT: [[PRED_COPY28:%[0-9]+]]:_(s32) = PRED_COPY $vgpr7 + ; GCN-NEXT: [[PRED_COPY29:%[0-9]+]]:_(s32) = PRED_COPY $vgpr8 + ; GCN-NEXT: [[PRED_COPY30:%[0-9]+]]:_(s32) = PRED_COPY $vgpr9 + ; GCN-NEXT: [[PRED_COPY31:%[0-9]+]]:_(s32) = PRED_COPY $vgpr10 + ; GCN-NEXT: [[PRED_COPY32:%[0-9]+]]:_(s32) = PRED_COPY $vgpr11 + ; GCN-NEXT: [[PRED_COPY33:%[0-9]+]]:_(s32) = PRED_COPY $vgpr12 + ; GCN-NEXT: [[PRED_COPY34:%[0-9]+]]:_(s32) = PRED_COPY $vgpr13 + ; GCN-NEXT: [[PRED_COPY35:%[0-9]+]]:_(s32) = PRED_COPY $vgpr14 + ; GCN-NEXT: [[PRED_COPY36:%[0-9]+]]:_(s32) = PRED_COPY $vgpr15 + ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[PRED_COPY21]](s32), [[PRED_COPY22]](s32), [[PRED_COPY23]](s32), [[PRED_COPY24]](s32), [[PRED_COPY25]](s32), [[PRED_COPY26]](s32), [[PRED_COPY27]](s32), [[PRED_COPY28]](s32), [[PRED_COPY29]](s32), [[PRED_COPY30]](s32), [[PRED_COPY31]](s32), [[PRED_COPY32]](s32), [[PRED_COPY33]](s32), [[PRED_COPY34]](s32), [[PRED_COPY35]](s32), [[PRED_COPY36]](s32) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, 
implicit-def $scc ; GCN-NEXT: G_STORE [[BUILD_VECTOR]](<16 x s32>), [[DEF]](p1) :: (volatile store (<16 x s32>) into `ptr addrspace(1) undef`, align 8, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 @@ -1938,83 +1938,83 @@ ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr16 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr15 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr6_sgpr7 + ; GCN-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY $sgpr8_sgpr9 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_v32i32_func_void - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; GCN-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]] + ; GCN-NEXT: [[PRED_COPY11:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; GCN-NEXT: [[PRED_COPY12:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY9]](p4) ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY12]], [[C]](s64) + ; GCN-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; GCN-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; GCN-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; GCN-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GCN-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; GCN-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) 
= G_SHL [[PRED_COPY18]], [[C1]](s32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY17]], [[SHL]] + ; GCN-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY19]], [[C2]](s32) ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; GCN-NEXT: [[PRED_COPY20:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY20]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY10]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY11]](p4) + ; GCN-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; GCN-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY13]](s64) + ; GCN-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY14]](s32) + ; GCN-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY15]](s32) + ; GCN-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY16]](s32) + ; GCN-NEXT: $sgpr15 = PRED_COPY [[DEF1]](s32) + ; GCN-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v32i32_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3, implicit-def $vgpr4, implicit-def $vgpr5, implicit-def $vgpr6, implicit-def $vgpr7, implicit-def $vgpr8, implicit-def $vgpr9, implicit-def $vgpr10, implicit-def $vgpr11, implicit-def $vgpr12, implicit-def $vgpr13, implicit-def $vgpr14, implicit-def $vgpr15, implicit-def $vgpr16, implicit-def $vgpr17, implicit-def $vgpr18, implicit-def $vgpr19, implicit-def $vgpr20, implicit-def $vgpr21, implicit-def $vgpr22, implicit-def $vgpr23, implicit-def $vgpr24, implicit-def $vgpr25, implicit-def $vgpr26, implicit-def $vgpr27, implicit-def $vgpr28, implicit-def $vgpr29, implicit-def $vgpr30, implicit-def $vgpr31 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GCN-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GCN-NEXT: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GCN-NEXT: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GCN-NEXT: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GCN-NEXT: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GCN-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; GCN-NEXT: [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; GCN-NEXT: [[COPY32:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; GCN-NEXT: [[COPY33:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; GCN-NEXT: [[COPY34:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; GCN-NEXT: [[COPY35:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; GCN-NEXT: [[COPY36:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; GCN-NEXT: 
[[COPY37:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; GCN-NEXT: [[COPY38:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; GCN-NEXT: [[COPY39:%[0-9]+]]:_(s32) = COPY $vgpr18 - ; GCN-NEXT: [[COPY40:%[0-9]+]]:_(s32) = COPY $vgpr19 - ; GCN-NEXT: [[COPY41:%[0-9]+]]:_(s32) = COPY $vgpr20 - ; GCN-NEXT: [[COPY42:%[0-9]+]]:_(s32) = COPY $vgpr21 - ; GCN-NEXT: [[COPY43:%[0-9]+]]:_(s32) = COPY $vgpr22 - ; GCN-NEXT: [[COPY44:%[0-9]+]]:_(s32) = COPY $vgpr23 - ; GCN-NEXT: [[COPY45:%[0-9]+]]:_(s32) = COPY $vgpr24 - ; GCN-NEXT: [[COPY46:%[0-9]+]]:_(s32) = COPY $vgpr25 - ; GCN-NEXT: [[COPY47:%[0-9]+]]:_(s32) = COPY $vgpr26 - ; GCN-NEXT: [[COPY48:%[0-9]+]]:_(s32) = COPY $vgpr27 - ; GCN-NEXT: [[COPY49:%[0-9]+]]:_(s32) = COPY $vgpr28 - ; GCN-NEXT: [[COPY50:%[0-9]+]]:_(s32) = COPY $vgpr29 - ; GCN-NEXT: [[COPY51:%[0-9]+]]:_(s32) = COPY $vgpr30 - ; GCN-NEXT: [[COPY52:%[0-9]+]]:_(s32) = COPY $vgpr31 - ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[COPY31]](s32), [[COPY32]](s32), [[COPY33]](s32), [[COPY34]](s32), [[COPY35]](s32), [[COPY36]](s32), [[COPY37]](s32), [[COPY38]](s32), [[COPY39]](s32), [[COPY40]](s32), [[COPY41]](s32), [[COPY42]](s32), [[COPY43]](s32), [[COPY44]](s32), [[COPY45]](s32), [[COPY46]](s32), [[COPY47]](s32), [[COPY48]](s32), [[COPY49]](s32), [[COPY50]](s32), [[COPY51]](s32), [[COPY52]](s32) + ; GCN-NEXT: [[PRED_COPY21:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY22:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY23:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GCN-NEXT: [[PRED_COPY24:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GCN-NEXT: [[PRED_COPY25:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; GCN-NEXT: [[PRED_COPY26:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; GCN-NEXT: [[PRED_COPY27:%[0-9]+]]:_(s32) = PRED_COPY $vgpr6 + ; GCN-NEXT: [[PRED_COPY28:%[0-9]+]]:_(s32) = PRED_COPY $vgpr7 + ; GCN-NEXT: [[PRED_COPY29:%[0-9]+]]:_(s32) = PRED_COPY $vgpr8 + ; GCN-NEXT: [[PRED_COPY30:%[0-9]+]]:_(s32) = PRED_COPY $vgpr9 + ; GCN-NEXT: [[PRED_COPY31:%[0-9]+]]:_(s32) = PRED_COPY $vgpr10 + ; GCN-NEXT: [[PRED_COPY32:%[0-9]+]]:_(s32) = PRED_COPY $vgpr11 + ; GCN-NEXT: [[PRED_COPY33:%[0-9]+]]:_(s32) = PRED_COPY $vgpr12 + ; GCN-NEXT: [[PRED_COPY34:%[0-9]+]]:_(s32) = PRED_COPY $vgpr13 + ; GCN-NEXT: [[PRED_COPY35:%[0-9]+]]:_(s32) = PRED_COPY $vgpr14 + ; GCN-NEXT: [[PRED_COPY36:%[0-9]+]]:_(s32) = PRED_COPY $vgpr15 + ; GCN-NEXT: [[PRED_COPY37:%[0-9]+]]:_(s32) = PRED_COPY $vgpr16 + ; GCN-NEXT: [[PRED_COPY38:%[0-9]+]]:_(s32) = PRED_COPY $vgpr17 + ; GCN-NEXT: [[PRED_COPY39:%[0-9]+]]:_(s32) = PRED_COPY $vgpr18 + ; GCN-NEXT: [[PRED_COPY40:%[0-9]+]]:_(s32) = PRED_COPY $vgpr19 + ; GCN-NEXT: [[PRED_COPY41:%[0-9]+]]:_(s32) = PRED_COPY $vgpr20 + ; GCN-NEXT: [[PRED_COPY42:%[0-9]+]]:_(s32) = PRED_COPY $vgpr21 + ; GCN-NEXT: [[PRED_COPY43:%[0-9]+]]:_(s32) = PRED_COPY $vgpr22 + ; GCN-NEXT: [[PRED_COPY44:%[0-9]+]]:_(s32) = PRED_COPY $vgpr23 + ; GCN-NEXT: [[PRED_COPY45:%[0-9]+]]:_(s32) = PRED_COPY $vgpr24 + ; GCN-NEXT: [[PRED_COPY46:%[0-9]+]]:_(s32) = PRED_COPY $vgpr25 + ; GCN-NEXT: [[PRED_COPY47:%[0-9]+]]:_(s32) = PRED_COPY $vgpr26 + ; GCN-NEXT: [[PRED_COPY48:%[0-9]+]]:_(s32) = PRED_COPY $vgpr27 + ; GCN-NEXT: [[PRED_COPY49:%[0-9]+]]:_(s32) = PRED_COPY $vgpr28 + ; GCN-NEXT: [[PRED_COPY50:%[0-9]+]]:_(s32) = PRED_COPY $vgpr29 + ; GCN-NEXT: [[PRED_COPY51:%[0-9]+]]:_(s32) = PRED_COPY $vgpr30 + ; GCN-NEXT: [[PRED_COPY52:%[0-9]+]]:_(s32) = PRED_COPY $vgpr31 + ; 
GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[PRED_COPY21]](s32), [[PRED_COPY22]](s32), [[PRED_COPY23]](s32), [[PRED_COPY24]](s32), [[PRED_COPY25]](s32), [[PRED_COPY26]](s32), [[PRED_COPY27]](s32), [[PRED_COPY28]](s32), [[PRED_COPY29]](s32), [[PRED_COPY30]](s32), [[PRED_COPY31]](s32), [[PRED_COPY32]](s32), [[PRED_COPY33]](s32), [[PRED_COPY34]](s32), [[PRED_COPY35]](s32), [[PRED_COPY36]](s32), [[PRED_COPY37]](s32), [[PRED_COPY38]](s32), [[PRED_COPY39]](s32), [[PRED_COPY40]](s32), [[PRED_COPY41]](s32), [[PRED_COPY42]](s32), [[PRED_COPY43]](s32), [[PRED_COPY44]](s32), [[PRED_COPY45]](s32), [[PRED_COPY46]](s32), [[PRED_COPY47]](s32), [[PRED_COPY48]](s32), [[PRED_COPY49]](s32), [[PRED_COPY50]](s32), [[PRED_COPY51]](s32), [[PRED_COPY52]](s32) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN-NEXT: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store (<32 x s32>) into `ptr addrspace(1) undef`, align 8, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 @@ -2028,53 +2028,53 @@ ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr16 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr15 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr6_sgpr7 + ; GCN-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY $sgpr8_sgpr9 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_v2i16_func_void - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; GCN-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]] + ; GCN-NEXT: [[PRED_COPY11:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; GCN-NEXT: [[PRED_COPY12:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY9]](p4) ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY12]], [[C]](s64) + ; GCN-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; GCN-NEXT: 
[[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; GCN-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; GCN-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GCN-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; GCN-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY18]], [[C1]](s32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY17]], [[SHL]] + ; GCN-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY19]], [[C2]](s32) ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; GCN-NEXT: [[PRED_COPY20:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY20]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY10]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY11]](p4) + ; GCN-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; GCN-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY13]](s64) + ; GCN-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY14]](s32) + ; GCN-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY15]](s32) + ; GCN-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY16]](s32) + ; GCN-NEXT: $sgpr15 = PRED_COPY [[DEF1]](s32) + ; GCN-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v2i16_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY21:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr0 ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN-NEXT: G_STORE [[COPY21]](<2 x s16>), [[DEF]](p1) :: (volatile store (<2 x s16>) into `ptr addrspace(1) undef`, addrspace 1) + ; GCN-NEXT: G_STORE [[PRED_COPY21]](<2 x s16>), [[DEF]](p1) :: (volatile store (<2 x s16>) into `ptr addrspace(1) undef`, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 %val = call <2 x i16> @external_v2i16_func_void() store volatile <2 x i16> %val, ptr addrspace(1) undef @@ -2086,53 +2086,53 @@ ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, 
$sgpr10_sgpr11 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr16 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr15 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr6_sgpr7 + ; GCN-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY $sgpr8_sgpr9 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_v3i16_func_void - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; GCN-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]] + ; GCN-NEXT: [[PRED_COPY11:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; GCN-NEXT: [[PRED_COPY12:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY9]](p4) ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY12]], [[C]](s64) + ; GCN-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; GCN-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; GCN-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; GCN-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GCN-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; GCN-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY18]], [[C1]](s32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY17]], [[SHL]] + ; GCN-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL 
[[PRED_COPY19]], [[C2]](s32) ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; GCN-NEXT: [[PRED_COPY20:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY20]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY10]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY11]](p4) + ; GCN-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; GCN-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY13]](s64) + ; GCN-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY14]](s32) + ; GCN-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY15]](s32) + ; GCN-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY16]](s32) + ; GCN-NEXT: $sgpr15 = PRED_COPY [[DEF1]](s32) + ; GCN-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v3i16_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY21]](<2 x s16>), [[COPY22]](<2 x s16>) + ; GCN-NEXT: [[PRED_COPY21:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY22:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[PRED_COPY21]](<2 x s16>), [[PRED_COPY22]](<2 x s16>) ; GCN-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<4 x s16>) ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc @@ -2148,53 +2148,53 @@ ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr16 + ; GCN-NEXT: 
[[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr15 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr6_sgpr7 + ; GCN-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY $sgpr8_sgpr9 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_v4i16_func_void - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; GCN-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]] + ; GCN-NEXT: [[PRED_COPY11:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; GCN-NEXT: [[PRED_COPY12:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY9]](p4) ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY12]], [[C]](s64) + ; GCN-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; GCN-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; GCN-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; GCN-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GCN-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; GCN-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY18]], [[C1]](s32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY17]], [[SHL]] + ; GCN-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY19]], [[C2]](s32) ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; GCN-NEXT: [[PRED_COPY20:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY20]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY10]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = 
PRED_COPY [[PRED_COPY11]](p4) + ; GCN-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; GCN-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY13]](s64) + ; GCN-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY14]](s32) + ; GCN-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY15]](s32) + ; GCN-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY16]](s32) + ; GCN-NEXT: $sgpr15 = PRED_COPY [[DEF1]](s32) + ; GCN-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v4i16_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY21]](<2 x s16>), [[COPY22]](<2 x s16>) + ; GCN-NEXT: [[PRED_COPY21:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY22:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[PRED_COPY21]](<2 x s16>), [[PRED_COPY22]](<2 x s16>) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN-NEXT: G_STORE [[CONCAT_VECTORS]](<4 x s16>), [[DEF]](p1) :: (volatile store (<4 x s16>) into `ptr addrspace(1) undef`, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 @@ -2208,53 +2208,53 @@ ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr16 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr15 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr6_sgpr7 + ; GCN-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY $sgpr8_sgpr9 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_v2f16_func_void - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; GCN-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]] + ; GCN-NEXT: [[PRED_COPY11:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; GCN-NEXT: [[PRED_COPY12:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY9]](p4) ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 
0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY12]], [[C]](s64) + ; GCN-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; GCN-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; GCN-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; GCN-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GCN-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; GCN-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY18]], [[C1]](s32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY17]], [[SHL]] + ; GCN-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY19]], [[C2]](s32) ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; GCN-NEXT: [[PRED_COPY20:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY20]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY10]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY11]](p4) + ; GCN-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; GCN-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY13]](s64) + ; GCN-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY14]](s32) + ; GCN-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY15]](s32) + ; GCN-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY16]](s32) + ; GCN-NEXT: $sgpr15 = PRED_COPY [[DEF1]](s32) + ; GCN-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v2f16_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY21:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr0 ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN-NEXT: G_STORE [[COPY21]](<2 x s16>), [[DEF]](p1) :: (volatile store (<2 x s16>) into `ptr 
addrspace(1) undef`, addrspace 1) + ; GCN-NEXT: G_STORE [[PRED_COPY21]](<2 x s16>), [[DEF]](p1) :: (volatile store (<2 x s16>) into `ptr addrspace(1) undef`, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 %val = call <2 x half> @external_v2f16_func_void() store volatile <2 x half> %val, ptr addrspace(1) undef @@ -2266,53 +2266,53 @@ ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr16 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr15 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr6_sgpr7 + ; GCN-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY $sgpr8_sgpr9 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_v3f16_func_void - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; GCN-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]] + ; GCN-NEXT: [[PRED_COPY11:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; GCN-NEXT: [[PRED_COPY12:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY9]](p4) ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY12]], [[C]](s64) + ; GCN-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; GCN-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; GCN-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; GCN-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GCN-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; GCN-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR 
[[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY18]], [[C1]](s32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY17]], [[SHL]] + ; GCN-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY19]], [[C2]](s32) ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; GCN-NEXT: [[PRED_COPY20:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY20]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY10]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY11]](p4) + ; GCN-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; GCN-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY13]](s64) + ; GCN-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY14]](s32) + ; GCN-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY15]](s32) + ; GCN-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY16]](s32) + ; GCN-NEXT: $sgpr15 = PRED_COPY [[DEF1]](s32) + ; GCN-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v3f16_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY21]](<2 x s16>), [[COPY22]](<2 x s16>) + ; GCN-NEXT: [[PRED_COPY21:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY22:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[PRED_COPY21]](<2 x s16>), [[PRED_COPY22]](<2 x s16>) ; GCN-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<4 x s16>) ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc @@ -2328,53 +2328,53 @@ ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; 
GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr16 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr15 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr6_sgpr7 + ; GCN-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY $sgpr8_sgpr9 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_v4f16_func_void - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; GCN-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]] + ; GCN-NEXT: [[PRED_COPY11:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; GCN-NEXT: [[PRED_COPY12:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY9]](p4) ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY12]], [[C]](s64) + ; GCN-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; GCN-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; GCN-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; GCN-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GCN-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; GCN-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY18]], [[C1]](s32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY17]], [[SHL]] + ; GCN-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY19]], [[C2]](s32) ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: 
$sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; GCN-NEXT: [[PRED_COPY20:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY20]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY10]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY11]](p4) + ; GCN-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; GCN-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY13]](s64) + ; GCN-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY14]](s32) + ; GCN-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY15]](s32) + ; GCN-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY16]](s32) + ; GCN-NEXT: $sgpr15 = PRED_COPY [[DEF1]](s32) + ; GCN-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v4f16_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY21]](<2 x s16>), [[COPY22]](<2 x s16>) + ; GCN-NEXT: [[PRED_COPY21:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY22:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[PRED_COPY21]](<2 x s16>), [[PRED_COPY22]](<2 x s16>) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN-NEXT: G_STORE [[CONCAT_VECTORS]](<4 x s16>), [[DEF]](p1) :: (volatile store (<4 x s16>) into `ptr addrspace(1) undef`, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 @@ -2388,54 +2388,54 @@ ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr16 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr15 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr6_sgpr7 + ; GCN-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY $sgpr8_sgpr9 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE 
@external_v3f32_func_void - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; GCN-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]] + ; GCN-NEXT: [[PRED_COPY11:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; GCN-NEXT: [[PRED_COPY12:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY9]](p4) ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY12]], [[C]](s64) + ; GCN-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; GCN-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; GCN-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; GCN-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GCN-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; GCN-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY18]], [[C1]](s32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY17]], [[SHL]] + ; GCN-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY19]], [[C2]](s32) ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; GCN-NEXT: [[PRED_COPY20:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY20]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY10]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY11]](p4) + ; GCN-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; GCN-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY13]](s64) + ; GCN-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY14]](s32) + ; GCN-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY15]](s32) + ; GCN-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY16]](s32) + ; GCN-NEXT: $sgpr15 = PRED_COPY [[DEF1]](s32) + ; GCN-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v3f32_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, 
implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32) + ; GCN-NEXT: [[PRED_COPY21:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY22:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY23:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[PRED_COPY21]](s32), [[PRED_COPY22]](s32), [[PRED_COPY23]](s32) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN-NEXT: G_STORE [[BUILD_VECTOR]](<3 x s32>), [[DEF]](p1) :: (volatile store (<3 x s32>) into `ptr addrspace(1) undef`, align 16, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 @@ -2449,56 +2449,56 @@ ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr16 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr15 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr6_sgpr7 + ; GCN-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY $sgpr8_sgpr9 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_v5f32_func_void - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; GCN-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]] + ; GCN-NEXT: [[PRED_COPY11:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; GCN-NEXT: [[PRED_COPY12:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY9]](p4) ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY12]], [[C]](s64) + ; GCN-NEXT: 
[[PRED_COPY13:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; GCN-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; GCN-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; GCN-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GCN-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; GCN-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY18]], [[C1]](s32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY17]], [[SHL]] + ; GCN-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY19]], [[C2]](s32) ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; GCN-NEXT: [[PRED_COPY20:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY20]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY10]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY11]](p4) + ; GCN-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; GCN-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY13]](s64) + ; GCN-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY14]](s32) + ; GCN-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY15]](s32) + ; GCN-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY16]](s32) + ; GCN-NEXT: $sgpr15 = PRED_COPY [[DEF1]](s32) + ; GCN-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v5f32_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3, implicit-def $vgpr4 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GCN-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32) + ; GCN-NEXT: [[PRED_COPY21:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY22:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY23:%[0-9]+]]:_(s32) = 
PRED_COPY $vgpr2 + ; GCN-NEXT: [[PRED_COPY24:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GCN-NEXT: [[PRED_COPY25:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[PRED_COPY21]](s32), [[PRED_COPY22]](s32), [[PRED_COPY23]](s32), [[PRED_COPY24]](s32), [[PRED_COPY25]](s32) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN-NEXT: G_STORE [[BUILD_VECTOR]](<5 x s32>), [[DEF]](p1) :: (volatile store (<5 x s32>) into `ptr addrspace(1) undef`, align 32, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 @@ -2513,56 +2513,56 @@ ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr16 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr15 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr6_sgpr7 + ; GCN-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY $sgpr8_sgpr9 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_i32_i64_func_void - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; GCN-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]] + ; GCN-NEXT: [[PRED_COPY11:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; GCN-NEXT: [[PRED_COPY12:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY9]](p4) ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY12]], [[C]](s64) + ; GCN-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; GCN-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; GCN-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; GCN-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GCN-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + 
; GCN-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY18]], [[C1]](s32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY17]], [[SHL]] + ; GCN-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY19]], [[C2]](s32) ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; GCN-NEXT: [[PRED_COPY20:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY20]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY10]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY11]](p4) + ; GCN-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; GCN-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY13]](s64) + ; GCN-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY14]](s32) + ; GCN-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY15]](s32) + ; GCN-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY16]](s32) + ; GCN-NEXT: $sgpr15 = PRED_COPY [[DEF1]](s32) + ; GCN-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_i32_i64_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GCN-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY22]](s32), [[COPY23]](s32) + ; GCN-NEXT: [[PRED_COPY21:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY22:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY23:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GCN-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY22]](s32), [[PRED_COPY23]](s32) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN-NEXT: G_STORE [[COPY21]](s32), [[DEF]](p1) :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1) + ; GCN-NEXT: G_STORE [[PRED_COPY21]](s32), [[DEF]](p1) :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; GCN-NEXT: G_STORE [[MV]](s64), [[DEF]](p1) :: (volatile store (s64) into `ptr addrspace(1) undef`, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 %val = call { i32, i64 } @external_i32_i64_func_void() @@ -2579,15 +2579,15 @@ ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: 
[[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_gfx_i32_i64_func_void - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY]](<4 x s32>) + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY]](<4 x s32>) ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_gfx_i32_i64_func_void, csr_amdgpu_si_gfx, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GCN-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GCN-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN-NEXT: G_STORE [[COPY1]](s32), [[DEF]](p1) :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1) + ; GCN-NEXT: G_STORE [[PRED_COPY1]](s32), [[DEF]](p1) :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; GCN-NEXT: G_STORE [[MV]](s64), [[DEF]](p1) :: (volatile store (s64) into `ptr addrspace(1) undef`, addrspace 1) ; GCN-NEXT: SI_RETURN %val = call amdgpu_gfx { i32, i64 } @external_gfx_i32_i64_func_void() @@ -2603,55 +2603,55 @@ ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr16 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr15 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr6_sgpr7 + ; GCN-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY $sgpr8_sgpr9 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_a2i32_func_void - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; GCN-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]] + ; GCN-NEXT: [[PRED_COPY11:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] 
+ ; GCN-NEXT: [[PRED_COPY12:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY9]](p4) ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY12]], [[C]](s64) + ; GCN-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; GCN-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; GCN-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; GCN-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GCN-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; GCN-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY18]], [[C1]](s32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY17]], [[SHL]] + ; GCN-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY19]], [[C2]](s32) ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; GCN-NEXT: [[PRED_COPY20:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY20]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY10]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY11]](p4) + ; GCN-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; GCN-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY13]](s64) + ; GCN-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY14]](s32) + ; GCN-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY15]](s32) + ; GCN-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY16]](s32) + ; GCN-NEXT: $sgpr15 = PRED_COPY [[DEF1]](s32) + ; GCN-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_a2i32_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GCN-NEXT: 
[[PRED_COPY21:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY22:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN-NEXT: G_STORE [[COPY21]](s32), [[DEF]](p1) :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1) - ; GCN-NEXT: G_STORE [[COPY22]](s32), [[DEF]](p1) :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1) + ; GCN-NEXT: G_STORE [[PRED_COPY21]](s32), [[DEF]](p1) :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1) + ; GCN-NEXT: G_STORE [[PRED_COPY22]](s32), [[DEF]](p1) :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 %val = call [2 x i32] @external_a2i32_func_void() %val.0 = extractvalue [2 x i32] %val, 0 @@ -2666,64 +2666,64 @@ ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr16 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr15 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr6_sgpr7 + ; GCN-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY $sgpr8_sgpr9 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_a5i8_func_void - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; GCN-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]] + ; GCN-NEXT: [[PRED_COPY11:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; GCN-NEXT: [[PRED_COPY12:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY9]](p4) ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY12]], [[C]](s64) + ; GCN-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; GCN-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; GCN-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; GCN-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] ; GCN-NEXT: 
[[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GCN-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; GCN-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY18]], [[C1]](s32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY17]], [[SHL]] + ; GCN-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY19]], [[C2]](s32) ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; GCN-NEXT: [[PRED_COPY20:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY20]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY10]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY11]](p4) + ; GCN-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; GCN-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY13]](s64) + ; GCN-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY14]](s32) + ; GCN-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY15]](s32) + ; GCN-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY16]](s32) + ; GCN-NEXT: $sgpr15 = PRED_COPY [[DEF1]](s32) + ; GCN-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_a5i8_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3, implicit-def $vgpr4 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY21]](s32) + ; GCN-NEXT: [[PRED_COPY21:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY21]](s32) ; GCN-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC]](s16) - ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY22]](s32) + ; GCN-NEXT: [[PRED_COPY22:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY22]](s32) ; GCN-NEXT: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC2]](s16) - ; GCN-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GCN-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY23]](s32) + ; GCN-NEXT: [[PRED_COPY23:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GCN-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY23]](s32) 
; GCN-NEXT: [[TRUNC5:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC4]](s16) - ; GCN-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GCN-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY24]](s32) + ; GCN-NEXT: [[PRED_COPY24:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GCN-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY24]](s32) ; GCN-NEXT: [[TRUNC7:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC6]](s16) - ; GCN-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GCN-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[COPY25]](s32) + ; GCN-NEXT: [[PRED_COPY25:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; GCN-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY25]](s32) ; GCN-NEXT: [[TRUNC9:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC8]](s16) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN-NEXT: G_STORE [[TRUNC1]](s8), [[DEF]](p1) :: (volatile store (s8) into `ptr addrspace(1) undef`, addrspace 1) @@ -2751,51 +2751,51 @@ ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr16 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr15 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr6_sgpr7 + ; GCN-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY $sgpr8_sgpr9 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GCN-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0 ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_v32i32_i32_func_void - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; GCN-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]] + ; GCN-NEXT: [[PRED_COPY11:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; GCN-NEXT: [[PRED_COPY12:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY9]](p4) ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY12]], [[C]](s64) + ; GCN-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; GCN-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; 
GCN-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; GCN-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GCN-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; GCN-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY18]], [[C1]](s32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY17]], [[SHL]] + ; GCN-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY19]], [[C2]](s32) ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GCN-NEXT: $vgpr0 = COPY [[FRAME_INDEX]](p5) - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; GCN-NEXT: $vgpr0 = PRED_COPY [[FRAME_INDEX]](p5) + ; GCN-NEXT: [[PRED_COPY20:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY20]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY10]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY11]](p4) + ; GCN-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; GCN-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY13]](s64) + ; GCN-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY14]](s32) + ; GCN-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY15]](s32) + ; GCN-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY16]](s32) + ; GCN-NEXT: $sgpr15 = PRED_COPY [[DEF1]](s32) + ; GCN-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v32i32_i32_func_void, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN-NEXT: [[LOAD:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[FRAME_INDEX]](p5) :: (load (<32 x s32>) from %stack.0, addrspace 5) @@ -2818,51 +2818,51 @@ ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - 
; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr16 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr15 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr6_sgpr7 + ; GCN-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY $sgpr8_sgpr9 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GCN-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0 ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_i32_v32i32_func_void - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; GCN-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]] + ; GCN-NEXT: [[PRED_COPY11:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; GCN-NEXT: [[PRED_COPY12:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY9]](p4) ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY12]], [[C]](s64) + ; GCN-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; GCN-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; GCN-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; GCN-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GCN-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; GCN-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY18]], [[C1]](s32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY17]], [[SHL]] + ; GCN-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY19]], [[C2]](s32) ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GCN-NEXT: $vgpr0 = COPY [[FRAME_INDEX]](p5) - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = 
COPY [[COPY10]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; GCN-NEXT: $vgpr0 = PRED_COPY [[FRAME_INDEX]](p5) + ; GCN-NEXT: [[PRED_COPY20:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY20]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY10]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY11]](p4) + ; GCN-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; GCN-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY13]](s64) + ; GCN-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY14]](s32) + ; GCN-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY15]](s32) + ; GCN-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY16]](s32) + ; GCN-NEXT: $sgpr15 = PRED_COPY [[DEF1]](s32) + ; GCN-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_i32_v32i32_func_void, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (load (s32) from %stack.0, align 128, addrspace 5) @@ -2885,51 +2885,51 @@ ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr16 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr15 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr6_sgpr7 + ; GCN-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY $sgpr8_sgpr9 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GCN-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0 ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_v33i32_func_void - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; GCN-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]] + ; 
GCN-NEXT: [[PRED_COPY11:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; GCN-NEXT: [[PRED_COPY12:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY9]](p4) ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY12]], [[C]](s64) + ; GCN-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; GCN-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; GCN-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; GCN-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GCN-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; GCN-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY18]], [[C1]](s32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY17]], [[SHL]] + ; GCN-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY19]], [[C2]](s32) ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GCN-NEXT: $vgpr0 = COPY [[FRAME_INDEX]](p5) - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; GCN-NEXT: $vgpr0 = PRED_COPY [[FRAME_INDEX]](p5) + ; GCN-NEXT: [[PRED_COPY20:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY20]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY10]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY11]](p4) + ; GCN-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; GCN-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY13]](s64) + ; GCN-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY14]](s32) + ; GCN-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY15]](s32) + ; GCN-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY16]](s32) + ; GCN-NEXT: $sgpr15 = PRED_COPY [[DEF1]](s32) + ; GCN-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v33i32_func_void, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit 
$vgpr31 ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN-NEXT: [[LOAD:%[0-9]+]]:_(<33 x s32>) = G_LOAD [[FRAME_INDEX]](p5) :: (load (<33 x s32>) from %stack.0, align 256, addrspace 5) @@ -2945,16 +2945,16 @@ ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr16 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr15 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr6_sgpr7 + ; GCN-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY $sgpr8_sgpr9 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GCN-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr) ; GCN-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[INT]](p4) :: (dereferenceable invariant load (p1) from %ir.p.kernarg.offset1, align 16, addrspace 4) @@ -2964,41 +2964,41 @@ ; GCN-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0 ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_v33i32_func_v33i32_i32 - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; GCN-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]] + ; GCN-NEXT: [[PRED_COPY11:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; GCN-NEXT: [[PRED_COPY12:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY9]](p4) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; GCN-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY12]], [[C1]](s64) + ; GCN-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; GCN-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; GCN-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; GCN-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GCN-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; 
GCN-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY18]], [[C2]](s32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY17]], [[SHL]] + ; GCN-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; GCN-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32) + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY19]], [[C3]](s32) ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](p1) - ; GCN-NEXT: $vgpr0 = COPY [[FRAME_INDEX]](p5) - ; GCN-NEXT: $vgpr1 = COPY [[UV]](s32) - ; GCN-NEXT: $vgpr2 = COPY [[UV1]](s32) - ; GCN-NEXT: $vgpr3 = COPY [[LOAD1]](s32) - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD1]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; GCN-NEXT: $vgpr0 = PRED_COPY [[FRAME_INDEX]](p5) + ; GCN-NEXT: $vgpr1 = PRED_COPY [[UV]](s32) + ; GCN-NEXT: $vgpr2 = PRED_COPY [[UV1]](s32) + ; GCN-NEXT: $vgpr3 = PRED_COPY [[LOAD1]](s32) + ; GCN-NEXT: [[PRED_COPY20:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY20]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY10]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY11]](p4) + ; GCN-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD1]](p4) + ; GCN-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY13]](s64) + ; GCN-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY14]](s32) + ; GCN-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY15]](s32) + ; GCN-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY16]](s32) + ; GCN-NEXT: $sgpr15 = PRED_COPY [[DEF1]](s32) + ; GCN-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v33i32_func_v33i32_i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN-NEXT: [[LOAD2:%[0-9]+]]:_(<33 x s32>) = G_LOAD [[FRAME_INDEX]](p5) :: (load (<33 x s32>) from %stack.0, align 256, addrspace 5) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-sret.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-sret.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-sret.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-sret.ll @@ -8,16 +8,16 @@ ; GCN: bb.1 (%ir-block.1): ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY 
$vgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr16 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr15 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr6_sgpr7 + ; GCN-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY $sgpr8_sgpr9 ; GCN-NEXT: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 3 ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF @@ -30,42 +30,42 @@ ; GCN-NEXT: G_STORE [[C1]](s32), [[PTR_ADD]](p5) :: (store (s32) into %ir.in.gep1, addrspace 5) ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_sret_struct_i8_i32_byval_struct_i8_i32 - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; GCN-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]] + ; GCN-NEXT: [[PRED_COPY11:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; GCN-NEXT: [[PRED_COPY12:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY9]](p4) ; GCN-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; GCN-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C3]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY12]], [[C3]](s64) + ; GCN-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; GCN-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; GCN-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; GCN-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GCN-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; GCN-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; GCN-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C4]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY18]], [[C4]](s32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY17]], [[SHL]] + ; GCN-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) 
; GCN-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C5]](s32) + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY19]], [[C5]](s32) ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; GCN-NEXT: [[AMDGPU_WAVE_ADDRESS:%[0-9]+]]:_(p5) = G_AMDGPU_WAVE_ADDRESS $sp_reg ; GCN-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GCN-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C6]](s32) ; GCN-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; GCN-NEXT: G_MEMCPY [[PTR_ADD2]](p5), [[FRAME_INDEX]](p5), [[C7]](s32), 0 :: (dereferenceable store (s64) into stack, align 4, addrspace 5), (dereferenceable load (s64) from %ir.in.val, align 4, addrspace 5) - ; GCN-NEXT: $vgpr0 = COPY [[FRAME_INDEX1]](p5) - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD1]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; GCN-NEXT: $vgpr0 = PRED_COPY [[FRAME_INDEX1]](p5) + ; GCN-NEXT: [[PRED_COPY20:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY20]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY10]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY11]](p4) + ; GCN-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD1]](p4) + ; GCN-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY13]](s64) + ; GCN-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY14]](s32) + ; GCN-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY15]](s32) + ; GCN-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY16]](s32) + ; GCN-NEXT: $sgpr15 = PRED_COPY [[DEF1]](s32) + ; GCN-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_sret_struct_i8_i32_byval_struct_i8_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; GCN-NEXT: ADJCALLSTACKDOWN 0, 8, implicit-def $scc ; GCN-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX1]], [[C2]](s32) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll @@ -99,48 +99,48 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; 
CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr16 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr15 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr6_sgpr7 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY $sgpr8_sgpr9 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_void - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]] + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY9]](p4) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY12]], [[C]](s64) + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY18]], [[C1]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY17]], [[SHL]] + ; CHECK-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY19]], [[C2]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY 
[[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: [[PRED_COPY20:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY20]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY10]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY11]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY13]](s64) + ; CHECK-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY14]](s32) + ; CHECK-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY15]](s32) + ; CHECK-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY16]](s32) + ; CHECK-NEXT: $sgpr15 = PRED_COPY [[DEF]](s32) + ; CHECK-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -153,8 +153,8 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_gfx_void_func_void - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY]](<4 x s32>) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY]](<4 x s32>) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_gfx_void_func_void, csr_amdgpu_si_gfx, implicit $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: SI_RETURN @@ -167,37 +167,37 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr13 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr31 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr15 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr6_sgpr7 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_void - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: 
[[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY12]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY13]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[COPY17]](s32) + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]] + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY5]] + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]] + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]] + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY18]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY9]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY10]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PRED_COPY11]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY12]](s64) + ; CHECK-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY13]](s32) + ; CHECK-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY14]](s32) + ; CHECK-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY15]](s32) + ; CHECK-NEXT: $sgpr15 = PRED_COPY [[PRED_COPY16]](s32) + ; CHECK-NEXT: $vgpr31 = PRED_COPY [[PRED_COPY17]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: SI_RETURN @@ -210,50 +210,50 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; 
CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr16 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr15 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr6_sgpr7 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 23 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_empty_struct - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]] + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY9]](p4) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY12]], [[C1]](s64) + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY18]], [[C2]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY17]], [[SHL]] + ; CHECK-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY19]], [[C3]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; CHECK-NEXT: $vgpr0 = COPY [[C]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY 
[[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[PRED_COPY20:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY20]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY10]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY11]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY13]](s64) + ; CHECK-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY14]](s32) + ; CHECK-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY15]](s32) + ; CHECK-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY16]](s32) + ; CHECK-NEXT: $sgpr15 = PRED_COPY [[DEF]](s32) + ; CHECK-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_empty_struct, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -266,50 +266,50 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr16 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr15 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr6_sgpr7 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 23 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_empty_array - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]] + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY9]](p4) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], 
[[C1]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY12]], [[C1]](s64) + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY18]], [[C2]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY17]], [[SHL]] + ; CHECK-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY19]], [[C3]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; CHECK-NEXT: $vgpr0 = COPY [[C]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[PRED_COPY20:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY20]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY10]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY11]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY13]](s64) + ; CHECK-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY14]](s32) + ; CHECK-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY15]](s32) + ; CHECK-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY16]](s32) + ; CHECK-NEXT: $sgpr15 = PRED_COPY [[DEF]](s32) + ; CHECK-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_empty_array, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -322,51 +322,51 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr14, 
$sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr16 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr15 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr6_sgpr7 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s1) = G_CONSTANT i1 true ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i1 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]] + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY9]](p4) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY12]], [[C1]](s64) + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY18]], [[C2]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY17]], [[SHL]] + ; CHECK-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY 
[[PRED_COPY]](s32) ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY19]], [[C3]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[C]](s1) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[ANYEXT]](s32) + ; CHECK-NEXT: [[PRED_COPY20:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY20]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY10]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY11]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY13]](s64) + ; CHECK-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY14]](s32) + ; CHECK-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY15]](s32) + ; CHECK-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY16]](s32) + ; CHECK-NEXT: $sgpr15 = PRED_COPY [[DEF]](s32) + ; CHECK-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i1, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -379,53 +379,53 @@ ; CHECK: bb.1 (%ir-block.1): ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr16 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr15 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr6_sgpr7 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; CHECK-NEXT: 
[[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr) ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s1) = G_LOAD [[DEF]](p1) :: (volatile "amdgpu-noclobber" load (s1) from `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i1_signext - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]] + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY9]](p4) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY12]], [[C]](s64) + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY18]], [[C1]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY17]], [[SHL]] + ; CHECK-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY19]], [[C2]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD]](s1) - ; CHECK-NEXT: $vgpr0 = COPY [[SEXT]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[SEXT]](s32) + ; CHECK-NEXT: [[PRED_COPY20:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; CHECK-NEXT: 
$sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY20]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY10]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY11]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY13]](s64) + ; CHECK-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY14]](s32) + ; CHECK-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY15]](s32) + ; CHECK-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY16]](s32) + ; CHECK-NEXT: $sgpr15 = PRED_COPY [[DEF1]](s32) + ; CHECK-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i1_signext, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -439,53 +439,53 @@ ; CHECK: bb.1 (%ir-block.1): ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr16 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr15 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr6_sgpr7 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr) ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s1) = G_LOAD [[DEF]](p1) :: (volatile "amdgpu-noclobber" load (s1) from `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i1_zeroext - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]] + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY9]](p4) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) 
= COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY12]], [[C]](s64) + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY18]], [[C1]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY17]], [[SHL]] + ; CHECK-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY19]], [[C2]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD]](s1) - ; CHECK-NEXT: $vgpr0 = COPY [[ZEXT]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[ZEXT]](s32) + ; CHECK-NEXT: [[PRED_COPY20:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY20]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY10]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY11]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY13]](s64) + ; CHECK-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY14]](s32) + ; CHECK-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY15]](s32) + ; CHECK-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY16]](s32) + ; CHECK-NEXT: $sgpr15 = PRED_COPY [[DEF1]](s32) + ; CHECK-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i1_zeroext, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -499,53 +499,53 @@ ; CHECK: bb.1 (%ir-block.1): ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, 
$sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr16 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr15 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr6_sgpr7 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 123 ; CHECK-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i8 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]] + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY9]](p4) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY12]], [[C1]](s64) + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY18]], [[C2]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY17]], [[SHL]] + ; 
CHECK-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY19]], [[C3]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[C]](s8) ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT]](s16) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT1]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[ANYEXT1]](s32) + ; CHECK-NEXT: [[PRED_COPY20:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY20]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY10]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY11]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY13]](s64) + ; CHECK-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY14]](s32) + ; CHECK-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY15]](s32) + ; CHECK-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY16]](s32) + ; CHECK-NEXT: $sgpr15 = PRED_COPY [[DEF]](s32) + ; CHECK-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i8, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -558,54 +558,54 @@ ; CHECK: bb.1 (%ir-block.1): ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr16 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr15 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; CHECK-NEXT: 
[[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr6_sgpr7 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr) ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p1) :: (volatile "amdgpu-noclobber" load (s8) from `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i8_signext - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]] + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY9]](p4) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY12]], [[C]](s64) + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY18]], [[C1]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY17]], [[SHL]] + ; CHECK-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY19]], [[C2]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s16) = G_SEXT [[LOAD]](s8) ; CHECK-NEXT: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[SEXT]](s16) - ; CHECK-NEXT: $vgpr0 = COPY [[SEXT1]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY 
[[DEF1]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[SEXT1]](s32) + ; CHECK-NEXT: [[PRED_COPY20:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY20]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY10]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY11]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY13]](s64) + ; CHECK-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY14]](s32) + ; CHECK-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY15]](s32) + ; CHECK-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY16]](s32) + ; CHECK-NEXT: $sgpr15 = PRED_COPY [[DEF1]](s32) + ; CHECK-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i8_signext, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -619,54 +619,54 @@ ; CHECK: bb.1 (%ir-block.1): ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr16 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr15 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr6_sgpr7 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr) ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p1) :: (volatile "amdgpu-noclobber" load (s8) from `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i8_zeroext - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]] + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY9]](p4) ; CHECK-NEXT: 
[[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY12]], [[C]](s64) + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY18]], [[C1]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY17]], [[SHL]] + ; CHECK-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY19]], [[C2]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[LOAD]](s8) ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[ZEXT]](s16) - ; CHECK-NEXT: $vgpr0 = COPY [[ZEXT1]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[ZEXT1]](s32) + ; CHECK-NEXT: [[PRED_COPY20:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY20]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY10]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY11]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY13]](s64) + ; CHECK-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY14]](s32) + ; CHECK-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY15]](s32) + ; CHECK-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY16]](s32) + ; CHECK-NEXT: $sgpr15 = PRED_COPY [[DEF1]](s32) + ; CHECK-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i8_zeroext, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, 
implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -680,51 +680,51 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr16 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr15 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr6_sgpr7 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 123 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i16 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]] + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY9]](p4) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY12]], [[C1]](s64) + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: 
[[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY18]], [[C2]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY17]], [[SHL]] + ; CHECK-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY19]], [[C3]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[C]](s16) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[ANYEXT]](s32) + ; CHECK-NEXT: [[PRED_COPY20:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY20]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY10]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY11]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY13]](s64) + ; CHECK-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY14]](s32) + ; CHECK-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY15]](s32) + ; CHECK-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY16]](s32) + ; CHECK-NEXT: $sgpr15 = PRED_COPY [[DEF]](s32) + ; CHECK-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i16, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -737,53 +737,53 @@ ; CHECK: bb.1 (%ir-block.1): ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr16 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr15 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = 
PRED_COPY $sgpr14 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr6_sgpr7 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr) ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[DEF]](p1) :: (volatile "amdgpu-noclobber" load (s16) from `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i16_signext - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]] + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY9]](p4) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY12]], [[C]](s64) + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY18]], [[C1]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY17]], [[SHL]] + ; CHECK-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY19]], [[C2]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD]](s16) - ; CHECK-NEXT: $vgpr0 = COPY [[SEXT]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY 
[[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[SEXT]](s32) + ; CHECK-NEXT: [[PRED_COPY20:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY20]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY10]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY11]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY13]](s64) + ; CHECK-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY14]](s32) + ; CHECK-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY15]](s32) + ; CHECK-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY16]](s32) + ; CHECK-NEXT: $sgpr15 = PRED_COPY [[DEF1]](s32) + ; CHECK-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i16_signext, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -797,53 +797,53 @@ ; CHECK: bb.1 (%ir-block.1): ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr16 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr15 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr6_sgpr7 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr) ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[DEF]](p1) :: (volatile "amdgpu-noclobber" load (s16) from `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i16_zeroext - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]] + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(p4) = 
PRED_COPY [[PRED_COPY9]](p4) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY12]], [[C]](s64) + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY18]], [[C1]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY17]], [[SHL]] + ; CHECK-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY19]], [[C2]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD]](s16) - ; CHECK-NEXT: $vgpr0 = COPY [[ZEXT]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[ZEXT]](s32) + ; CHECK-NEXT: [[PRED_COPY20:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY20]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY10]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY11]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY13]](s64) + ; CHECK-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY14]](s32) + ; CHECK-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY15]](s32) + ; CHECK-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY16]](s32) + ; CHECK-NEXT: $sgpr15 = PRED_COPY [[DEF1]](s32) + ; CHECK-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i16_zeroext, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, 
implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -857,51 +857,51 @@ ; CHECK: bb.1 (%ir-block.1): ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr16 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr15 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr6_sgpr7 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 ; CHECK-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i32 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]] + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY9]](p4) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY12]], [[C1]](s64) + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32) - ; CHECK-NEXT: 
[[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY18]], [[C2]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY17]], [[SHL]] + ; CHECK-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY19]], [[C3]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; CHECK-NEXT: $vgpr0 = COPY [[C]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[PRED_COPY20:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY20]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY10]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY11]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY13]](s64) + ; CHECK-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY14]](s32) + ; CHECK-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY15]](s32) + ; CHECK-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY16]](s32) + ; CHECK-NEXT: $sgpr15 = PRED_COPY [[DEF]](s32) + ; CHECK-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -914,13 +914,13 @@ ; CHECK: bb.1 (%ir-block.1): ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_gfx_void_func_i32 - ; CHECK-NEXT: $vgpr0 = COPY [[C]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY1]](<4 x s32>) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY1]](<4 x s32>) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_gfx_void_func_i32, csr_amdgpu_si_gfx, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: SI_RETURN @@ -933,13 +933,13 @@ ; CHECK: bb.1 (%ir-block.1): ; CHECK-NEXT: liveins: $sgpr4 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; 
CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_gfx_void_func_i32_inreg - ; CHECK-NEXT: $sgpr4 = COPY [[C]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY1]](<4 x s32>) + ; CHECK-NEXT: $sgpr4 = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY1]](<4 x s32>) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_gfx_void_func_i32_inreg, csr_amdgpu_si_gfx, implicit $sgpr4, implicit $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: SI_RETURN @@ -952,52 +952,52 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr16 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr15 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr6_sgpr7 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 123 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i64 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]] + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY9]](p4) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY12]], [[C1]](s64) + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: 
[[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY18]], [[C2]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY17]], [[SHL]] + ; CHECK-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY19]], [[C3]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C]](s64) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: [[PRED_COPY20:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY20]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY10]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY11]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY13]](s64) + ; CHECK-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY14]](s32) + ; CHECK-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY15]](s32) + ; CHECK-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY16]](s32) + ; CHECK-NEXT: $sgpr15 = PRED_COPY [[DEF]](s32) + ; CHECK-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i64, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -1010,55 +1010,55 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = 
COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr16 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr15 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr6_sgpr7 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[C:%[0-9]+]]:_(p1) = G_CONSTANT i64 0 ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[C]](p1) :: ("amdgpu-noclobber" load (<2 x s64>) from `ptr addrspace(1) null`, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v2i64 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]] + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY9]](p4) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY12]], [[C1]](s64) + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY18]], [[C2]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY17]], [[SHL]] + ; CHECK-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; 
CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY19]], [[C3]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) + ; CHECK-NEXT: [[PRED_COPY20:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY20]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY10]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY11]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY13]](s64) + ; CHECK-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY14]](s32) + ; CHECK-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY15]](s32) + ; CHECK-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY16]](s32) + ; CHECK-NEXT: $sgpr15 = PRED_COPY [[DEF]](s32) + ; CHECK-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v2i64, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -1072,56 +1072,56 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr16 + ; CHECK-NEXT: 
[[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr15 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr6_sgpr7 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8589934593 ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 17179869187 ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C]](s64), [[C1]](s64) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v2i64 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]] + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY9]](p4) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C2]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY12]], [[C2]](s64) + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C3]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY18]], [[C3]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY17]], [[SHL]] + ; CHECK-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C4]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY19]], [[C4]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<2 x s64>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY 
[[COPY11]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) + ; CHECK-NEXT: [[PRED_COPY20:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY20]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY10]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY11]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY13]](s64) + ; CHECK-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY14]](s32) + ; CHECK-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY15]](s32) + ; CHECK-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY16]](s32) + ; CHECK-NEXT: $sgpr15 = PRED_COPY [[DEF]](s32) + ; CHECK-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v2i64, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -1134,55 +1134,55 @@ ; CHECK: bb.1 (%ir-block.1): ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr16 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr15 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr6_sgpr7 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr) ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s48) = G_LOAD [[DEF]](p1) :: (volatile "amdgpu-noclobber" load (s48) from `ptr addrspace(1) undef`, align 8, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: 
[[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i48 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]] + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY9]](p4) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY12]], [[C]](s64) + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY18]], [[C1]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY17]], [[SHL]] + ; CHECK-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY19]], [[C2]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s48) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ANYEXT]](s64) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: [[PRED_COPY20:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY20]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY10]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY11]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = PRED_COPY 
[[PTR_ADD]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY13]](s64) + ; CHECK-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY14]](s32) + ; CHECK-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY15]](s32) + ; CHECK-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY16]](s32) + ; CHECK-NEXT: $sgpr15 = PRED_COPY [[DEF1]](s32) + ; CHECK-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i48, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -1196,55 +1196,55 @@ ; CHECK: bb.1 (%ir-block.1): ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr16 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr15 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr6_sgpr7 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr) ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s48) = G_LOAD [[DEF]](p1) :: (volatile "amdgpu-noclobber" load (s48) from `ptr addrspace(1) undef`, align 8, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i48_signext - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]] + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY9]](p4) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD 
[[PRED_COPY12]], [[C]](s64) + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY18]], [[C1]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY17]], [[SHL]] + ; CHECK-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY19]], [[C2]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[LOAD]](s48) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SEXT]](s64) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: [[PRED_COPY20:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY20]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY10]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY11]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY13]](s64) + ; CHECK-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY14]](s32) + ; CHECK-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY15]](s32) + ; CHECK-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY16]](s32) + ; CHECK-NEXT: $sgpr15 = PRED_COPY [[DEF1]](s32) + ; CHECK-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i48_signext, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -1258,55 +1258,55 @@ ; CHECK: bb.1 (%ir-block.1): ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, 
$vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr16 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr15 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr6_sgpr7 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr) ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s48) = G_LOAD [[DEF]](p1) :: (volatile "amdgpu-noclobber" load (s48) from `ptr addrspace(1) undef`, align 8, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i48_zeroext - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]] + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY9]](p4) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY12]], [[C]](s64) + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY 
[[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY18]], [[C1]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY17]], [[SHL]] + ; CHECK-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY19]], [[C2]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[LOAD]](s48) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ZEXT]](s64) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: [[PRED_COPY20:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY20]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY10]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY11]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY13]](s64) + ; CHECK-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY14]](s32) + ; CHECK-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY15]](s32) + ; CHECK-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY16]](s32) + ; CHECK-NEXT: $sgpr15 = PRED_COPY [[DEF1]](s32) + ; CHECK-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i48_zeroext, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -1320,53 +1320,53 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; 
CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr16 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr15 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr6_sgpr7 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr) ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p0) = G_LOAD [[INT]](p4) :: (dereferenceable invariant load (p0) from %ir.arg.kernarg.offset1, align 16, addrspace 4) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_p0 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]] + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY9]](p4) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY12]], [[C]](s64) + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY18]], [[C1]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY17]], [[SHL]] + ; CHECK-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY19]], [[C2]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](p0) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY 
[[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: [[PRED_COPY20:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY20]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY10]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY11]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY13]](s64) + ; CHECK-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY14]](s32) + ; CHECK-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY15]](s32) + ; CHECK-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY16]](s32) + ; CHECK-NEXT: $sgpr15 = PRED_COPY [[DEF]](s32) + ; CHECK-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_p0, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -1379,55 +1379,55 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr16 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr15 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr6_sgpr7 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[C:%[0-9]+]]:_(p1) = G_CONSTANT i64 0 ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p0>) = G_LOAD [[C]](p1) :: ("amdgpu-noclobber" load (<2 x p0>) from `ptr addrspace(1) null`, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v2p0 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = 
PRED_COPY [[PRED_COPY8]] + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY9]](p4) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY12]], [[C1]](s64) + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY18]], [[C2]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY17]], [[SHL]] + ; CHECK-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY19]], [[C3]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x p0>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) + ; CHECK-NEXT: [[PRED_COPY20:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY20]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY10]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY11]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY13]](s64) + ; CHECK-NEXT: $sgpr12 = PRED_COPY 
[[PRED_COPY14]](s32) + ; CHECK-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY15]](s32) + ; CHECK-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY16]](s32) + ; CHECK-NEXT: $sgpr15 = PRED_COPY [[DEF]](s32) + ; CHECK-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v2p0, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -1441,16 +1441,16 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr16 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr15 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr6_sgpr7 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[C:%[0-9]+]]:_(p1) = G_CONSTANT i64 0 ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8589934593 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF @@ -1459,43 +1459,43 @@ ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<3 x s64>) = G_SHUFFLE_VECTOR [[LOAD]](<2 x s64>), [[BUILD_VECTOR]], shufflemask(0, 1, 2) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v3i64 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]] + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY9]](p4) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C2]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY12]], [[C2]](s64) + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s64) = 
PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C3]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY18]], [[C3]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY17]], [[SHL]] + ; CHECK-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C4]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY19]], [[C4]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SHUF]](<3 x s64>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) + ; CHECK-NEXT: $vgpr4 = PRED_COPY [[UV4]](s32) + ; CHECK-NEXT: $vgpr5 = PRED_COPY [[UV5]](s32) + ; CHECK-NEXT: [[PRED_COPY20:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY20]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY10]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY11]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY13]](s64) + ; CHECK-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY14]](s32) + ; CHECK-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY15]](s32) + ; CHECK-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY16]](s32) + ; CHECK-NEXT: $sgpr15 = PRED_COPY [[DEF1]](s32) + ; CHECK-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v3i64, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit 
$sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -1511,16 +1511,16 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr16 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr15 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr6_sgpr7 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[C:%[0-9]+]]:_(p1) = G_CONSTANT i64 0 ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8589934593 ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 17179869187 @@ -1529,45 +1529,45 @@ ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<4 x s64>) = G_SHUFFLE_VECTOR [[LOAD]](<2 x s64>), [[BUILD_VECTOR]], shufflemask(0, 1, 2, 3) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v4i64 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]] + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY9]](p4) ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C3]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY12]], [[C3]](s64) + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: 
[[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C4]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY18]], [[C4]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY17]], [[SHL]] + ; CHECK-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C5]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY19]], [[C5]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SHUF]](<4 x s64>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](s32) - ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) + ; CHECK-NEXT: $vgpr4 = PRED_COPY [[UV4]](s32) + ; CHECK-NEXT: $vgpr5 = PRED_COPY [[UV5]](s32) + ; CHECK-NEXT: $vgpr6 = PRED_COPY [[UV6]](s32) + ; CHECK-NEXT: $vgpr7 = PRED_COPY [[UV7]](s32) + ; CHECK-NEXT: [[PRED_COPY20:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY20]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY10]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY11]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY13]](s64) + ; CHECK-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY14]](s32) + ; CHECK-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY15]](s32) + ; CHECK-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY16]](s32) + ; CHECK-NEXT: $sgpr15 = PRED_COPY [[DEF]](s32) + ; CHECK-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v4i64, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, 
implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -1582,51 +1582,51 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr16 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr15 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr6_sgpr7 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH4400 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_f16 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]] + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY9]](p4) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY12]], [[C1]](s64) + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: 
[[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY18]], [[C2]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY17]], [[SHL]] + ; CHECK-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY19]], [[C3]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[C]](s16) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[ANYEXT]](s32) + ; CHECK-NEXT: [[PRED_COPY20:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY20]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY10]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY11]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY13]](s64) + ; CHECK-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY14]](s32) + ; CHECK-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY15]](s32) + ; CHECK-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY16]](s32) + ; CHECK-NEXT: $sgpr15 = PRED_COPY [[DEF]](s32) + ; CHECK-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_f16, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -1639,50 +1639,50 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr16 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr15 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 
= PRED_COPY $sgpr14 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr6_sgpr7 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 4.000000e+00 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_f32 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]] + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY9]](p4) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY12]], [[C1]](s64) + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY18]], [[C2]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY17]], [[SHL]] + ; CHECK-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY19]], [[C3]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; CHECK-NEXT: $vgpr0 = COPY [[C]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[PRED_COPY20:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY 
[[PRED_COPY20]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY10]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY11]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY13]](s64) + ; CHECK-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY14]](s32) + ; CHECK-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY15]](s32) + ; CHECK-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY16]](s32) + ; CHECK-NEXT: $sgpr15 = PRED_COPY [[DEF]](s32) + ; CHECK-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_f32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -1695,54 +1695,54 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr16 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr15 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr6_sgpr7 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 2.000000e+00 ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C1]](s32) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v2f32 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]] + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY9]](p4) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C2]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: 
[[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY12]], [[C2]](s64) + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C3]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY18]], [[C3]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY17]], [[SHL]] + ; CHECK-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C4]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY19]], [[C4]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<2 x s32>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: [[PRED_COPY20:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY20]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY10]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY11]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY13]](s64) + ; CHECK-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY14]](s32) + ; CHECK-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY15]](s32) + ; CHECK-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY16]](s32) + ; CHECK-NEXT: $sgpr15 = PRED_COPY [[DEF]](s32) + ; CHECK-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v2f32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -1755,56 +1755,56 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: 
$sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr16 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr15 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr6_sgpr7 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 2.000000e+00 ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 4.000000e+00 ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C1]](s32), [[C2]](s32) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v3f32 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]] + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY9]](p4) ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C3]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY12]], [[C3]](s64) + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C4]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] 
- ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY18]], [[C4]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY17]], [[SHL]] + ; CHECK-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C5]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY19]], [[C5]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<3 x s32>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; CHECK-NEXT: [[PRED_COPY20:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY20]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY10]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY11]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY13]](s64) + ; CHECK-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY14]](s32) + ; CHECK-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY15]](s32) + ; CHECK-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY16]](s32) + ; CHECK-NEXT: $sgpr15 = PRED_COPY [[DEF]](s32) + ; CHECK-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v3f32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -1817,16 +1817,16 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: 
[[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr16 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr15 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr6_sgpr7 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 2.000000e+00 ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 4.000000e+00 @@ -1835,42 +1835,42 @@ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C1]](s32), [[C2]](s32), [[C3]](s32), [[C4]](s32) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v5f32 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]] + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY9]](p4) ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C5]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY12]], [[C5]](s64) + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C6]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY18]], [[C6]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY17]], [[SHL]] + ; CHECK-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C7]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY19]], [[C7]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<5 x s32>) - ; CHECK-NEXT: $vgpr0 = COPY 
[[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) + ; CHECK-NEXT: $vgpr4 = PRED_COPY [[UV4]](s32) + ; CHECK-NEXT: [[PRED_COPY20:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY20]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY10]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY11]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY13]](s64) + ; CHECK-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY14]](s32) + ; CHECK-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY15]](s32) + ; CHECK-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY16]](s32) + ; CHECK-NEXT: $sgpr15 = PRED_COPY [[DEF]](s32) + ; CHECK-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v5f32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -1883,52 +1883,52 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr16 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr15 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr6_sgpr7 + ; CHECK-NEXT: 
[[PRED_COPY8:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 4.000000e+00 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_f64 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]] + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY9]](p4) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY12]], [[C1]](s64) + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY18]], [[C2]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY17]], [[SHL]] + ; CHECK-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY19]], [[C3]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C]](s64) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: [[PRED_COPY20:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY 
[[PRED_COPY20]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY10]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY11]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY13]](s64) + ; CHECK-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY14]](s32) + ; CHECK-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY15]](s32) + ; CHECK-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY16]](s32) + ; CHECK-NEXT: $sgpr15 = PRED_COPY [[DEF]](s32) + ; CHECK-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_f64, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -1941,56 +1941,56 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr16 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr15 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr6_sgpr7 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 2.000000e+00 ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 4.000000e+00 ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C]](s64), [[C1]](s64) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v2f64 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]] + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY9]](p4) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C2]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; 
CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY12]], [[C2]](s64) + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C3]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY18]], [[C3]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY17]], [[SHL]] + ; CHECK-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C4]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY19]], [[C4]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<2 x s64>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) + ; CHECK-NEXT: [[PRED_COPY20:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY20]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY10]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY11]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY13]](s64) + ; CHECK-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY14]](s32) + ; CHECK-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY15]](s32) + ; CHECK-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY16]](s32) + ; CHECK-NEXT: $sgpr15 = PRED_COPY [[DEF]](s32) + ; CHECK-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v2f64, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, 
implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -2003,59 +2003,59 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr16 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr15 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr6_sgpr7 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 2.000000e+00 ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 4.000000e+00 ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_FCONSTANT double 8.000000e+00 ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[C]](s64), [[C1]](s64), [[C2]](s64) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v3f64 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]] + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY9]](p4) ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C3]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY12]], [[C3]](s64) + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY 
[[PRED_COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C4]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY18]], [[C4]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY17]], [[SHL]] + ; CHECK-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C5]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY19]], [[C5]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<3 x s64>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) + ; CHECK-NEXT: $vgpr4 = PRED_COPY [[UV4]](s32) + ; CHECK-NEXT: $vgpr5 = PRED_COPY [[UV5]](s32) + ; CHECK-NEXT: [[PRED_COPY20:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY20]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY10]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY11]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY13]](s64) + ; CHECK-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY14]](s32) + ; CHECK-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY15]](s32) + ; CHECK-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY16]](s32) + ; CHECK-NEXT: $sgpr15 = PRED_COPY [[DEF]](s32) + ; CHECK-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v3f64, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -2068,51 +2068,51 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; 
CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr16 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr15 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr6_sgpr7 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[DEF]](p1) :: ("amdgpu-noclobber" load (<2 x s16>) from `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v2i16 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]] + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY9]](p4) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY12]], [[C]](s64) + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY18]], [[C1]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY17]], [[SHL]] + ; CHECK-NEXT: 
[[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY19]], [[C2]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; CHECK-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[LOAD]](<2 x s16>) + ; CHECK-NEXT: [[PRED_COPY20:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY20]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY10]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY11]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY13]](s64) + ; CHECK-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY14]](s32) + ; CHECK-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY15]](s32) + ; CHECK-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY16]](s32) + ; CHECK-NEXT: $sgpr15 = PRED_COPY [[DEF1]](s32) + ; CHECK-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v2i16, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -2126,56 +2126,56 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr16 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr15 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr6_sgpr7 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; CHECK-NEXT: 
[[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[DEF]](p1) :: ("amdgpu-noclobber" load (<3 x s16>) from `ptr addrspace(1) undef`, align 8, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v3i16 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]] + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY9]](p4) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY12]], [[C]](s64) + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY18]], [[C1]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY17]], [[SHL]] + ; CHECK-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY19]], [[C2]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[LOAD]](<3 x s16>) ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16), [[DEF2]](s16) ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s16>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV3]](<2 x s16>) - ; CHECK-NEXT: $vgpr1 = COPY [[UV4]](<2 x s16>) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; 
CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV3]](<2 x s16>) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV4]](<2 x s16>) + ; CHECK-NEXT: [[PRED_COPY20:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY20]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY10]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY11]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY13]](s64) + ; CHECK-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY14]](s32) + ; CHECK-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY15]](s32) + ; CHECK-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY16]](s32) + ; CHECK-NEXT: $sgpr15 = PRED_COPY [[DEF1]](s32) + ; CHECK-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v3i16, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -2189,56 +2189,56 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr16 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr15 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr6_sgpr7 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[DEF]](p1) :: ("amdgpu-noclobber" load (<3 x s16>) from `ptr addrspace(1) undef`, align 8, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v3f16 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]] + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(p4) = PRED_COPY 
[[PRED_COPY7]] + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY9]](p4) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY12]], [[C]](s64) + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY18]], [[C1]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY17]], [[SHL]] + ; CHECK-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY19]], [[C2]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[LOAD]](<3 x s16>) ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16), [[DEF2]](s16) ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s16>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV3]](<2 x s16>) - ; CHECK-NEXT: $vgpr1 = COPY [[UV4]](<2 x s16>) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV3]](<2 x s16>) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV4]](<2 x s16>) + ; CHECK-NEXT: [[PRED_COPY20:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY20]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY10]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY11]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY13]](s64) + ; CHECK-NEXT: 
$sgpr12 = PRED_COPY [[PRED_COPY14]](s32) + ; CHECK-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY15]](s32) + ; CHECK-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY16]](s32) + ; CHECK-NEXT: $sgpr15 = PRED_COPY [[DEF1]](s32) + ; CHECK-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v3f16, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -2252,53 +2252,53 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr16 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr15 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr6_sgpr7 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[DEF]](p1) :: ("amdgpu-noclobber" load (<4 x s16>) from `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v4i16 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]] + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY9]](p4) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY12]], [[C]](s64) + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; CHECK-NEXT: 
[[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY18]], [[C1]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY17]], [[SHL]] + ; CHECK-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY19]], [[C2]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](<2 x s16>) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](<2 x s16>) + ; CHECK-NEXT: [[PRED_COPY20:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY20]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY10]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY11]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY13]](s64) + ; CHECK-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY14]](s32) + ; CHECK-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY15]](s32) + ; CHECK-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY16]](s32) + ; CHECK-NEXT: $sgpr15 = PRED_COPY [[DEF1]](s32) + ; CHECK-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v4i16, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -2312,16 +2312,16 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; 
CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr16 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr15 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr6_sgpr7 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 1 ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 2 ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 3 @@ -2329,39 +2329,39 @@ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[C]](s16), [[C1]](s16), [[C2]](s16), [[C3]](s16) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v4i16 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]] + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY9]](p4) ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C4]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY12]], [[C4]](s64) + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C5]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY18]], [[C5]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY17]], [[SHL]] + ; CHECK-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = 
PRED_COPY [[PRED_COPY]](s32) ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C6]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY19]], [[C6]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s16>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](<2 x s16>) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](<2 x s16>) + ; CHECK-NEXT: [[PRED_COPY20:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY20]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY10]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY11]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY13]](s64) + ; CHECK-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY14]](s32) + ; CHECK-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY15]](s32) + ; CHECK-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY16]](s32) + ; CHECK-NEXT: $sgpr15 = PRED_COPY [[DEF]](s32) + ; CHECK-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v4i16, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -2374,57 +2374,57 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr16 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr15 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; CHECK-NEXT: 
[[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr6_sgpr7 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<5 x s16>) = G_LOAD [[DEF]](p1) :: ("amdgpu-noclobber" load (<5 x s16>) from `ptr addrspace(1) undef`, align 16, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v5i16 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]] + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY9]](p4) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY12]], [[C]](s64) + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY18]], [[C1]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY17]], [[SHL]] + ; CHECK-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY19]], [[C2]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[LOAD]](<5 x s16>) ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<6 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16), [[UV3]](s16), [[UV4]](s16), [[DEF2]](s16) ; CHECK-NEXT: [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<6 x s16>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV5]](<2 x s16>) - ; CHECK-NEXT: $vgpr1 = COPY [[UV6]](<2 x s16>) - ; CHECK-NEXT: $vgpr2 = COPY [[UV7]](<2 x s16>) - ; CHECK-NEXT: 
[[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV5]](<2 x s16>) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV6]](<2 x s16>) + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[UV7]](<2 x s16>) + ; CHECK-NEXT: [[PRED_COPY20:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY20]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY10]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY11]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY13]](s64) + ; CHECK-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY14]](s32) + ; CHECK-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY15]](s32) + ; CHECK-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY16]](s32) + ; CHECK-NEXT: $sgpr15 = PRED_COPY [[DEF1]](s32) + ; CHECK-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v5i16, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -2438,58 +2438,58 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr16 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr15 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr6_sgpr7 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<7 x s16>) = G_LOAD [[DEF]](p1) :: ("amdgpu-noclobber" load (<7 x s16>) from `ptr addrspace(1) undef`, align 16, 
addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v7i16 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]] + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY9]](p4) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY12]], [[C]](s64) + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY18]], [[C1]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY17]], [[SHL]] + ; CHECK-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY19]], [[C2]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[LOAD]](<7 x s16>) ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16), [[UV3]](s16), [[UV4]](s16), [[UV5]](s16), [[UV6]](s16), [[DEF2]](s16) ; CHECK-NEXT: [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x s16>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV7]](<2 x s16>) - ; CHECK-NEXT: $vgpr1 = COPY [[UV8]](<2 x s16>) - ; CHECK-NEXT: $vgpr2 = COPY [[UV9]](<2 x s16>) - ; CHECK-NEXT: $vgpr3 = COPY [[UV10]](<2 x s16>) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY 
[[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV7]](<2 x s16>) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV8]](<2 x s16>) + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[UV9]](<2 x s16>) + ; CHECK-NEXT: $vgpr3 = PRED_COPY [[UV10]](<2 x s16>) + ; CHECK-NEXT: [[PRED_COPY20:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY20]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY10]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY11]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY13]](s64) + ; CHECK-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY14]](s32) + ; CHECK-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY15]](s32) + ; CHECK-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY16]](s32) + ; CHECK-NEXT: $sgpr15 = PRED_COPY [[DEF1]](s32) + ; CHECK-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v7i16, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -2503,38 +2503,38 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr16 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr15 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr6_sgpr7 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<63 x s16>) = G_LOAD [[DEF]](p1) :: ("amdgpu-noclobber" load (<63 x s16>) from `ptr addrspace(1) undef`, align 128, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v63i16 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = 
COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]] + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY9]](p4) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY12]], [[C]](s64) + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY18]], [[C1]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY17]], [[SHL]] + ; CHECK-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY19]], [[C2]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16), [[UV8:%[0-9]+]]:_(s16), [[UV9:%[0-9]+]]:_(s16), [[UV10:%[0-9]+]]:_(s16), [[UV11:%[0-9]+]]:_(s16), [[UV12:%[0-9]+]]:_(s16), [[UV13:%[0-9]+]]:_(s16), [[UV14:%[0-9]+]]:_(s16), [[UV15:%[0-9]+]]:_(s16), [[UV16:%[0-9]+]]:_(s16), [[UV17:%[0-9]+]]:_(s16), [[UV18:%[0-9]+]]:_(s16), [[UV19:%[0-9]+]]:_(s16), [[UV20:%[0-9]+]]:_(s16), [[UV21:%[0-9]+]]:_(s16), [[UV22:%[0-9]+]]:_(s16), [[UV23:%[0-9]+]]:_(s16), [[UV24:%[0-9]+]]:_(s16), [[UV25:%[0-9]+]]:_(s16), [[UV26:%[0-9]+]]:_(s16), [[UV27:%[0-9]+]]:_(s16), [[UV28:%[0-9]+]]:_(s16), [[UV29:%[0-9]+]]:_(s16), [[UV30:%[0-9]+]]:_(s16), [[UV31:%[0-9]+]]:_(s16), [[UV32:%[0-9]+]]:_(s16), [[UV33:%[0-9]+]]:_(s16), [[UV34:%[0-9]+]]:_(s16), [[UV35:%[0-9]+]]:_(s16), [[UV36:%[0-9]+]]:_(s16), [[UV37:%[0-9]+]]:_(s16), [[UV38:%[0-9]+]]:_(s16), [[UV39:%[0-9]+]]:_(s16), [[UV40:%[0-9]+]]:_(s16), [[UV41:%[0-9]+]]:_(s16), [[UV42:%[0-9]+]]:_(s16), [[UV43:%[0-9]+]]:_(s16), [[UV44:%[0-9]+]]:_(s16), [[UV45:%[0-9]+]]:_(s16), [[UV46:%[0-9]+]]:_(s16), [[UV47:%[0-9]+]]:_(s16), [[UV48:%[0-9]+]]:_(s16), [[UV49:%[0-9]+]]:_(s16), [[UV50:%[0-9]+]]:_(s16), [[UV51:%[0-9]+]]:_(s16), [[UV52:%[0-9]+]]:_(s16), [[UV53:%[0-9]+]]:_(s16), [[UV54:%[0-9]+]]:_(s16), [[UV55:%[0-9]+]]:_(s16), 
[[UV56:%[0-9]+]]:_(s16), [[UV57:%[0-9]+]]:_(s16), [[UV58:%[0-9]+]]:_(s16), [[UV59:%[0-9]+]]:_(s16), [[UV60:%[0-9]+]]:_(s16), [[UV61:%[0-9]+]]:_(s16), [[UV62:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[LOAD]](<63 x s16>) ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF @@ -2544,48 +2544,48 @@ ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C3]](s32) ; CHECK-NEXT: G_STORE [[UV94]](<2 x s16>), [[PTR_ADD1]](p5) :: (store (<2 x s16>) into stack, align 16, addrspace 5) - ; CHECK-NEXT: $vgpr0 = COPY [[UV63]](<2 x s16>) - ; CHECK-NEXT: $vgpr1 = COPY [[UV64]](<2 x s16>) - ; CHECK-NEXT: $vgpr2 = COPY [[UV65]](<2 x s16>) - ; CHECK-NEXT: $vgpr3 = COPY [[UV66]](<2 x s16>) - ; CHECK-NEXT: $vgpr4 = COPY [[UV67]](<2 x s16>) - ; CHECK-NEXT: $vgpr5 = COPY [[UV68]](<2 x s16>) - ; CHECK-NEXT: $vgpr6 = COPY [[UV69]](<2 x s16>) - ; CHECK-NEXT: $vgpr7 = COPY [[UV70]](<2 x s16>) - ; CHECK-NEXT: $vgpr8 = COPY [[UV71]](<2 x s16>) - ; CHECK-NEXT: $vgpr9 = COPY [[UV72]](<2 x s16>) - ; CHECK-NEXT: $vgpr10 = COPY [[UV73]](<2 x s16>) - ; CHECK-NEXT: $vgpr11 = COPY [[UV74]](<2 x s16>) - ; CHECK-NEXT: $vgpr12 = COPY [[UV75]](<2 x s16>) - ; CHECK-NEXT: $vgpr13 = COPY [[UV76]](<2 x s16>) - ; CHECK-NEXT: $vgpr14 = COPY [[UV77]](<2 x s16>) - ; CHECK-NEXT: $vgpr15 = COPY [[UV78]](<2 x s16>) - ; CHECK-NEXT: $vgpr16 = COPY [[UV79]](<2 x s16>) - ; CHECK-NEXT: $vgpr17 = COPY [[UV80]](<2 x s16>) - ; CHECK-NEXT: $vgpr18 = COPY [[UV81]](<2 x s16>) - ; CHECK-NEXT: $vgpr19 = COPY [[UV82]](<2 x s16>) - ; CHECK-NEXT: $vgpr20 = COPY [[UV83]](<2 x s16>) - ; CHECK-NEXT: $vgpr21 = COPY [[UV84]](<2 x s16>) - ; CHECK-NEXT: $vgpr22 = COPY [[UV85]](<2 x s16>) - ; CHECK-NEXT: $vgpr23 = COPY [[UV86]](<2 x s16>) - ; CHECK-NEXT: $vgpr24 = COPY [[UV87]](<2 x s16>) - ; CHECK-NEXT: $vgpr25 = COPY [[UV88]](<2 x s16>) - ; CHECK-NEXT: $vgpr26 = COPY [[UV89]](<2 x s16>) - ; CHECK-NEXT: $vgpr27 = COPY [[UV90]](<2 x s16>) - ; CHECK-NEXT: $vgpr28 = COPY [[UV91]](<2 x s16>) - ; CHECK-NEXT: $vgpr29 = COPY [[UV92]](<2 x s16>) - ; CHECK-NEXT: $vgpr30 = COPY [[UV93]](<2 x s16>) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV63]](<2 x s16>) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV64]](<2 x s16>) + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[UV65]](<2 x s16>) + ; CHECK-NEXT: $vgpr3 = PRED_COPY [[UV66]](<2 x s16>) + ; CHECK-NEXT: $vgpr4 = PRED_COPY [[UV67]](<2 x s16>) + ; CHECK-NEXT: $vgpr5 = PRED_COPY [[UV68]](<2 x s16>) + ; CHECK-NEXT: $vgpr6 = PRED_COPY [[UV69]](<2 x s16>) + ; CHECK-NEXT: $vgpr7 = PRED_COPY [[UV70]](<2 x s16>) + ; CHECK-NEXT: $vgpr8 = PRED_COPY [[UV71]](<2 x s16>) + ; CHECK-NEXT: $vgpr9 = PRED_COPY [[UV72]](<2 x s16>) + ; CHECK-NEXT: $vgpr10 = PRED_COPY [[UV73]](<2 x s16>) + ; CHECK-NEXT: $vgpr11 = PRED_COPY [[UV74]](<2 x s16>) + ; CHECK-NEXT: $vgpr12 = PRED_COPY [[UV75]](<2 x s16>) + ; CHECK-NEXT: $vgpr13 = PRED_COPY [[UV76]](<2 x s16>) + ; CHECK-NEXT: $vgpr14 = PRED_COPY [[UV77]](<2 x s16>) + ; CHECK-NEXT: 
$vgpr15 = PRED_COPY [[UV78]](<2 x s16>) + ; CHECK-NEXT: $vgpr16 = PRED_COPY [[UV79]](<2 x s16>) + ; CHECK-NEXT: $vgpr17 = PRED_COPY [[UV80]](<2 x s16>) + ; CHECK-NEXT: $vgpr18 = PRED_COPY [[UV81]](<2 x s16>) + ; CHECK-NEXT: $vgpr19 = PRED_COPY [[UV82]](<2 x s16>) + ; CHECK-NEXT: $vgpr20 = PRED_COPY [[UV83]](<2 x s16>) + ; CHECK-NEXT: $vgpr21 = PRED_COPY [[UV84]](<2 x s16>) + ; CHECK-NEXT: $vgpr22 = PRED_COPY [[UV85]](<2 x s16>) + ; CHECK-NEXT: $vgpr23 = PRED_COPY [[UV86]](<2 x s16>) + ; CHECK-NEXT: $vgpr24 = PRED_COPY [[UV87]](<2 x s16>) + ; CHECK-NEXT: $vgpr25 = PRED_COPY [[UV88]](<2 x s16>) + ; CHECK-NEXT: $vgpr26 = PRED_COPY [[UV89]](<2 x s16>) + ; CHECK-NEXT: $vgpr27 = PRED_COPY [[UV90]](<2 x s16>) + ; CHECK-NEXT: $vgpr28 = PRED_COPY [[UV91]](<2 x s16>) + ; CHECK-NEXT: $vgpr29 = PRED_COPY [[UV92]](<2 x s16>) + ; CHECK-NEXT: $vgpr30 = PRED_COPY [[UV93]](<2 x s16>) + ; CHECK-NEXT: [[PRED_COPY20:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY20]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY10]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY11]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY13]](s64) + ; CHECK-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY14]](s32) + ; CHECK-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY15]](s32) + ; CHECK-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY16]](s32) + ; CHECK-NEXT: $sgpr15 = PRED_COPY [[DEF1]](s32) + ; CHECK-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v63i16, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 4, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -2599,38 +2599,38 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: 
[[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr16 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr15 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr6_sgpr7 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<65 x s16>) = G_LOAD [[DEF]](p1) :: ("amdgpu-noclobber" load (<65 x s16>) from `ptr addrspace(1) undef`, align 256, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v65i16 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]] + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY9]](p4) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY12]], [[C]](s64) + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY18]], [[C1]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY17]], [[SHL]] + ; CHECK-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY19]], [[C2]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16), [[UV8:%[0-9]+]]:_(s16), [[UV9:%[0-9]+]]:_(s16), [[UV10:%[0-9]+]]:_(s16), [[UV11:%[0-9]+]]:_(s16), [[UV12:%[0-9]+]]:_(s16), [[UV13:%[0-9]+]]:_(s16), [[UV14:%[0-9]+]]:_(s16), [[UV15:%[0-9]+]]:_(s16), [[UV16:%[0-9]+]]:_(s16), [[UV17:%[0-9]+]]:_(s16), [[UV18:%[0-9]+]]:_(s16), [[UV19:%[0-9]+]]:_(s16), 
[[UV20:%[0-9]+]]:_(s16), [[UV21:%[0-9]+]]:_(s16), [[UV22:%[0-9]+]]:_(s16), [[UV23:%[0-9]+]]:_(s16), [[UV24:%[0-9]+]]:_(s16), [[UV25:%[0-9]+]]:_(s16), [[UV26:%[0-9]+]]:_(s16), [[UV27:%[0-9]+]]:_(s16), [[UV28:%[0-9]+]]:_(s16), [[UV29:%[0-9]+]]:_(s16), [[UV30:%[0-9]+]]:_(s16), [[UV31:%[0-9]+]]:_(s16), [[UV32:%[0-9]+]]:_(s16), [[UV33:%[0-9]+]]:_(s16), [[UV34:%[0-9]+]]:_(s16), [[UV35:%[0-9]+]]:_(s16), [[UV36:%[0-9]+]]:_(s16), [[UV37:%[0-9]+]]:_(s16), [[UV38:%[0-9]+]]:_(s16), [[UV39:%[0-9]+]]:_(s16), [[UV40:%[0-9]+]]:_(s16), [[UV41:%[0-9]+]]:_(s16), [[UV42:%[0-9]+]]:_(s16), [[UV43:%[0-9]+]]:_(s16), [[UV44:%[0-9]+]]:_(s16), [[UV45:%[0-9]+]]:_(s16), [[UV46:%[0-9]+]]:_(s16), [[UV47:%[0-9]+]]:_(s16), [[UV48:%[0-9]+]]:_(s16), [[UV49:%[0-9]+]]:_(s16), [[UV50:%[0-9]+]]:_(s16), [[UV51:%[0-9]+]]:_(s16), [[UV52:%[0-9]+]]:_(s16), [[UV53:%[0-9]+]]:_(s16), [[UV54:%[0-9]+]]:_(s16), [[UV55:%[0-9]+]]:_(s16), [[UV56:%[0-9]+]]:_(s16), [[UV57:%[0-9]+]]:_(s16), [[UV58:%[0-9]+]]:_(s16), [[UV59:%[0-9]+]]:_(s16), [[UV60:%[0-9]+]]:_(s16), [[UV61:%[0-9]+]]:_(s16), [[UV62:%[0-9]+]]:_(s16), [[UV63:%[0-9]+]]:_(s16), [[UV64:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[LOAD]](<65 x s16>) ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF @@ -2643,48 +2643,48 @@ ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C4]](s32) ; CHECK-NEXT: G_STORE [[UV97]](<2 x s16>), [[PTR_ADD2]](p5) :: (store (<2 x s16>) into stack + 4, addrspace 5) - ; CHECK-NEXT: $vgpr0 = COPY [[UV65]](<2 x s16>) - ; CHECK-NEXT: $vgpr1 = COPY [[UV66]](<2 x s16>) - ; CHECK-NEXT: $vgpr2 = COPY [[UV67]](<2 x s16>) - ; CHECK-NEXT: $vgpr3 = COPY [[UV68]](<2 x s16>) - ; CHECK-NEXT: $vgpr4 = COPY [[UV69]](<2 x s16>) - ; CHECK-NEXT: $vgpr5 = COPY [[UV70]](<2 x s16>) - ; CHECK-NEXT: $vgpr6 = COPY [[UV71]](<2 x s16>) - ; CHECK-NEXT: $vgpr7 = COPY [[UV72]](<2 x s16>) - ; CHECK-NEXT: $vgpr8 = COPY [[UV73]](<2 x s16>) - ; CHECK-NEXT: $vgpr9 = COPY [[UV74]](<2 x s16>) - ; CHECK-NEXT: $vgpr10 = COPY [[UV75]](<2 x s16>) - ; CHECK-NEXT: $vgpr11 = COPY [[UV76]](<2 x s16>) - ; CHECK-NEXT: $vgpr12 = COPY [[UV77]](<2 x s16>) - ; CHECK-NEXT: $vgpr13 = COPY [[UV78]](<2 x s16>) - ; CHECK-NEXT: $vgpr14 = COPY [[UV79]](<2 x s16>) - ; CHECK-NEXT: $vgpr15 = COPY [[UV80]](<2 x s16>) - ; CHECK-NEXT: $vgpr16 = COPY [[UV81]](<2 x s16>) - ; CHECK-NEXT: $vgpr17 = COPY [[UV82]](<2 x s16>) - ; CHECK-NEXT: $vgpr18 = COPY [[UV83]](<2 x s16>) - ; CHECK-NEXT: $vgpr19 = COPY [[UV84]](<2 x s16>) - ; CHECK-NEXT: $vgpr20 = COPY [[UV85]](<2 x s16>) - ; CHECK-NEXT: $vgpr21 = COPY [[UV86]](<2 x s16>) - ; CHECK-NEXT: $vgpr22 = COPY [[UV87]](<2 x s16>) - ; CHECK-NEXT: $vgpr23 = COPY [[UV88]](<2 x s16>) - ; CHECK-NEXT: $vgpr24 = COPY [[UV89]](<2 x s16>) - ; CHECK-NEXT: $vgpr25 = COPY [[UV90]](<2 x s16>) - ; CHECK-NEXT: $vgpr26 = COPY [[UV91]](<2 x s16>) - ; CHECK-NEXT: $vgpr27 = COPY [[UV92]](<2 x s16>) - ; CHECK-NEXT: $vgpr28 = COPY [[UV93]](<2 x s16>) - ; CHECK-NEXT: $vgpr29 = COPY [[UV94]](<2 x s16>) - ; CHECK-NEXT: $vgpr30 = COPY [[UV95]](<2 x s16>) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY 
[[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV65]](<2 x s16>) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV66]](<2 x s16>) + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[UV67]](<2 x s16>) + ; CHECK-NEXT: $vgpr3 = PRED_COPY [[UV68]](<2 x s16>) + ; CHECK-NEXT: $vgpr4 = PRED_COPY [[UV69]](<2 x s16>) + ; CHECK-NEXT: $vgpr5 = PRED_COPY [[UV70]](<2 x s16>) + ; CHECK-NEXT: $vgpr6 = PRED_COPY [[UV71]](<2 x s16>) + ; CHECK-NEXT: $vgpr7 = PRED_COPY [[UV72]](<2 x s16>) + ; CHECK-NEXT: $vgpr8 = PRED_COPY [[UV73]](<2 x s16>) + ; CHECK-NEXT: $vgpr9 = PRED_COPY [[UV74]](<2 x s16>) + ; CHECK-NEXT: $vgpr10 = PRED_COPY [[UV75]](<2 x s16>) + ; CHECK-NEXT: $vgpr11 = PRED_COPY [[UV76]](<2 x s16>) + ; CHECK-NEXT: $vgpr12 = PRED_COPY [[UV77]](<2 x s16>) + ; CHECK-NEXT: $vgpr13 = PRED_COPY [[UV78]](<2 x s16>) + ; CHECK-NEXT: $vgpr14 = PRED_COPY [[UV79]](<2 x s16>) + ; CHECK-NEXT: $vgpr15 = PRED_COPY [[UV80]](<2 x s16>) + ; CHECK-NEXT: $vgpr16 = PRED_COPY [[UV81]](<2 x s16>) + ; CHECK-NEXT: $vgpr17 = PRED_COPY [[UV82]](<2 x s16>) + ; CHECK-NEXT: $vgpr18 = PRED_COPY [[UV83]](<2 x s16>) + ; CHECK-NEXT: $vgpr19 = PRED_COPY [[UV84]](<2 x s16>) + ; CHECK-NEXT: $vgpr20 = PRED_COPY [[UV85]](<2 x s16>) + ; CHECK-NEXT: $vgpr21 = PRED_COPY [[UV86]](<2 x s16>) + ; CHECK-NEXT: $vgpr22 = PRED_COPY [[UV87]](<2 x s16>) + ; CHECK-NEXT: $vgpr23 = PRED_COPY [[UV88]](<2 x s16>) + ; CHECK-NEXT: $vgpr24 = PRED_COPY [[UV89]](<2 x s16>) + ; CHECK-NEXT: $vgpr25 = PRED_COPY [[UV90]](<2 x s16>) + ; CHECK-NEXT: $vgpr26 = PRED_COPY [[UV91]](<2 x s16>) + ; CHECK-NEXT: $vgpr27 = PRED_COPY [[UV92]](<2 x s16>) + ; CHECK-NEXT: $vgpr28 = PRED_COPY [[UV93]](<2 x s16>) + ; CHECK-NEXT: $vgpr29 = PRED_COPY [[UV94]](<2 x s16>) + ; CHECK-NEXT: $vgpr30 = PRED_COPY [[UV95]](<2 x s16>) + ; CHECK-NEXT: [[PRED_COPY20:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY20]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY10]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY11]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY13]](s64) + ; CHECK-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY14]](s32) + ; CHECK-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY15]](s32) + ; CHECK-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY16]](s32) + ; CHECK-NEXT: $sgpr15 = PRED_COPY [[DEF1]](s32) + ; CHECK-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v65i16, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 8, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -2698,38 +2698,38 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, 
$vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr16 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr15 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr6_sgpr7 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<66 x s16>) = G_LOAD [[DEF]](p1) :: ("amdgpu-noclobber" load (<66 x s16>) from `ptr addrspace(1) undef`, align 256, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v66i16 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]] + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY9]](p4) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY12]], [[C]](s64) + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL 
[[PRED_COPY18]], [[C1]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY17]], [[SHL]] + ; CHECK-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY19]], [[C2]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>), [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>), [[UV14:%[0-9]+]]:_(<2 x s16>), [[UV15:%[0-9]+]]:_(<2 x s16>), [[UV16:%[0-9]+]]:_(<2 x s16>), [[UV17:%[0-9]+]]:_(<2 x s16>), [[UV18:%[0-9]+]]:_(<2 x s16>), [[UV19:%[0-9]+]]:_(<2 x s16>), [[UV20:%[0-9]+]]:_(<2 x s16>), [[UV21:%[0-9]+]]:_(<2 x s16>), [[UV22:%[0-9]+]]:_(<2 x s16>), [[UV23:%[0-9]+]]:_(<2 x s16>), [[UV24:%[0-9]+]]:_(<2 x s16>), [[UV25:%[0-9]+]]:_(<2 x s16>), [[UV26:%[0-9]+]]:_(<2 x s16>), [[UV27:%[0-9]+]]:_(<2 x s16>), [[UV28:%[0-9]+]]:_(<2 x s16>), [[UV29:%[0-9]+]]:_(<2 x s16>), [[UV30:%[0-9]+]]:_(<2 x s16>), [[UV31:%[0-9]+]]:_(<2 x s16>), [[UV32:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<66 x s16>) ; CHECK-NEXT: [[AMDGPU_WAVE_ADDRESS:%[0-9]+]]:_(p5) = G_AMDGPU_WAVE_ADDRESS $sp_reg @@ -2739,48 +2739,48 @@ ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C4]](s32) ; CHECK-NEXT: G_STORE [[UV32]](<2 x s16>), [[PTR_ADD2]](p5) :: (store (<2 x s16>) into stack + 4, addrspace 5) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](<2 x s16>) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](<2 x s16>) - ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](<2 x s16>) - ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](<2 x s16>) - ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](<2 x s16>) - ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](<2 x s16>) - ; CHECK-NEXT: $vgpr8 = COPY [[UV8]](<2 x s16>) - ; CHECK-NEXT: $vgpr9 = COPY [[UV9]](<2 x s16>) - ; CHECK-NEXT: $vgpr10 = COPY [[UV10]](<2 x s16>) - ; CHECK-NEXT: $vgpr11 = COPY [[UV11]](<2 x s16>) - ; CHECK-NEXT: $vgpr12 = COPY [[UV12]](<2 x s16>) - ; CHECK-NEXT: $vgpr13 = COPY [[UV13]](<2 x s16>) - ; CHECK-NEXT: $vgpr14 = COPY [[UV14]](<2 x s16>) - ; CHECK-NEXT: $vgpr15 = COPY [[UV15]](<2 x s16>) - ; CHECK-NEXT: $vgpr16 = COPY [[UV16]](<2 x s16>) - ; CHECK-NEXT: $vgpr17 = COPY [[UV17]](<2 x s16>) - ; CHECK-NEXT: $vgpr18 = COPY [[UV18]](<2 x s16>) - ; CHECK-NEXT: $vgpr19 = COPY [[UV19]](<2 x s16>) - ; CHECK-NEXT: $vgpr20 = COPY [[UV20]](<2 x s16>) - ; CHECK-NEXT: $vgpr21 = COPY [[UV21]](<2 x s16>) - ; CHECK-NEXT: $vgpr22 = COPY [[UV22]](<2 x s16>) - ; CHECK-NEXT: $vgpr23 = COPY [[UV23]](<2 x s16>) - ; CHECK-NEXT: $vgpr24 = COPY [[UV24]](<2 x s16>) - ; CHECK-NEXT: $vgpr25 = COPY [[UV25]](<2 x s16>) - ; CHECK-NEXT: $vgpr26 = COPY [[UV26]](<2 x s16>) - ; CHECK-NEXT: $vgpr27 = COPY [[UV27]](<2 x s16>) - ; CHECK-NEXT: $vgpr28 = COPY [[UV28]](<2 x s16>) - ; CHECK-NEXT: $vgpr29 = COPY [[UV29]](<2 x s16>) - ; CHECK-NEXT: $vgpr30 = COPY [[UV30]](<2 x s16>) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY 
[[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](<2 x s16>) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](<2 x s16>) + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[UV2]](<2 x s16>) + ; CHECK-NEXT: $vgpr3 = PRED_COPY [[UV3]](<2 x s16>) + ; CHECK-NEXT: $vgpr4 = PRED_COPY [[UV4]](<2 x s16>) + ; CHECK-NEXT: $vgpr5 = PRED_COPY [[UV5]](<2 x s16>) + ; CHECK-NEXT: $vgpr6 = PRED_COPY [[UV6]](<2 x s16>) + ; CHECK-NEXT: $vgpr7 = PRED_COPY [[UV7]](<2 x s16>) + ; CHECK-NEXT: $vgpr8 = PRED_COPY [[UV8]](<2 x s16>) + ; CHECK-NEXT: $vgpr9 = PRED_COPY [[UV9]](<2 x s16>) + ; CHECK-NEXT: $vgpr10 = PRED_COPY [[UV10]](<2 x s16>) + ; CHECK-NEXT: $vgpr11 = PRED_COPY [[UV11]](<2 x s16>) + ; CHECK-NEXT: $vgpr12 = PRED_COPY [[UV12]](<2 x s16>) + ; CHECK-NEXT: $vgpr13 = PRED_COPY [[UV13]](<2 x s16>) + ; CHECK-NEXT: $vgpr14 = PRED_COPY [[UV14]](<2 x s16>) + ; CHECK-NEXT: $vgpr15 = PRED_COPY [[UV15]](<2 x s16>) + ; CHECK-NEXT: $vgpr16 = PRED_COPY [[UV16]](<2 x s16>) + ; CHECK-NEXT: $vgpr17 = PRED_COPY [[UV17]](<2 x s16>) + ; CHECK-NEXT: $vgpr18 = PRED_COPY [[UV18]](<2 x s16>) + ; CHECK-NEXT: $vgpr19 = PRED_COPY [[UV19]](<2 x s16>) + ; CHECK-NEXT: $vgpr20 = PRED_COPY [[UV20]](<2 x s16>) + ; CHECK-NEXT: $vgpr21 = PRED_COPY [[UV21]](<2 x s16>) + ; CHECK-NEXT: $vgpr22 = PRED_COPY [[UV22]](<2 x s16>) + ; CHECK-NEXT: $vgpr23 = PRED_COPY [[UV23]](<2 x s16>) + ; CHECK-NEXT: $vgpr24 = PRED_COPY [[UV24]](<2 x s16>) + ; CHECK-NEXT: $vgpr25 = PRED_COPY [[UV25]](<2 x s16>) + ; CHECK-NEXT: $vgpr26 = PRED_COPY [[UV26]](<2 x s16>) + ; CHECK-NEXT: $vgpr27 = PRED_COPY [[UV27]](<2 x s16>) + ; CHECK-NEXT: $vgpr28 = PRED_COPY [[UV28]](<2 x s16>) + ; CHECK-NEXT: $vgpr29 = PRED_COPY [[UV29]](<2 x s16>) + ; CHECK-NEXT: $vgpr30 = PRED_COPY [[UV30]](<2 x s16>) + ; CHECK-NEXT: [[PRED_COPY20:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY20]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY10]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY11]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY13]](s64) + ; CHECK-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY14]](s32) + ; CHECK-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY15]](s32) + ; CHECK-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY16]](s32) + ; CHECK-NEXT: $sgpr15 = PRED_COPY [[DEF1]](s32) + ; CHECK-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v66i16, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, 
implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 8, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -2794,51 +2794,51 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr16 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr15 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr6_sgpr7 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[DEF]](p1) :: ("amdgpu-noclobber" load (<2 x s16>) from `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v2f16 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]] + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY9]](p4) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY12]], [[C]](s64) + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY 
[[PRED_COPY1]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY18]], [[C1]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY17]], [[SHL]] + ; CHECK-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY19]], [[C2]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; CHECK-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[LOAD]](<2 x s16>) + ; CHECK-NEXT: [[PRED_COPY20:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY20]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY10]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY11]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY13]](s64) + ; CHECK-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY14]](s32) + ; CHECK-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY15]](s32) + ; CHECK-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY16]](s32) + ; CHECK-NEXT: $sgpr15 = PRED_COPY [[DEF1]](s32) + ; CHECK-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v2f16, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -2852,53 +2852,53 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + 
; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr16 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr15 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr6_sgpr7 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[DEF]](p1) :: ("amdgpu-noclobber" load (<2 x s32>) from `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v2i32 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]] + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY9]](p4) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY12]], [[C]](s64) + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY18]], [[C1]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY17]], [[SHL]] + ; CHECK-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY19]], [[C2]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: 
$sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: [[PRED_COPY20:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY20]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY10]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY11]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY13]](s64) + ; CHECK-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY14]](s32) + ; CHECK-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY15]](s32) + ; CHECK-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY16]](s32) + ; CHECK-NEXT: $sgpr15 = PRED_COPY [[DEF1]](s32) + ; CHECK-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v2i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -2912,54 +2912,54 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr16 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr15 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr6_sgpr7 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C1]](s32) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v2i32 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]] 
+ ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY9]](p4) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C2]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY12]], [[C2]](s64) + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C3]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY18]], [[C3]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY17]], [[SHL]] + ; CHECK-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C4]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY19]], [[C4]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<2 x s32>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: [[PRED_COPY20:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY20]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY10]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY11]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY13]](s64) + ; CHECK-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY14]](s32) + ; CHECK-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY15]](s32) + ; CHECK-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY16]](s32) + ; CHECK-NEXT: $sgpr15 = PRED_COPY [[DEF]](s32) + ; CHECK-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 
= G_SI_CALL [[GV]](p0), @external_void_func_v2i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -2972,16 +2972,16 @@ ; CHECK: bb.1 (%ir-block.1): ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr16 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr15 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr6_sgpr7 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 @@ -2989,40 +2989,40 @@ ; CHECK-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v3i32 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]] + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY9]](p4) ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C3]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY12]], [[C3]](s64) + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: 
[[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C4]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY18]], [[C4]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY17]], [[SHL]] + ; CHECK-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C5]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY19]], [[C5]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<3 x s32>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; CHECK-NEXT: [[PRED_COPY20:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY20]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY10]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY11]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY13]](s64) + ; CHECK-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY14]](s32) + ; CHECK-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY15]](s32) + ; CHECK-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY16]](s32) + ; CHECK-NEXT: $sgpr15 = PRED_COPY [[DEF]](s32) + ; CHECK-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v3i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -3035,16 +3035,16 @@ ; CHECK: bb.1 (%ir-block.1): ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY 
$sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr16 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr15 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr6_sgpr7 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 @@ -3053,41 +3053,41 @@ ; CHECK-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v3i32_i32 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]] + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY9]](p4) ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C4]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY12]], [[C4]](s64) + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C5]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY18]], [[C5]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY17]], [[SHL]] + ; CHECK-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = 
G_SHL [[COPY19]], [[C6]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY19]], [[C6]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<3 x s32>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[C3]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = PRED_COPY [[C3]](s32) + ; CHECK-NEXT: [[PRED_COPY20:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY20]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY10]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY11]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY13]](s64) + ; CHECK-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY14]](s32) + ; CHECK-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY15]](s32) + ; CHECK-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY16]](s32) + ; CHECK-NEXT: $sgpr15 = PRED_COPY [[DEF]](s32) + ; CHECK-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v3i32_i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -3100,55 +3100,55 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr16 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr15 + ; CHECK-NEXT: 
[[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr6_sgpr7 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[DEF]](p1) :: ("amdgpu-noclobber" load (<4 x s32>) from `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v4i32 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]] + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY9]](p4) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY12]], [[C]](s64) + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY18]], [[C1]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY17]], [[SHL]] + ; CHECK-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY19]], [[C2]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = 
COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) + ; CHECK-NEXT: [[PRED_COPY20:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY20]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY10]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY11]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY13]](s64) + ; CHECK-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY14]](s32) + ; CHECK-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY15]](s32) + ; CHECK-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY16]](s32) + ; CHECK-NEXT: $sgpr15 = PRED_COPY [[DEF1]](s32) + ; CHECK-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v4i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -3162,16 +3162,16 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr16 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr15 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr6_sgpr7 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 @@ -3179,41 +3179,41 @@ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C1]](s32), [[C2]](s32), [[C3]](s32) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v4i32 - ; CHECK-NEXT: 
[[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]] + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY9]](p4) ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C4]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY12]], [[C4]](s64) + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C5]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY18]], [[C5]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY17]], [[SHL]] + ; CHECK-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C6]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY19]], [[C6]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) + ; CHECK-NEXT: [[PRED_COPY20:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY20]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY10]](p4) + ; CHECK-NEXT: 
$sgpr6_sgpr7 = PRED_COPY [[PRED_COPY11]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY13]](s64) + ; CHECK-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY14]](s32) + ; CHECK-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY15]](s32) + ; CHECK-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY16]](s32) + ; CHECK-NEXT: $sgpr15 = PRED_COPY [[DEF]](s32) + ; CHECK-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v4i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -3226,16 +3226,16 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr16 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr15 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr6_sgpr7 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 @@ -3244,42 +3244,42 @@ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C1]](s32), [[C2]](s32), [[C3]](s32), [[C4]](s32) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v5i32 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]] + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY9]](p4) ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C5]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY 
[[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY12]], [[C5]](s64) + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C6]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY18]], [[C6]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY17]], [[SHL]] + ; CHECK-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C7]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY19]], [[C7]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<5 x s32>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) + ; CHECK-NEXT: $vgpr4 = PRED_COPY [[UV4]](s32) + ; CHECK-NEXT: [[PRED_COPY20:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY20]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY10]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY11]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY13]](s64) + ; CHECK-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY14]](s32) + ; CHECK-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY15]](s32) + ; CHECK-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY16]](s32) + ; CHECK-NEXT: $sgpr15 = PRED_COPY [[DEF]](s32) + ; CHECK-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v5i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit 
$vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -3292,60 +3292,60 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr16 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr15 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr6_sgpr7 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (invariant load (p1) from `ptr addrspace(4) undef`, addrspace 4) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[LOAD]](p1) :: ("amdgpu-noclobber" load (<8 x s32>) from %ir.ptr, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v8i32 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]] + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY9]](p4) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY12]], [[C]](s64) + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY 
[[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY18]], [[C1]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY17]], [[SHL]] + ; CHECK-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY19]], [[C2]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<8 x s32>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](s32) - ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) + ; CHECK-NEXT: $vgpr4 = PRED_COPY [[UV4]](s32) + ; CHECK-NEXT: $vgpr5 = PRED_COPY [[UV5]](s32) + ; CHECK-NEXT: $vgpr6 = PRED_COPY [[UV6]](s32) + ; CHECK-NEXT: $vgpr7 = PRED_COPY [[UV7]](s32) + ; CHECK-NEXT: [[PRED_COPY20:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY20]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY10]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY11]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY13]](s64) + ; CHECK-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY14]](s32) + ; CHECK-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY15]](s32) + ; CHECK-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY16]](s32) + ; CHECK-NEXT: $sgpr15 = PRED_COPY [[DEF1]](s32) + ; CHECK-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v8i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit 
$sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -3360,16 +3360,16 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr16 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr15 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr6_sgpr7 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 @@ -3381,45 +3381,45 @@ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C1]](s32), [[C2]](s32), [[C3]](s32), [[C4]](s32), [[C5]](s32), [[C6]](s32), [[C7]](s32) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v8i32 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]] + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY9]](p4) ; CHECK-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C8]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY12]], [[C8]](s64) + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: 
[[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; CHECK-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C9]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY18]], [[C9]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY17]], [[SHL]] + ; CHECK-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; CHECK-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C10]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY19]], [[C10]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x s32>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](s32) - ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) + ; CHECK-NEXT: $vgpr4 = PRED_COPY [[UV4]](s32) + ; CHECK-NEXT: $vgpr5 = PRED_COPY [[UV5]](s32) + ; CHECK-NEXT: $vgpr6 = PRED_COPY [[UV6]](s32) + ; CHECK-NEXT: $vgpr7 = PRED_COPY [[UV7]](s32) + ; CHECK-NEXT: [[PRED_COPY20:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY20]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY10]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY11]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY13]](s64) + ; CHECK-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY14]](s32) + ; CHECK-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY15]](s32) + ; CHECK-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY16]](s32) + ; CHECK-NEXT: $sgpr15 = PRED_COPY [[DEF]](s32) + ; CHECK-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v8i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, 
implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -3432,68 +3432,68 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr16 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr15 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr6_sgpr7 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (invariant load (p1) from `ptr addrspace(4) undef`, addrspace 4) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[LOAD]](p1) :: ("amdgpu-noclobber" load (<16 x s32>) from %ir.ptr, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v16i32 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]] + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY9]](p4) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY12]], [[C]](s64) + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; CHECK-NEXT: 
[[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY18]], [[C1]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY17]], [[SHL]] + ; CHECK-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY19]], [[C2]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<16 x s32>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](s32) - ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](s32) - ; CHECK-NEXT: $vgpr8 = COPY [[UV8]](s32) - ; CHECK-NEXT: $vgpr9 = COPY [[UV9]](s32) - ; CHECK-NEXT: $vgpr10 = COPY [[UV10]](s32) - ; CHECK-NEXT: $vgpr11 = COPY [[UV11]](s32) - ; CHECK-NEXT: $vgpr12 = COPY [[UV12]](s32) - ; CHECK-NEXT: $vgpr13 = COPY [[UV13]](s32) - ; CHECK-NEXT: $vgpr14 = COPY [[UV14]](s32) - ; CHECK-NEXT: $vgpr15 = COPY [[UV15]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) + ; CHECK-NEXT: $vgpr4 = PRED_COPY [[UV4]](s32) + ; CHECK-NEXT: $vgpr5 = PRED_COPY [[UV5]](s32) + ; CHECK-NEXT: $vgpr6 = PRED_COPY [[UV6]](s32) + ; CHECK-NEXT: $vgpr7 = PRED_COPY [[UV7]](s32) + ; CHECK-NEXT: $vgpr8 = PRED_COPY [[UV8]](s32) + ; CHECK-NEXT: $vgpr9 = PRED_COPY [[UV9]](s32) + ; CHECK-NEXT: $vgpr10 = PRED_COPY [[UV10]](s32) + ; CHECK-NEXT: $vgpr11 = PRED_COPY [[UV11]](s32) + ; CHECK-NEXT: $vgpr12 = PRED_COPY [[UV12]](s32) + ; CHECK-NEXT: $vgpr13 = PRED_COPY [[UV13]](s32) + ; CHECK-NEXT: $vgpr14 = PRED_COPY [[UV14]](s32) + ; CHECK-NEXT: $vgpr15 = PRED_COPY [[UV15]](s32) + ; CHECK-NEXT: [[PRED_COPY20:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY20]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY10]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY11]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = 
PRED_COPY [[PTR_ADD]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY13]](s64) + ; CHECK-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY14]](s32) + ; CHECK-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY15]](s32) + ; CHECK-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY16]](s32) + ; CHECK-NEXT: $sgpr15 = PRED_COPY [[DEF1]](s32) + ; CHECK-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v16i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -3508,87 +3508,87 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr16 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr15 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr6_sgpr7 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (invariant load (p1) from `ptr addrspace(4) undef`, addrspace 4) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[LOAD]](p1) :: ("amdgpu-noclobber" load (<32 x s32>) from %ir.ptr, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v32i32 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]] + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY9]](p4) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; CHECK-NEXT: 
[[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY12]], [[C]](s64) + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY18]], [[C1]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY17]], [[SHL]] + ; CHECK-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY19]], [[C2]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<32 x s32>) ; CHECK-NEXT: [[AMDGPU_WAVE_ADDRESS:%[0-9]+]]:_(p5) = G_AMDGPU_WAVE_ADDRESS $sp_reg ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C3]](s32) ; CHECK-NEXT: G_STORE [[UV31]](s32), [[PTR_ADD1]](p5) :: (store (s32) into stack, align 16, addrspace 5) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](s32) - ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](s32) - ; CHECK-NEXT: $vgpr8 = COPY [[UV8]](s32) - ; CHECK-NEXT: $vgpr9 = COPY [[UV9]](s32) - ; CHECK-NEXT: $vgpr10 = COPY [[UV10]](s32) - ; CHECK-NEXT: $vgpr11 = COPY [[UV11]](s32) - ; CHECK-NEXT: $vgpr12 = COPY [[UV12]](s32) - ; CHECK-NEXT: $vgpr13 = COPY [[UV13]](s32) - ; CHECK-NEXT: $vgpr14 = COPY [[UV14]](s32) - ; CHECK-NEXT: $vgpr15 = COPY 
[[UV15]](s32) - ; CHECK-NEXT: $vgpr16 = COPY [[UV16]](s32) - ; CHECK-NEXT: $vgpr17 = COPY [[UV17]](s32) - ; CHECK-NEXT: $vgpr18 = COPY [[UV18]](s32) - ; CHECK-NEXT: $vgpr19 = COPY [[UV19]](s32) - ; CHECK-NEXT: $vgpr20 = COPY [[UV20]](s32) - ; CHECK-NEXT: $vgpr21 = COPY [[UV21]](s32) - ; CHECK-NEXT: $vgpr22 = COPY [[UV22]](s32) - ; CHECK-NEXT: $vgpr23 = COPY [[UV23]](s32) - ; CHECK-NEXT: $vgpr24 = COPY [[UV24]](s32) - ; CHECK-NEXT: $vgpr25 = COPY [[UV25]](s32) - ; CHECK-NEXT: $vgpr26 = COPY [[UV26]](s32) - ; CHECK-NEXT: $vgpr27 = COPY [[UV27]](s32) - ; CHECK-NEXT: $vgpr28 = COPY [[UV28]](s32) - ; CHECK-NEXT: $vgpr29 = COPY [[UV29]](s32) - ; CHECK-NEXT: $vgpr30 = COPY [[UV30]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) + ; CHECK-NEXT: $vgpr4 = PRED_COPY [[UV4]](s32) + ; CHECK-NEXT: $vgpr5 = PRED_COPY [[UV5]](s32) + ; CHECK-NEXT: $vgpr6 = PRED_COPY [[UV6]](s32) + ; CHECK-NEXT: $vgpr7 = PRED_COPY [[UV7]](s32) + ; CHECK-NEXT: $vgpr8 = PRED_COPY [[UV8]](s32) + ; CHECK-NEXT: $vgpr9 = PRED_COPY [[UV9]](s32) + ; CHECK-NEXT: $vgpr10 = PRED_COPY [[UV10]](s32) + ; CHECK-NEXT: $vgpr11 = PRED_COPY [[UV11]](s32) + ; CHECK-NEXT: $vgpr12 = PRED_COPY [[UV12]](s32) + ; CHECK-NEXT: $vgpr13 = PRED_COPY [[UV13]](s32) + ; CHECK-NEXT: $vgpr14 = PRED_COPY [[UV14]](s32) + ; CHECK-NEXT: $vgpr15 = PRED_COPY [[UV15]](s32) + ; CHECK-NEXT: $vgpr16 = PRED_COPY [[UV16]](s32) + ; CHECK-NEXT: $vgpr17 = PRED_COPY [[UV17]](s32) + ; CHECK-NEXT: $vgpr18 = PRED_COPY [[UV18]](s32) + ; CHECK-NEXT: $vgpr19 = PRED_COPY [[UV19]](s32) + ; CHECK-NEXT: $vgpr20 = PRED_COPY [[UV20]](s32) + ; CHECK-NEXT: $vgpr21 = PRED_COPY [[UV21]](s32) + ; CHECK-NEXT: $vgpr22 = PRED_COPY [[UV22]](s32) + ; CHECK-NEXT: $vgpr23 = PRED_COPY [[UV23]](s32) + ; CHECK-NEXT: $vgpr24 = PRED_COPY [[UV24]](s32) + ; CHECK-NEXT: $vgpr25 = PRED_COPY [[UV25]](s32) + ; CHECK-NEXT: $vgpr26 = PRED_COPY [[UV26]](s32) + ; CHECK-NEXT: $vgpr27 = PRED_COPY [[UV27]](s32) + ; CHECK-NEXT: $vgpr28 = PRED_COPY [[UV28]](s32) + ; CHECK-NEXT: $vgpr29 = PRED_COPY [[UV29]](s32) + ; CHECK-NEXT: $vgpr30 = PRED_COPY [[UV30]](s32) + ; CHECK-NEXT: [[PRED_COPY20:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY20]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY10]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY11]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY13]](s64) + ; CHECK-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY14]](s32) + ; CHECK-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY15]](s32) + ; CHECK-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY16]](s32) + ; CHECK-NEXT: $sgpr15 = PRED_COPY [[DEF1]](s32) + ; CHECK-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), 
@external_void_func_v32i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 4, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -3603,16 +3603,16 @@ ; CHECK: bb.1 (%ir-block.1): ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr16 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr15 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr6_sgpr7 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr) @@ -3621,24 +3621,24 @@ ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[DEF1]](p1) :: ("amdgpu-noclobber" load (s32) from `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v32i32_i32 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]] + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY9]](p4) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; 
CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY12]], [[C]](s64) + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY18]], [[C1]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY17]], [[SHL]] + ; CHECK-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY19]], [[C2]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<32 x s32>) ; CHECK-NEXT: [[AMDGPU_WAVE_ADDRESS:%[0-9]+]]:_(p5) = G_AMDGPU_WAVE_ADDRESS $sp_reg @@ -3648,48 +3648,48 @@ ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C4]](s32) ; CHECK-NEXT: G_STORE [[LOAD2]](s32), [[PTR_ADD2]](p5) :: (store (s32) into stack + 4, addrspace 5) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](s32) - ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](s32) - ; CHECK-NEXT: $vgpr8 = COPY [[UV8]](s32) - ; CHECK-NEXT: $vgpr9 = COPY [[UV9]](s32) - ; CHECK-NEXT: $vgpr10 = COPY [[UV10]](s32) - ; CHECK-NEXT: $vgpr11 = COPY [[UV11]](s32) - ; CHECK-NEXT: $vgpr12 = COPY [[UV12]](s32) - ; CHECK-NEXT: $vgpr13 = COPY [[UV13]](s32) - ; CHECK-NEXT: $vgpr14 = COPY [[UV14]](s32) - ; CHECK-NEXT: $vgpr15 = COPY [[UV15]](s32) - ; CHECK-NEXT: $vgpr16 = COPY [[UV16]](s32) - ; CHECK-NEXT: $vgpr17 = COPY 
[[UV17]](s32) - ; CHECK-NEXT: $vgpr18 = COPY [[UV18]](s32) - ; CHECK-NEXT: $vgpr19 = COPY [[UV19]](s32) - ; CHECK-NEXT: $vgpr20 = COPY [[UV20]](s32) - ; CHECK-NEXT: $vgpr21 = COPY [[UV21]](s32) - ; CHECK-NEXT: $vgpr22 = COPY [[UV22]](s32) - ; CHECK-NEXT: $vgpr23 = COPY [[UV23]](s32) - ; CHECK-NEXT: $vgpr24 = COPY [[UV24]](s32) - ; CHECK-NEXT: $vgpr25 = COPY [[UV25]](s32) - ; CHECK-NEXT: $vgpr26 = COPY [[UV26]](s32) - ; CHECK-NEXT: $vgpr27 = COPY [[UV27]](s32) - ; CHECK-NEXT: $vgpr28 = COPY [[UV28]](s32) - ; CHECK-NEXT: $vgpr29 = COPY [[UV29]](s32) - ; CHECK-NEXT: $vgpr30 = COPY [[UV30]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF2]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) + ; CHECK-NEXT: $vgpr4 = PRED_COPY [[UV4]](s32) + ; CHECK-NEXT: $vgpr5 = PRED_COPY [[UV5]](s32) + ; CHECK-NEXT: $vgpr6 = PRED_COPY [[UV6]](s32) + ; CHECK-NEXT: $vgpr7 = PRED_COPY [[UV7]](s32) + ; CHECK-NEXT: $vgpr8 = PRED_COPY [[UV8]](s32) + ; CHECK-NEXT: $vgpr9 = PRED_COPY [[UV9]](s32) + ; CHECK-NEXT: $vgpr10 = PRED_COPY [[UV10]](s32) + ; CHECK-NEXT: $vgpr11 = PRED_COPY [[UV11]](s32) + ; CHECK-NEXT: $vgpr12 = PRED_COPY [[UV12]](s32) + ; CHECK-NEXT: $vgpr13 = PRED_COPY [[UV13]](s32) + ; CHECK-NEXT: $vgpr14 = PRED_COPY [[UV14]](s32) + ; CHECK-NEXT: $vgpr15 = PRED_COPY [[UV15]](s32) + ; CHECK-NEXT: $vgpr16 = PRED_COPY [[UV16]](s32) + ; CHECK-NEXT: $vgpr17 = PRED_COPY [[UV17]](s32) + ; CHECK-NEXT: $vgpr18 = PRED_COPY [[UV18]](s32) + ; CHECK-NEXT: $vgpr19 = PRED_COPY [[UV19]](s32) + ; CHECK-NEXT: $vgpr20 = PRED_COPY [[UV20]](s32) + ; CHECK-NEXT: $vgpr21 = PRED_COPY [[UV21]](s32) + ; CHECK-NEXT: $vgpr22 = PRED_COPY [[UV22]](s32) + ; CHECK-NEXT: $vgpr23 = PRED_COPY [[UV23]](s32) + ; CHECK-NEXT: $vgpr24 = PRED_COPY [[UV24]](s32) + ; CHECK-NEXT: $vgpr25 = PRED_COPY [[UV25]](s32) + ; CHECK-NEXT: $vgpr26 = PRED_COPY [[UV26]](s32) + ; CHECK-NEXT: $vgpr27 = PRED_COPY [[UV27]](s32) + ; CHECK-NEXT: $vgpr28 = PRED_COPY [[UV28]](s32) + ; CHECK-NEXT: $vgpr29 = PRED_COPY [[UV29]](s32) + ; CHECK-NEXT: $vgpr30 = PRED_COPY [[UV30]](s32) + ; CHECK-NEXT: [[PRED_COPY20:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY20]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY10]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY11]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY13]](s64) + ; CHECK-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY14]](s32) + ; CHECK-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY15]](s32) + ; CHECK-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY16]](s32) + ; CHECK-NEXT: $sgpr15 = PRED_COPY [[DEF2]](s32) + ; CHECK-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v32i32_i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, 
implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 8, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -3705,16 +3705,16 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr16 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr15 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr6_sgpr7 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (invariant load (p1) from `ptr addrspace(4) undef`, addrspace 4) @@ -3723,24 +3723,24 @@ ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:_(s16) = G_LOAD [[DEF1]](p1) :: ("amdgpu-noclobber" load (s16) from `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v32i32_i8_i8_i16 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]] + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY9]](p4) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: 
[[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY12]], [[C]](s64) + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY18]], [[C1]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY17]], [[SHL]] + ; CHECK-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY19]], [[C2]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<32 x s32>) ; CHECK-NEXT: [[AMDGPU_WAVE_ADDRESS:%[0-9]+]]:_(p5) = G_AMDGPU_WAVE_ADDRESS $sp_reg @@ -3751,55 +3751,55 @@ ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C4]](s32) ; CHECK-NEXT: G_STORE [[ANYEXT]](s16), [[PTR_ADD2]](p5) :: (store (s16) into stack + 4, align 4, addrspace 5) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(s16) = COPY [[ANYEXT]](s16) + ; CHECK-NEXT: [[PRED_COPY20:%[0-9]+]]:_(s16) = PRED_COPY [[ANYEXT]](s16) ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C5]](s32) - ; CHECK-NEXT: G_STORE [[COPY20]](s16), [[PTR_ADD3]](p5) :: (store (s16) into stack + 8, align 8, addrspace 5) + ; CHECK-NEXT: G_STORE [[PRED_COPY20]](s16), [[PTR_ADD3]](p5) :: (store (s16) into stack + 8, align 8, addrspace 5) ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; CHECK-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C6]](s32) ; CHECK-NEXT: G_STORE [[LOAD3]](s16), [[PTR_ADD4]](p5) :: (store (s16) into stack + 12, align 4, addrspace 5) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY 
[[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](s32) - ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](s32) - ; CHECK-NEXT: $vgpr8 = COPY [[UV8]](s32) - ; CHECK-NEXT: $vgpr9 = COPY [[UV9]](s32) - ; CHECK-NEXT: $vgpr10 = COPY [[UV10]](s32) - ; CHECK-NEXT: $vgpr11 = COPY [[UV11]](s32) - ; CHECK-NEXT: $vgpr12 = COPY [[UV12]](s32) - ; CHECK-NEXT: $vgpr13 = COPY [[UV13]](s32) - ; CHECK-NEXT: $vgpr14 = COPY [[UV14]](s32) - ; CHECK-NEXT: $vgpr15 = COPY [[UV15]](s32) - ; CHECK-NEXT: $vgpr16 = COPY [[UV16]](s32) - ; CHECK-NEXT: $vgpr17 = COPY [[UV17]](s32) - ; CHECK-NEXT: $vgpr18 = COPY [[UV18]](s32) - ; CHECK-NEXT: $vgpr19 = COPY [[UV19]](s32) - ; CHECK-NEXT: $vgpr20 = COPY [[UV20]](s32) - ; CHECK-NEXT: $vgpr21 = COPY [[UV21]](s32) - ; CHECK-NEXT: $vgpr22 = COPY [[UV22]](s32) - ; CHECK-NEXT: $vgpr23 = COPY [[UV23]](s32) - ; CHECK-NEXT: $vgpr24 = COPY [[UV24]](s32) - ; CHECK-NEXT: $vgpr25 = COPY [[UV25]](s32) - ; CHECK-NEXT: $vgpr26 = COPY [[UV26]](s32) - ; CHECK-NEXT: $vgpr27 = COPY [[UV27]](s32) - ; CHECK-NEXT: $vgpr28 = COPY [[UV28]](s32) - ; CHECK-NEXT: $vgpr29 = COPY [[UV29]](s32) - ; CHECK-NEXT: $vgpr30 = COPY [[UV30]](s32) - ; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY21]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF2]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) + ; CHECK-NEXT: $vgpr4 = PRED_COPY [[UV4]](s32) + ; CHECK-NEXT: $vgpr5 = PRED_COPY [[UV5]](s32) + ; CHECK-NEXT: $vgpr6 = PRED_COPY [[UV6]](s32) + ; CHECK-NEXT: $vgpr7 = PRED_COPY [[UV7]](s32) + ; CHECK-NEXT: $vgpr8 = PRED_COPY [[UV8]](s32) + ; CHECK-NEXT: $vgpr9 = PRED_COPY [[UV9]](s32) + ; CHECK-NEXT: $vgpr10 = PRED_COPY [[UV10]](s32) + ; CHECK-NEXT: $vgpr11 = PRED_COPY [[UV11]](s32) + ; CHECK-NEXT: $vgpr12 = PRED_COPY [[UV12]](s32) + ; CHECK-NEXT: $vgpr13 = PRED_COPY [[UV13]](s32) + ; CHECK-NEXT: $vgpr14 = PRED_COPY [[UV14]](s32) + ; CHECK-NEXT: $vgpr15 = PRED_COPY [[UV15]](s32) + ; CHECK-NEXT: $vgpr16 = PRED_COPY [[UV16]](s32) + ; CHECK-NEXT: $vgpr17 = PRED_COPY [[UV17]](s32) + ; CHECK-NEXT: $vgpr18 = PRED_COPY [[UV18]](s32) + ; CHECK-NEXT: $vgpr19 = PRED_COPY [[UV19]](s32) + ; CHECK-NEXT: $vgpr20 = PRED_COPY [[UV20]](s32) + ; CHECK-NEXT: $vgpr21 = PRED_COPY [[UV21]](s32) + ; CHECK-NEXT: $vgpr22 = PRED_COPY [[UV22]](s32) + ; CHECK-NEXT: $vgpr23 = PRED_COPY [[UV23]](s32) + ; CHECK-NEXT: $vgpr24 = PRED_COPY [[UV24]](s32) + ; CHECK-NEXT: $vgpr25 = PRED_COPY [[UV25]](s32) + ; CHECK-NEXT: $vgpr26 = PRED_COPY [[UV26]](s32) + ; CHECK-NEXT: $vgpr27 = PRED_COPY [[UV27]](s32) + ; CHECK-NEXT: $vgpr28 = PRED_COPY [[UV28]](s32) + ; CHECK-NEXT: $vgpr29 = PRED_COPY [[UV29]](s32) + ; CHECK-NEXT: $vgpr30 = PRED_COPY [[UV30]](s32) + ; CHECK-NEXT: [[PRED_COPY21:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY 
[[PRED_COPY21]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY10]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY11]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY13]](s64) + ; CHECK-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY14]](s32) + ; CHECK-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY15]](s32) + ; CHECK-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY16]](s32) + ; CHECK-NEXT: $sgpr15 = PRED_COPY [[DEF2]](s32) + ; CHECK-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v32i32_i8_i8_i16, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 16, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -3817,16 +3817,16 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr16 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr15 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr6_sgpr7 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (invariant load (p1) from `ptr addrspace(4) undef`, addrspace 4) @@ -3835,24 +3835,24 @@ ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:_(p5) = G_LOAD [[DEF1]](p1) :: ("amdgpu-noclobber" load (p5) from `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v32i32_p3_p5 - ; CHECK-NEXT: 
[[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]] + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY9]](p4) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY12]], [[C]](s64) + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY18]], [[C1]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY17]], [[SHL]] + ; CHECK-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY19]], [[C2]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<32 x s32>) ; CHECK-NEXT: [[AMDGPU_WAVE_ADDRESS:%[0-9]+]]:_(p5) = G_AMDGPU_WAVE_ADDRESS $sp_reg @@ -3865,48 +3865,48 @@ ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C5]](s32) ; CHECK-NEXT: G_STORE [[LOAD3]](p5), [[PTR_ADD3]](p5) :: (store (p5) into stack + 8, align 8, addrspace 5) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; 
CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](s32) - ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](s32) - ; CHECK-NEXT: $vgpr8 = COPY [[UV8]](s32) - ; CHECK-NEXT: $vgpr9 = COPY [[UV9]](s32) - ; CHECK-NEXT: $vgpr10 = COPY [[UV10]](s32) - ; CHECK-NEXT: $vgpr11 = COPY [[UV11]](s32) - ; CHECK-NEXT: $vgpr12 = COPY [[UV12]](s32) - ; CHECK-NEXT: $vgpr13 = COPY [[UV13]](s32) - ; CHECK-NEXT: $vgpr14 = COPY [[UV14]](s32) - ; CHECK-NEXT: $vgpr15 = COPY [[UV15]](s32) - ; CHECK-NEXT: $vgpr16 = COPY [[UV16]](s32) - ; CHECK-NEXT: $vgpr17 = COPY [[UV17]](s32) - ; CHECK-NEXT: $vgpr18 = COPY [[UV18]](s32) - ; CHECK-NEXT: $vgpr19 = COPY [[UV19]](s32) - ; CHECK-NEXT: $vgpr20 = COPY [[UV20]](s32) - ; CHECK-NEXT: $vgpr21 = COPY [[UV21]](s32) - ; CHECK-NEXT: $vgpr22 = COPY [[UV22]](s32) - ; CHECK-NEXT: $vgpr23 = COPY [[UV23]](s32) - ; CHECK-NEXT: $vgpr24 = COPY [[UV24]](s32) - ; CHECK-NEXT: $vgpr25 = COPY [[UV25]](s32) - ; CHECK-NEXT: $vgpr26 = COPY [[UV26]](s32) - ; CHECK-NEXT: $vgpr27 = COPY [[UV27]](s32) - ; CHECK-NEXT: $vgpr28 = COPY [[UV28]](s32) - ; CHECK-NEXT: $vgpr29 = COPY [[UV29]](s32) - ; CHECK-NEXT: $vgpr30 = COPY [[UV30]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF2]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) + ; CHECK-NEXT: $vgpr4 = PRED_COPY [[UV4]](s32) + ; CHECK-NEXT: $vgpr5 = PRED_COPY [[UV5]](s32) + ; CHECK-NEXT: $vgpr6 = PRED_COPY [[UV6]](s32) + ; CHECK-NEXT: $vgpr7 = PRED_COPY [[UV7]](s32) + ; CHECK-NEXT: $vgpr8 = PRED_COPY [[UV8]](s32) + ; CHECK-NEXT: $vgpr9 = PRED_COPY [[UV9]](s32) + ; CHECK-NEXT: $vgpr10 = PRED_COPY [[UV10]](s32) + ; CHECK-NEXT: $vgpr11 = PRED_COPY [[UV11]](s32) + ; CHECK-NEXT: $vgpr12 = PRED_COPY [[UV12]](s32) + ; CHECK-NEXT: $vgpr13 = PRED_COPY [[UV13]](s32) + ; CHECK-NEXT: $vgpr14 = PRED_COPY [[UV14]](s32) + ; CHECK-NEXT: $vgpr15 = PRED_COPY [[UV15]](s32) + ; CHECK-NEXT: $vgpr16 = PRED_COPY [[UV16]](s32) + ; CHECK-NEXT: $vgpr17 = PRED_COPY [[UV17]](s32) + ; CHECK-NEXT: $vgpr18 = PRED_COPY [[UV18]](s32) + ; CHECK-NEXT: $vgpr19 = PRED_COPY [[UV19]](s32) + ; CHECK-NEXT: $vgpr20 = PRED_COPY [[UV20]](s32) + ; CHECK-NEXT: $vgpr21 = PRED_COPY [[UV21]](s32) + ; CHECK-NEXT: $vgpr22 = PRED_COPY [[UV22]](s32) + ; CHECK-NEXT: $vgpr23 = PRED_COPY [[UV23]](s32) + ; CHECK-NEXT: $vgpr24 = PRED_COPY [[UV24]](s32) + ; CHECK-NEXT: $vgpr25 = PRED_COPY [[UV25]](s32) + ; CHECK-NEXT: $vgpr26 = PRED_COPY [[UV26]](s32) + ; CHECK-NEXT: $vgpr27 = PRED_COPY [[UV27]](s32) + ; CHECK-NEXT: $vgpr28 = PRED_COPY [[UV28]](s32) + ; CHECK-NEXT: $vgpr29 = PRED_COPY [[UV29]](s32) + ; CHECK-NEXT: $vgpr30 = PRED_COPY [[UV30]](s32) + ; CHECK-NEXT: [[PRED_COPY20:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY20]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = 
PRED_COPY [[PRED_COPY10]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY11]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY13]](s64) + ; CHECK-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY14]](s32) + ; CHECK-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY15]](s32) + ; CHECK-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY16]](s32) + ; CHECK-NEXT: $sgpr15 = PRED_COPY [[DEF2]](s32) + ; CHECK-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v32i32_p3_p5, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 12, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -3923,16 +3923,16 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr16 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr15 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr6_sgpr7 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (invariant load (p1) from `ptr addrspace(4) undef`, addrspace 4) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[LOAD]](p1) :: ("amdgpu-noclobber" load (s8) from %ir.ptr0, align 4, addrspace 1) @@ -3941,40 +3941,40 @@ ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: ("amdgpu-noclobber" load (s32) from %ir.ptr0 + 4, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_struct_i8_i32 - ; CHECK-NEXT: 
[[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]] + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY9]](p4) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY12]], [[C1]](s64) + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY18]], [[C2]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY17]], [[SHL]] + ; CHECK-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY19]], [[C3]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[LOAD1]](s8) ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT]](s16) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT1]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[LOAD2]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD1]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[ANYEXT1]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[LOAD2]](s32) + ; CHECK-NEXT: [[PRED_COPY20:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY20]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY10]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY11]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD1]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY13]](s64) + ; CHECK-NEXT: 
$sgpr12 = PRED_COPY [[PRED_COPY14]](s32) + ; CHECK-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY15]](s32) + ; CHECK-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY16]](s32) + ; CHECK-NEXT: $sgpr15 = PRED_COPY [[DEF1]](s32) + ; CHECK-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_struct_i8_i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -3997,10 +3997,10 @@ ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_gfx_void_func_struct_i8_i32 ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[LOAD1]](s8) ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT]](s16) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT1]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[LOAD2]](s32) - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY]](<4 x s32>) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[ANYEXT1]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[LOAD2]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY]](<4 x s32>) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_gfx_void_func_struct_i8_i32, csr_amdgpu_si_gfx, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: SI_RETURN @@ -4023,10 +4023,10 @@ ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_gfx_void_func_struct_i8_i32_inreg ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[LOAD1]](s8) ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT]](s16) - ; CHECK-NEXT: $sgpr4 = COPY [[ANYEXT1]](s32) - ; CHECK-NEXT: $sgpr5 = COPY [[LOAD2]](s32) - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY]](<4 x s32>) + ; CHECK-NEXT: $sgpr4 = PRED_COPY [[ANYEXT1]](s32) + ; CHECK-NEXT: $sgpr5 = PRED_COPY [[LOAD2]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY]](<4 x s32>) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_gfx_void_func_struct_i8_i32_inreg, csr_amdgpu_si_gfx, implicit $sgpr4, implicit $sgpr5, implicit $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: SI_RETURN @@ -4041,16 +4041,16 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: 
[[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr16 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr15 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr6_sgpr7 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 3 ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0.val @@ -4060,41 +4060,41 @@ ; CHECK-NEXT: G_STORE [[C1]](s32), [[PTR_ADD]](p5) :: (store (s32) into %ir.gep1, addrspace 5) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_byval_struct_i8_i32 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]] + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY9]](p4) ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C3]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY12]], [[C3]](s64) + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C4]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY18]], [[C4]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY17]], [[SHL]] + ; CHECK-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C5]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY19]], [[C5]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[AMDGPU_WAVE_ADDRESS:%[0-9]+]]:_(p5) = G_AMDGPU_WAVE_ADDRESS $sp_reg ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = 
G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C6]](s32) ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; CHECK-NEXT: G_MEMCPY [[PTR_ADD2]](p5), [[FRAME_INDEX]](p5), [[C7]](s32), 0 :: (dereferenceable store (s64) into stack, align 4, addrspace 5), (dereferenceable load (s64) from %ir.val, align 4, addrspace 5) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD1]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: [[PRED_COPY20:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY20]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY10]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY11]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD1]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY13]](s64) + ; CHECK-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY14]](s32) + ; CHECK-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY15]](s32) + ; CHECK-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY16]](s32) + ; CHECK-NEXT: $sgpr15 = PRED_COPY [[DEF]](s32) + ; CHECK-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_byval_struct_i8_i32, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 8, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -4114,50 +4114,50 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr0, $vgpr1, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr13 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p5) = COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr31 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr15 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr6_sgpr7 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p5) = PRED_COPY $vgpr0 + ; CHECK-NEXT: 
[[PRED_COPY10:%[0-9]+]]:_(p5) = PRED_COPY $vgpr1 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 999 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @void_func_byval_a3i32_byval_i8_align32 - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s64) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]] + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY5]] + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]] + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]] + ; CHECK-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; CHECK-NEXT: [[AMDGPU_WAVE_ADDRESS:%[0-9]+]]:_(p5) = G_AMDGPU_WAVE_ADDRESS $sgpr32 ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C1]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; CHECK-NEXT: G_MEMCPY [[PTR_ADD]](p5), [[COPY9]](p5), [[C2]](s32), 0 :: (dereferenceable store (s96) into stack, align 4, addrspace 5), (dereferenceable load (s96) from %ir.incoming0, align 4, addrspace 5) + ; CHECK-NEXT: G_MEMCPY [[PTR_ADD]](p5), [[PRED_COPY9]](p5), [[C2]](s32), 0 :: (dereferenceable store (s96) into stack, align 4, addrspace 5), (dereferenceable load (s96) from %ir.incoming0, align 4, addrspace 5) ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C3]](s32) ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: G_MEMCPY [[PTR_ADD1]](p5), [[COPY10]](p5), [[C4]](s32), 0 :: (dereferenceable store (s8) into stack + 32, align 32, addrspace 5), (dereferenceable load (s8) from %ir.incoming1, align 32, addrspace 5) - ; CHECK-NEXT: $vgpr0 = COPY [[C]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY12]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY13]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY14]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY17]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[COPY18]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[COPY19]](s32) + ; CHECK-NEXT: G_MEMCPY [[PTR_ADD1]](p5), [[PRED_COPY10]](p5), [[C4]](s32), 0 :: (dereferenceable store (s8) into stack + 32, align 32, addrspace 5), (dereferenceable load (s8) from %ir.incoming1, align 32, addrspace 5) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[PRED_COPY20:%[0-9]+]]:_(<4 x s32>) = PRED_COPY 
$sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY20]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY11]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY12]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PRED_COPY13]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY14]](s64) + ; CHECK-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY15]](s32) + ; CHECK-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY16]](s32) + ; CHECK-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY17]](s32) + ; CHECK-NEXT: $sgpr15 = PRED_COPY [[PRED_COPY18]](s32) + ; CHECK-NEXT: $vgpr31 = PRED_COPY [[PRED_COPY19]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @void_func_byval_a3i32_byval_i8_align32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 36, implicit-def $scc ; CHECK-NEXT: SI_RETURN @@ -4174,43 +4174,43 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr0, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr13 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr31 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr15 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr6_sgpr7 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p5) = PRED_COPY $vgpr0 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @void_func_byval_a4i64_align4 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]] + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY5]] + ; CHECK-NEXT: 
[[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]] + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]] + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; CHECK-NEXT: [[AMDGPU_WAVE_ADDRESS:%[0-9]+]]:_(p5) = G_AMDGPU_WAVE_ADDRESS $sgpr32 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; CHECK-NEXT: G_MEMCPY [[PTR_ADD]](p5), [[COPY9]](p5), [[C1]](s32), 0 :: (dereferenceable store (s256) into stack, align 4, addrspace 5), (dereferenceable load (s256) from %ir.incoming_high_align, align 256, addrspace 5) - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY19]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY12]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[COPY17]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[COPY18]](s32) + ; CHECK-NEXT: G_MEMCPY [[PTR_ADD]](p5), [[PRED_COPY9]](p5), [[C1]](s32), 0 :: (dereferenceable store (s256) into stack, align 4, addrspace 5), (dereferenceable load (s256) from %ir.incoming_high_align, align 256, addrspace 5) + ; CHECK-NEXT: [[PRED_COPY19:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY19]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY10]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY11]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PRED_COPY12]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY13]](s64) + ; CHECK-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY14]](s32) + ; CHECK-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY15]](s32) + ; CHECK-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY16]](s32) + ; CHECK-NEXT: $sgpr15 = PRED_COPY [[PRED_COPY17]](s32) + ; CHECK-NEXT: $vgpr31 = PRED_COPY [[PRED_COPY18]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @void_func_byval_a4i64_align4, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 32, implicit-def $scc ; CHECK-NEXT: SI_RETURN @@ -4223,58 +4223,58 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: 
[[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr16 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr15 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr6_sgpr7 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (invariant load (p1) from `ptr addrspace(4) undef`, addrspace 4) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s8>) = G_LOAD [[LOAD]](p1) :: ("amdgpu-noclobber" load (<2 x s8>) from %ir.ptr, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v2i8 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]] + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY9]](p4) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY12]], [[C]](s64) + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY18]], [[C1]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY17]], [[SHL]] + ; CHECK-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY19]], [[C2]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[LOAD1]](<2 x s8>) ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s16) 
= G_ANYEXT [[UV]](s8) ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[UV1]](s8) ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT]](s16) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT2]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[ANYEXT2]](s32) ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT1]](s16) - ; CHECK-NEXT: $vgpr1 = COPY [[ANYEXT3]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[ANYEXT3]](s32) + ; CHECK-NEXT: [[PRED_COPY20:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY20]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY10]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY11]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY13]](s64) + ; CHECK-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY14]](s32) + ; CHECK-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY15]](s32) + ; CHECK-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY16]](s32) + ; CHECK-NEXT: $sgpr15 = PRED_COPY [[DEF1]](s32) + ; CHECK-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v2i8, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -4289,61 +4289,61 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr16 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr15 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr6_sgpr7 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; CHECK-NEXT: 
[[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (invariant load (p1) from `ptr addrspace(4) undef`, addrspace 4) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[LOAD]](p1) :: ("amdgpu-noclobber" load (<3 x s8>) from %ir.ptr, align 4, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v3i8 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]] + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY9]](p4) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY12]], [[C]](s64) + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY18]], [[C1]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY17]], [[SHL]] + ; CHECK-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY19]], [[C2]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[LOAD1]](<3 x s8>) ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV]](s8) ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[UV1]](s8) ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s16) = G_ANYEXT [[UV2]](s8) ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT]](s16) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT3]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[ANYEXT3]](s32) ; CHECK-NEXT: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT1]](s16) - ; CHECK-NEXT: $vgpr1 = COPY [[ANYEXT4]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[ANYEXT4]](s32) ; CHECK-NEXT: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT2]](s16) - ; CHECK-NEXT: $vgpr2 = COPY [[ANYEXT5]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY 
$private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[ANYEXT5]](s32) + ; CHECK-NEXT: [[PRED_COPY20:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY20]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY10]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY11]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY13]](s64) + ; CHECK-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY14]](s32) + ; CHECK-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY15]](s32) + ; CHECK-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY16]](s32) + ; CHECK-NEXT: $sgpr15 = PRED_COPY [[DEF1]](s32) + ; CHECK-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v3i8, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -4358,39 +4358,39 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr16 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr15 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr6_sgpr7 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (invariant load (p1) from `ptr addrspace(4) undef`, addrspace 4) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[LOAD]](p1) :: ("amdgpu-noclobber" load (<4 x s8>) from %ir.ptr, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, 
implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v4i8 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]] + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY9]](p4) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY12]], [[C]](s64) + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY18]], [[C1]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY17]], [[SHL]] + ; CHECK-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY19]], [[C2]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[LOAD1]](<4 x s8>) ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV]](s8) @@ -4398,24 +4398,24 @@ ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s16) = G_ANYEXT [[UV2]](s8) ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(s16) = G_ANYEXT [[UV3]](s8) ; CHECK-NEXT: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT]](s16) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT4]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[ANYEXT4]](s32) ; CHECK-NEXT: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT1]](s16) - ; CHECK-NEXT: $vgpr1 = COPY [[ANYEXT5]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[ANYEXT5]](s32) ; CHECK-NEXT: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT2]](s16) - ; CHECK-NEXT: $vgpr2 = COPY [[ANYEXT6]](s32) + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[ANYEXT6]](s32) ; CHECK-NEXT: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT3]](s16) - ; CHECK-NEXT: $vgpr3 = COPY [[ANYEXT7]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; 
CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $vgpr3 = PRED_COPY [[ANYEXT7]](s32) + ; CHECK-NEXT: [[PRED_COPY20:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY20]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY10]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY11]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY13]](s64) + ; CHECK-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY14]](s32) + ; CHECK-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY15]](s32) + ; CHECK-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY16]](s32) + ; CHECK-NEXT: $sgpr15 = PRED_COPY [[DEF1]](s32) + ; CHECK-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v4i8, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -4430,39 +4430,39 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr16 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr15 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr6_sgpr7 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (invariant load (p1) from `ptr addrspace(4) undef`, addrspace 4) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<8 x s8>) = G_LOAD [[LOAD]](p1) :: ("amdgpu-noclobber" load (<8 x s8>) from %ir.ptr, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v8i8 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: 
[[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]] + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY9]](p4) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY12]], [[C]](s64) + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY18]], [[C1]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY17]], [[SHL]] + ; CHECK-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY19]], [[C2]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8), [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[LOAD1]](<8 x s8>) ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV]](s8) @@ -4474,32 +4474,32 @@ ; CHECK-NEXT: [[ANYEXT6:%[0-9]+]]:_(s16) = G_ANYEXT [[UV6]](s8) ; CHECK-NEXT: [[ANYEXT7:%[0-9]+]]:_(s16) = G_ANYEXT [[UV7]](s8) ; CHECK-NEXT: [[ANYEXT8:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT]](s16) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT8]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[ANYEXT8]](s32) ; CHECK-NEXT: [[ANYEXT9:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT1]](s16) - ; CHECK-NEXT: $vgpr1 = COPY [[ANYEXT9]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[ANYEXT9]](s32) ; CHECK-NEXT: [[ANYEXT10:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT2]](s16) - ; CHECK-NEXT: $vgpr2 = COPY [[ANYEXT10]](s32) + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[ANYEXT10]](s32) ; CHECK-NEXT: [[ANYEXT11:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT3]](s16) - ; CHECK-NEXT: $vgpr3 = COPY [[ANYEXT11]](s32) + ; CHECK-NEXT: $vgpr3 = PRED_COPY [[ANYEXT11]](s32) ; CHECK-NEXT: [[ANYEXT12:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT4]](s16) - ; CHECK-NEXT: $vgpr4 = COPY [[ANYEXT12]](s32) + ; CHECK-NEXT: $vgpr4 = PRED_COPY [[ANYEXT12]](s32) ; CHECK-NEXT: [[ANYEXT13:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT5]](s16) - ; CHECK-NEXT: $vgpr5 = COPY 
[[ANYEXT13]](s32) + ; CHECK-NEXT: $vgpr5 = PRED_COPY [[ANYEXT13]](s32) ; CHECK-NEXT: [[ANYEXT14:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT6]](s16) - ; CHECK-NEXT: $vgpr6 = COPY [[ANYEXT14]](s32) + ; CHECK-NEXT: $vgpr6 = PRED_COPY [[ANYEXT14]](s32) ; CHECK-NEXT: [[ANYEXT15:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT7]](s16) - ; CHECK-NEXT: $vgpr7 = COPY [[ANYEXT15]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $vgpr7 = PRED_COPY [[ANYEXT15]](s32) + ; CHECK-NEXT: [[PRED_COPY20:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY20]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY10]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY11]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY13]](s64) + ; CHECK-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY14]](s32) + ; CHECK-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY15]](s32) + ; CHECK-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY16]](s32) + ; CHECK-NEXT: $sgpr15 = PRED_COPY [[DEF1]](s32) + ; CHECK-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v8i8, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -4514,39 +4514,39 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr16 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr15 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY 
$sgpr6_sgpr7 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (invariant load (p1) from `ptr addrspace(4) undef`, addrspace 4) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[LOAD]](p1) :: ("amdgpu-noclobber" load (<16 x s8>) from %ir.ptr, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v16i8 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]] + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY9]](p4) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY12]], [[C]](s64) + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY18]], [[C1]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY17]], [[SHL]] + ; CHECK-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY19]], [[C2]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8), [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8), [[UV8:%[0-9]+]]:_(s8), [[UV9:%[0-9]+]]:_(s8), [[UV10:%[0-9]+]]:_(s8), [[UV11:%[0-9]+]]:_(s8), [[UV12:%[0-9]+]]:_(s8), [[UV13:%[0-9]+]]:_(s8), [[UV14:%[0-9]+]]:_(s8), [[UV15:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[LOAD1]](<16 x s8>) ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV]](s8) @@ -4566,48 +4566,48 @@ ; CHECK-NEXT: [[ANYEXT14:%[0-9]+]]:_(s16) = G_ANYEXT [[UV14]](s8) ; CHECK-NEXT: [[ANYEXT15:%[0-9]+]]:_(s16) = G_ANYEXT [[UV15]](s8) ; CHECK-NEXT: [[ANYEXT16:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT]](s16) - ; 
CHECK-NEXT: $vgpr0 = COPY [[ANYEXT16]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[ANYEXT16]](s32) ; CHECK-NEXT: [[ANYEXT17:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT1]](s16) - ; CHECK-NEXT: $vgpr1 = COPY [[ANYEXT17]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[ANYEXT17]](s32) ; CHECK-NEXT: [[ANYEXT18:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT2]](s16) - ; CHECK-NEXT: $vgpr2 = COPY [[ANYEXT18]](s32) + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[ANYEXT18]](s32) ; CHECK-NEXT: [[ANYEXT19:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT3]](s16) - ; CHECK-NEXT: $vgpr3 = COPY [[ANYEXT19]](s32) + ; CHECK-NEXT: $vgpr3 = PRED_COPY [[ANYEXT19]](s32) ; CHECK-NEXT: [[ANYEXT20:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT4]](s16) - ; CHECK-NEXT: $vgpr4 = COPY [[ANYEXT20]](s32) + ; CHECK-NEXT: $vgpr4 = PRED_COPY [[ANYEXT20]](s32) ; CHECK-NEXT: [[ANYEXT21:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT5]](s16) - ; CHECK-NEXT: $vgpr5 = COPY [[ANYEXT21]](s32) + ; CHECK-NEXT: $vgpr5 = PRED_COPY [[ANYEXT21]](s32) ; CHECK-NEXT: [[ANYEXT22:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT6]](s16) - ; CHECK-NEXT: $vgpr6 = COPY [[ANYEXT22]](s32) + ; CHECK-NEXT: $vgpr6 = PRED_COPY [[ANYEXT22]](s32) ; CHECK-NEXT: [[ANYEXT23:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT7]](s16) - ; CHECK-NEXT: $vgpr7 = COPY [[ANYEXT23]](s32) + ; CHECK-NEXT: $vgpr7 = PRED_COPY [[ANYEXT23]](s32) ; CHECK-NEXT: [[ANYEXT24:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT8]](s16) - ; CHECK-NEXT: $vgpr8 = COPY [[ANYEXT24]](s32) + ; CHECK-NEXT: $vgpr8 = PRED_COPY [[ANYEXT24]](s32) ; CHECK-NEXT: [[ANYEXT25:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT9]](s16) - ; CHECK-NEXT: $vgpr9 = COPY [[ANYEXT25]](s32) + ; CHECK-NEXT: $vgpr9 = PRED_COPY [[ANYEXT25]](s32) ; CHECK-NEXT: [[ANYEXT26:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT10]](s16) - ; CHECK-NEXT: $vgpr10 = COPY [[ANYEXT26]](s32) + ; CHECK-NEXT: $vgpr10 = PRED_COPY [[ANYEXT26]](s32) ; CHECK-NEXT: [[ANYEXT27:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT11]](s16) - ; CHECK-NEXT: $vgpr11 = COPY [[ANYEXT27]](s32) + ; CHECK-NEXT: $vgpr11 = PRED_COPY [[ANYEXT27]](s32) ; CHECK-NEXT: [[ANYEXT28:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT12]](s16) - ; CHECK-NEXT: $vgpr12 = COPY [[ANYEXT28]](s32) + ; CHECK-NEXT: $vgpr12 = PRED_COPY [[ANYEXT28]](s32) ; CHECK-NEXT: [[ANYEXT29:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT13]](s16) - ; CHECK-NEXT: $vgpr13 = COPY [[ANYEXT29]](s32) + ; CHECK-NEXT: $vgpr13 = PRED_COPY [[ANYEXT29]](s32) ; CHECK-NEXT: [[ANYEXT30:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT14]](s16) - ; CHECK-NEXT: $vgpr14 = COPY [[ANYEXT30]](s32) + ; CHECK-NEXT: $vgpr14 = PRED_COPY [[ANYEXT30]](s32) ; CHECK-NEXT: [[ANYEXT31:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT15]](s16) - ; CHECK-NEXT: $vgpr15 = COPY [[ANYEXT31]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $vgpr15 = PRED_COPY [[ANYEXT31]](s32) + ; CHECK-NEXT: [[PRED_COPY20:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY20]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY10]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY11]](p4) + ; 
CHECK-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY13]](s64) + ; CHECK-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY14]](s32) + ; CHECK-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY15]](s32) + ; CHECK-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY16]](s32) + ; CHECK-NEXT: $sgpr15 = PRED_COPY [[DEF1]](s32) + ; CHECK-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v16i8, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -4622,16 +4622,16 @@ ; CHECK: bb.1.entry: ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr16 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr15 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr6_sgpr7 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr) ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[INT]](p4) :: (dereferenceable invariant load (<32 x s32>) from %ir.val.kernarg.offset1, align 16, addrspace 4) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 128 @@ -4639,24 +4639,24 @@ ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64) from %ir.tmp.kernarg.offset, align 16, addrspace 4) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @stack_passed_f64_arg - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]] + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(p4) = 
PRED_COPY [[PRED_COPY9]](p4) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 136 - ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY12]], [[C1]](s64) + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY18]], [[C2]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY17]], [[SHL]] + ; CHECK-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY19]], [[C3]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<32 x s32>) ; CHECK-NEXT: [[AMDGPU_WAVE_ADDRESS:%[0-9]+]]:_(p5) = G_AMDGPU_WAVE_ADDRESS $sp_reg @@ -4670,48 +4670,48 @@ ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; CHECK-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C6]](s32) ; CHECK-NEXT: G_STORE [[UV33]](s32), [[PTR_ADD4]](p5) :: (store (s32) into stack + 8, align 8, addrspace 5) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](s32) - ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](s32) - ; CHECK-NEXT: $vgpr8 = COPY [[UV8]](s32) - ; CHECK-NEXT: $vgpr9 = COPY [[UV9]](s32) - ; CHECK-NEXT: $vgpr10 = COPY [[UV10]](s32) - ; 
CHECK-NEXT: $vgpr11 = COPY [[UV11]](s32) - ; CHECK-NEXT: $vgpr12 = COPY [[UV12]](s32) - ; CHECK-NEXT: $vgpr13 = COPY [[UV13]](s32) - ; CHECK-NEXT: $vgpr14 = COPY [[UV14]](s32) - ; CHECK-NEXT: $vgpr15 = COPY [[UV15]](s32) - ; CHECK-NEXT: $vgpr16 = COPY [[UV16]](s32) - ; CHECK-NEXT: $vgpr17 = COPY [[UV17]](s32) - ; CHECK-NEXT: $vgpr18 = COPY [[UV18]](s32) - ; CHECK-NEXT: $vgpr19 = COPY [[UV19]](s32) - ; CHECK-NEXT: $vgpr20 = COPY [[UV20]](s32) - ; CHECK-NEXT: $vgpr21 = COPY [[UV21]](s32) - ; CHECK-NEXT: $vgpr22 = COPY [[UV22]](s32) - ; CHECK-NEXT: $vgpr23 = COPY [[UV23]](s32) - ; CHECK-NEXT: $vgpr24 = COPY [[UV24]](s32) - ; CHECK-NEXT: $vgpr25 = COPY [[UV25]](s32) - ; CHECK-NEXT: $vgpr26 = COPY [[UV26]](s32) - ; CHECK-NEXT: $vgpr27 = COPY [[UV27]](s32) - ; CHECK-NEXT: $vgpr28 = COPY [[UV28]](s32) - ; CHECK-NEXT: $vgpr29 = COPY [[UV29]](s32) - ; CHECK-NEXT: $vgpr30 = COPY [[UV30]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD1]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) + ; CHECK-NEXT: $vgpr4 = PRED_COPY [[UV4]](s32) + ; CHECK-NEXT: $vgpr5 = PRED_COPY [[UV5]](s32) + ; CHECK-NEXT: $vgpr6 = PRED_COPY [[UV6]](s32) + ; CHECK-NEXT: $vgpr7 = PRED_COPY [[UV7]](s32) + ; CHECK-NEXT: $vgpr8 = PRED_COPY [[UV8]](s32) + ; CHECK-NEXT: $vgpr9 = PRED_COPY [[UV9]](s32) + ; CHECK-NEXT: $vgpr10 = PRED_COPY [[UV10]](s32) + ; CHECK-NEXT: $vgpr11 = PRED_COPY [[UV11]](s32) + ; CHECK-NEXT: $vgpr12 = PRED_COPY [[UV12]](s32) + ; CHECK-NEXT: $vgpr13 = PRED_COPY [[UV13]](s32) + ; CHECK-NEXT: $vgpr14 = PRED_COPY [[UV14]](s32) + ; CHECK-NEXT: $vgpr15 = PRED_COPY [[UV15]](s32) + ; CHECK-NEXT: $vgpr16 = PRED_COPY [[UV16]](s32) + ; CHECK-NEXT: $vgpr17 = PRED_COPY [[UV17]](s32) + ; CHECK-NEXT: $vgpr18 = PRED_COPY [[UV18]](s32) + ; CHECK-NEXT: $vgpr19 = PRED_COPY [[UV19]](s32) + ; CHECK-NEXT: $vgpr20 = PRED_COPY [[UV20]](s32) + ; CHECK-NEXT: $vgpr21 = PRED_COPY [[UV21]](s32) + ; CHECK-NEXT: $vgpr22 = PRED_COPY [[UV22]](s32) + ; CHECK-NEXT: $vgpr23 = PRED_COPY [[UV23]](s32) + ; CHECK-NEXT: $vgpr24 = PRED_COPY [[UV24]](s32) + ; CHECK-NEXT: $vgpr25 = PRED_COPY [[UV25]](s32) + ; CHECK-NEXT: $vgpr26 = PRED_COPY [[UV26]](s32) + ; CHECK-NEXT: $vgpr27 = PRED_COPY [[UV27]](s32) + ; CHECK-NEXT: $vgpr28 = PRED_COPY [[UV28]](s32) + ; CHECK-NEXT: $vgpr29 = PRED_COPY [[UV29]](s32) + ; CHECK-NEXT: $vgpr30 = PRED_COPY [[UV30]](s32) + ; CHECK-NEXT: [[PRED_COPY20:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY20]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY10]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY11]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD1]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY13]](s64) + ; CHECK-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY14]](s32) + ; CHECK-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY15]](s32) + ; CHECK-NEXT: $sgpr14 = 
PRED_COPY [[PRED_COPY16]](s32) + ; CHECK-NEXT: $sgpr15 = PRED_COPY [[DEF]](s32) + ; CHECK-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @stack_passed_f64_arg, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 12, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -4725,15 +4725,15 @@ ; CHECK: bb.1.entry: ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr13 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr31 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr15 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr6_sgpr7 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 @@ -4764,15 +4764,15 @@ ; CHECK-NEXT: [[BUILD_VECTOR11:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[C13]](s32), [[C14]](s32), [[C15]](s32) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_12xv3i32 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]] + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) 
= PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY5]] + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]] + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]] + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<3 x s32>) ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<3 x s32>) ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR2]](<3 x s32>) @@ -4801,48 +4801,48 @@ ; CHECK-NEXT: [[C20:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; CHECK-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C20]](s32) ; CHECK-NEXT: G_STORE [[UV35]](s32), [[PTR_ADD4]](p5) :: (store (s32) into stack + 16, align 16, addrspace 5) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](s32) - ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](s32) - ; CHECK-NEXT: $vgpr8 = COPY [[UV8]](s32) - ; CHECK-NEXT: $vgpr9 = COPY [[UV9]](s32) - ; CHECK-NEXT: $vgpr10 = COPY [[UV10]](s32) - ; CHECK-NEXT: $vgpr11 = COPY [[UV11]](s32) - ; CHECK-NEXT: $vgpr12 = COPY [[UV12]](s32) - ; CHECK-NEXT: $vgpr13 = COPY [[UV13]](s32) - ; CHECK-NEXT: $vgpr14 = COPY [[UV14]](s32) - ; CHECK-NEXT: $vgpr15 = COPY [[UV15]](s32) - ; CHECK-NEXT: $vgpr16 = COPY [[UV16]](s32) - ; CHECK-NEXT: $vgpr17 = COPY [[UV17]](s32) - ; CHECK-NEXT: $vgpr18 = COPY [[UV18]](s32) - ; CHECK-NEXT: $vgpr19 = COPY [[UV19]](s32) - ; CHECK-NEXT: $vgpr20 = COPY [[UV20]](s32) - ; CHECK-NEXT: $vgpr21 = COPY [[UV21]](s32) - ; CHECK-NEXT: $vgpr22 = COPY [[UV22]](s32) - ; CHECK-NEXT: $vgpr23 = COPY [[UV23]](s32) - ; CHECK-NEXT: $vgpr24 = COPY [[UV24]](s32) - ; CHECK-NEXT: $vgpr25 = COPY [[UV25]](s32) - ; CHECK-NEXT: $vgpr26 = COPY [[UV26]](s32) - ; CHECK-NEXT: $vgpr27 = COPY [[UV27]](s32) - ; CHECK-NEXT: $vgpr28 = COPY [[UV28]](s32) - ; CHECK-NEXT: $vgpr29 = COPY [[UV29]](s32) - ; CHECK-NEXT: $vgpr30 = COPY [[UV30]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY12]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY13]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[COPY17]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) + ; CHECK-NEXT: $vgpr4 = PRED_COPY [[UV4]](s32) + ; CHECK-NEXT: $vgpr5 = PRED_COPY [[UV5]](s32) + ; CHECK-NEXT: $vgpr6 = PRED_COPY [[UV6]](s32) + ; CHECK-NEXT: $vgpr7 = 
PRED_COPY [[UV7]](s32) + ; CHECK-NEXT: $vgpr8 = PRED_COPY [[UV8]](s32) + ; CHECK-NEXT: $vgpr9 = PRED_COPY [[UV9]](s32) + ; CHECK-NEXT: $vgpr10 = PRED_COPY [[UV10]](s32) + ; CHECK-NEXT: $vgpr11 = PRED_COPY [[UV11]](s32) + ; CHECK-NEXT: $vgpr12 = PRED_COPY [[UV12]](s32) + ; CHECK-NEXT: $vgpr13 = PRED_COPY [[UV13]](s32) + ; CHECK-NEXT: $vgpr14 = PRED_COPY [[UV14]](s32) + ; CHECK-NEXT: $vgpr15 = PRED_COPY [[UV15]](s32) + ; CHECK-NEXT: $vgpr16 = PRED_COPY [[UV16]](s32) + ; CHECK-NEXT: $vgpr17 = PRED_COPY [[UV17]](s32) + ; CHECK-NEXT: $vgpr18 = PRED_COPY [[UV18]](s32) + ; CHECK-NEXT: $vgpr19 = PRED_COPY [[UV19]](s32) + ; CHECK-NEXT: $vgpr20 = PRED_COPY [[UV20]](s32) + ; CHECK-NEXT: $vgpr21 = PRED_COPY [[UV21]](s32) + ; CHECK-NEXT: $vgpr22 = PRED_COPY [[UV22]](s32) + ; CHECK-NEXT: $vgpr23 = PRED_COPY [[UV23]](s32) + ; CHECK-NEXT: $vgpr24 = PRED_COPY [[UV24]](s32) + ; CHECK-NEXT: $vgpr25 = PRED_COPY [[UV25]](s32) + ; CHECK-NEXT: $vgpr26 = PRED_COPY [[UV26]](s32) + ; CHECK-NEXT: $vgpr27 = PRED_COPY [[UV27]](s32) + ; CHECK-NEXT: $vgpr28 = PRED_COPY [[UV28]](s32) + ; CHECK-NEXT: $vgpr29 = PRED_COPY [[UV29]](s32) + ; CHECK-NEXT: $vgpr30 = PRED_COPY [[UV30]](s32) + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY18]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY9]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY10]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PRED_COPY11]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY12]](s64) + ; CHECK-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY13]](s32) + ; CHECK-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY14]](s32) + ; CHECK-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY15]](s32) + ; CHECK-NEXT: $sgpr15 = PRED_COPY [[PRED_COPY16]](s32) + ; CHECK-NEXT: $vgpr31 = PRED_COPY [[PRED_COPY17]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_12xv3i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 20, implicit-def $scc ; CHECK-NEXT: SI_RETURN @@ -4868,15 +4868,15 @@ ; CHECK: bb.1.entry: ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr13 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; 
CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr31 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr15 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr6_sgpr7 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 @@ -4907,15 +4907,15 @@ ; CHECK-NEXT: [[BUILD_VECTOR11:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[C13]](s32), [[C14]](s32), [[C15]](s32) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_12xv3f32 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]] + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY5]] + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]] + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]] + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<3 x s32>) ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<3 x s32>) ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR2]](<3 x s32>) @@ -4944,48 +4944,48 @@ ; CHECK-NEXT: [[C20:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; CHECK-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C20]](s32) ; CHECK-NEXT: G_STORE [[UV35]](s32), [[PTR_ADD4]](p5) :: (store (s32) into stack + 16, align 16, addrspace 5) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](s32) - ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](s32) - ; CHECK-NEXT: $vgpr8 = COPY [[UV8]](s32) - ; CHECK-NEXT: $vgpr9 = COPY [[UV9]](s32) - ; CHECK-NEXT: $vgpr10 = COPY [[UV10]](s32) - ; CHECK-NEXT: $vgpr11 = COPY [[UV11]](s32) - ; CHECK-NEXT: $vgpr12 
= COPY [[UV12]](s32) - ; CHECK-NEXT: $vgpr13 = COPY [[UV13]](s32) - ; CHECK-NEXT: $vgpr14 = COPY [[UV14]](s32) - ; CHECK-NEXT: $vgpr15 = COPY [[UV15]](s32) - ; CHECK-NEXT: $vgpr16 = COPY [[UV16]](s32) - ; CHECK-NEXT: $vgpr17 = COPY [[UV17]](s32) - ; CHECK-NEXT: $vgpr18 = COPY [[UV18]](s32) - ; CHECK-NEXT: $vgpr19 = COPY [[UV19]](s32) - ; CHECK-NEXT: $vgpr20 = COPY [[UV20]](s32) - ; CHECK-NEXT: $vgpr21 = COPY [[UV21]](s32) - ; CHECK-NEXT: $vgpr22 = COPY [[UV22]](s32) - ; CHECK-NEXT: $vgpr23 = COPY [[UV23]](s32) - ; CHECK-NEXT: $vgpr24 = COPY [[UV24]](s32) - ; CHECK-NEXT: $vgpr25 = COPY [[UV25]](s32) - ; CHECK-NEXT: $vgpr26 = COPY [[UV26]](s32) - ; CHECK-NEXT: $vgpr27 = COPY [[UV27]](s32) - ; CHECK-NEXT: $vgpr28 = COPY [[UV28]](s32) - ; CHECK-NEXT: $vgpr29 = COPY [[UV29]](s32) - ; CHECK-NEXT: $vgpr30 = COPY [[UV30]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY12]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY13]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[COPY17]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) + ; CHECK-NEXT: $vgpr4 = PRED_COPY [[UV4]](s32) + ; CHECK-NEXT: $vgpr5 = PRED_COPY [[UV5]](s32) + ; CHECK-NEXT: $vgpr6 = PRED_COPY [[UV6]](s32) + ; CHECK-NEXT: $vgpr7 = PRED_COPY [[UV7]](s32) + ; CHECK-NEXT: $vgpr8 = PRED_COPY [[UV8]](s32) + ; CHECK-NEXT: $vgpr9 = PRED_COPY [[UV9]](s32) + ; CHECK-NEXT: $vgpr10 = PRED_COPY [[UV10]](s32) + ; CHECK-NEXT: $vgpr11 = PRED_COPY [[UV11]](s32) + ; CHECK-NEXT: $vgpr12 = PRED_COPY [[UV12]](s32) + ; CHECK-NEXT: $vgpr13 = PRED_COPY [[UV13]](s32) + ; CHECK-NEXT: $vgpr14 = PRED_COPY [[UV14]](s32) + ; CHECK-NEXT: $vgpr15 = PRED_COPY [[UV15]](s32) + ; CHECK-NEXT: $vgpr16 = PRED_COPY [[UV16]](s32) + ; CHECK-NEXT: $vgpr17 = PRED_COPY [[UV17]](s32) + ; CHECK-NEXT: $vgpr18 = PRED_COPY [[UV18]](s32) + ; CHECK-NEXT: $vgpr19 = PRED_COPY [[UV19]](s32) + ; CHECK-NEXT: $vgpr20 = PRED_COPY [[UV20]](s32) + ; CHECK-NEXT: $vgpr21 = PRED_COPY [[UV21]](s32) + ; CHECK-NEXT: $vgpr22 = PRED_COPY [[UV22]](s32) + ; CHECK-NEXT: $vgpr23 = PRED_COPY [[UV23]](s32) + ; CHECK-NEXT: $vgpr24 = PRED_COPY [[UV24]](s32) + ; CHECK-NEXT: $vgpr25 = PRED_COPY [[UV25]](s32) + ; CHECK-NEXT: $vgpr26 = PRED_COPY [[UV26]](s32) + ; CHECK-NEXT: $vgpr27 = PRED_COPY [[UV27]](s32) + ; CHECK-NEXT: $vgpr28 = PRED_COPY [[UV28]](s32) + ; CHECK-NEXT: $vgpr29 = PRED_COPY [[UV29]](s32) + ; CHECK-NEXT: $vgpr30 = PRED_COPY [[UV30]](s32) + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY18]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY9]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY10]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PRED_COPY11]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY12]](s64) + ; CHECK-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY13]](s32) + ; CHECK-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY14]](s32) + ; CHECK-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY15]](s32) + ; CHECK-NEXT: 
$sgpr15 = PRED_COPY [[PRED_COPY16]](s32) + ; CHECK-NEXT: $vgpr31 = PRED_COPY [[PRED_COPY17]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_12xv3f32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 20, implicit-def $scc ; CHECK-NEXT: SI_RETURN @@ -5011,15 +5011,15 @@ ; CHECK: bb.1.entry: ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr13 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr31 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr15 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr6_sgpr7 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 @@ -5046,15 +5046,15 @@ ; CHECK-NEXT: [[BUILD_VECTOR7:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[C11]](s32), [[C12]](s32), [[C13]](s32), [[C14]](s32), [[C15]](s32) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_8xv5i32 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]] + ; CHECK-NEXT: 
[[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY5]] + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]] + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]] + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<5 x s32>) ; CHECK-NEXT: [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<5 x s32>) ; CHECK-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR2]](<5 x s32>) @@ -5091,48 +5091,48 @@ ; CHECK-NEXT: [[C24:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 ; CHECK-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C24]](s32) ; CHECK-NEXT: G_STORE [[UV39]](s32), [[PTR_ADD8]](p5) :: (store (s32) into stack + 32, align 16, addrspace 5) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](s32) - ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](s32) - ; CHECK-NEXT: $vgpr8 = COPY [[UV8]](s32) - ; CHECK-NEXT: $vgpr9 = COPY [[UV9]](s32) - ; CHECK-NEXT: $vgpr10 = COPY [[UV10]](s32) - ; CHECK-NEXT: $vgpr11 = COPY [[UV11]](s32) - ; CHECK-NEXT: $vgpr12 = COPY [[UV12]](s32) - ; CHECK-NEXT: $vgpr13 = COPY [[UV13]](s32) - ; CHECK-NEXT: $vgpr14 = COPY [[UV14]](s32) - ; CHECK-NEXT: $vgpr15 = COPY [[UV15]](s32) - ; CHECK-NEXT: $vgpr16 = COPY [[UV16]](s32) - ; CHECK-NEXT: $vgpr17 = COPY [[UV17]](s32) - ; CHECK-NEXT: $vgpr18 = COPY [[UV18]](s32) - ; CHECK-NEXT: $vgpr19 = COPY [[UV19]](s32) - ; CHECK-NEXT: $vgpr20 = COPY [[UV20]](s32) - ; CHECK-NEXT: $vgpr21 = COPY [[UV21]](s32) - ; CHECK-NEXT: $vgpr22 = COPY [[UV22]](s32) - ; CHECK-NEXT: $vgpr23 = COPY [[UV23]](s32) - ; CHECK-NEXT: $vgpr24 = COPY [[UV24]](s32) - ; CHECK-NEXT: $vgpr25 = COPY [[UV25]](s32) - ; CHECK-NEXT: $vgpr26 = COPY [[UV26]](s32) - ; CHECK-NEXT: $vgpr27 = COPY [[UV27]](s32) - ; CHECK-NEXT: $vgpr28 = COPY [[UV28]](s32) - ; CHECK-NEXT: $vgpr29 = COPY [[UV29]](s32) - ; CHECK-NEXT: $vgpr30 = COPY [[UV30]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY12]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY13]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[COPY17]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = PRED_COPY 
[[UV3]](s32) + ; CHECK-NEXT: $vgpr4 = PRED_COPY [[UV4]](s32) + ; CHECK-NEXT: $vgpr5 = PRED_COPY [[UV5]](s32) + ; CHECK-NEXT: $vgpr6 = PRED_COPY [[UV6]](s32) + ; CHECK-NEXT: $vgpr7 = PRED_COPY [[UV7]](s32) + ; CHECK-NEXT: $vgpr8 = PRED_COPY [[UV8]](s32) + ; CHECK-NEXT: $vgpr9 = PRED_COPY [[UV9]](s32) + ; CHECK-NEXT: $vgpr10 = PRED_COPY [[UV10]](s32) + ; CHECK-NEXT: $vgpr11 = PRED_COPY [[UV11]](s32) + ; CHECK-NEXT: $vgpr12 = PRED_COPY [[UV12]](s32) + ; CHECK-NEXT: $vgpr13 = PRED_COPY [[UV13]](s32) + ; CHECK-NEXT: $vgpr14 = PRED_COPY [[UV14]](s32) + ; CHECK-NEXT: $vgpr15 = PRED_COPY [[UV15]](s32) + ; CHECK-NEXT: $vgpr16 = PRED_COPY [[UV16]](s32) + ; CHECK-NEXT: $vgpr17 = PRED_COPY [[UV17]](s32) + ; CHECK-NEXT: $vgpr18 = PRED_COPY [[UV18]](s32) + ; CHECK-NEXT: $vgpr19 = PRED_COPY [[UV19]](s32) + ; CHECK-NEXT: $vgpr20 = PRED_COPY [[UV20]](s32) + ; CHECK-NEXT: $vgpr21 = PRED_COPY [[UV21]](s32) + ; CHECK-NEXT: $vgpr22 = PRED_COPY [[UV22]](s32) + ; CHECK-NEXT: $vgpr23 = PRED_COPY [[UV23]](s32) + ; CHECK-NEXT: $vgpr24 = PRED_COPY [[UV24]](s32) + ; CHECK-NEXT: $vgpr25 = PRED_COPY [[UV25]](s32) + ; CHECK-NEXT: $vgpr26 = PRED_COPY [[UV26]](s32) + ; CHECK-NEXT: $vgpr27 = PRED_COPY [[UV27]](s32) + ; CHECK-NEXT: $vgpr28 = PRED_COPY [[UV28]](s32) + ; CHECK-NEXT: $vgpr29 = PRED_COPY [[UV29]](s32) + ; CHECK-NEXT: $vgpr30 = PRED_COPY [[UV30]](s32) + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY18]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY9]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY10]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PRED_COPY11]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY12]](s64) + ; CHECK-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY13]](s32) + ; CHECK-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY14]](s32) + ; CHECK-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY15]](s32) + ; CHECK-NEXT: $sgpr15 = PRED_COPY [[PRED_COPY16]](s32) + ; CHECK-NEXT: $vgpr31 = PRED_COPY [[PRED_COPY17]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_8xv5i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 36, implicit-def $scc ; CHECK-NEXT: SI_RETURN @@ -5154,15 +5154,15 @@ ; CHECK: bb.1.entry: ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr13 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: 
[[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr31 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr15 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr6_sgpr7 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 @@ -5189,15 +5189,15 @@ ; CHECK-NEXT: [[BUILD_VECTOR7:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[C11]](s32), [[C12]](s32), [[C13]](s32), [[C14]](s32), [[C15]](s32) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_8xv5f32 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]] + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY5]] + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]] + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]] + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<5 x s32>) ; CHECK-NEXT: [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<5 x s32>) ; CHECK-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR2]](<5 x s32>) @@ -5234,48 +5234,48 @@ ; CHECK-NEXT: [[C24:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 ; CHECK-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C24]](s32) ; CHECK-NEXT: G_STORE [[UV39]](s32), [[PTR_ADD8]](p5) :: (store (s32) into stack + 32, align 16, addrspace 5) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY 
[[UV3]](s32) - ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](s32) - ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](s32) - ; CHECK-NEXT: $vgpr8 = COPY [[UV8]](s32) - ; CHECK-NEXT: $vgpr9 = COPY [[UV9]](s32) - ; CHECK-NEXT: $vgpr10 = COPY [[UV10]](s32) - ; CHECK-NEXT: $vgpr11 = COPY [[UV11]](s32) - ; CHECK-NEXT: $vgpr12 = COPY [[UV12]](s32) - ; CHECK-NEXT: $vgpr13 = COPY [[UV13]](s32) - ; CHECK-NEXT: $vgpr14 = COPY [[UV14]](s32) - ; CHECK-NEXT: $vgpr15 = COPY [[UV15]](s32) - ; CHECK-NEXT: $vgpr16 = COPY [[UV16]](s32) - ; CHECK-NEXT: $vgpr17 = COPY [[UV17]](s32) - ; CHECK-NEXT: $vgpr18 = COPY [[UV18]](s32) - ; CHECK-NEXT: $vgpr19 = COPY [[UV19]](s32) - ; CHECK-NEXT: $vgpr20 = COPY [[UV20]](s32) - ; CHECK-NEXT: $vgpr21 = COPY [[UV21]](s32) - ; CHECK-NEXT: $vgpr22 = COPY [[UV22]](s32) - ; CHECK-NEXT: $vgpr23 = COPY [[UV23]](s32) - ; CHECK-NEXT: $vgpr24 = COPY [[UV24]](s32) - ; CHECK-NEXT: $vgpr25 = COPY [[UV25]](s32) - ; CHECK-NEXT: $vgpr26 = COPY [[UV26]](s32) - ; CHECK-NEXT: $vgpr27 = COPY [[UV27]](s32) - ; CHECK-NEXT: $vgpr28 = COPY [[UV28]](s32) - ; CHECK-NEXT: $vgpr29 = COPY [[UV29]](s32) - ; CHECK-NEXT: $vgpr30 = COPY [[UV30]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY12]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY13]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[COPY17]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) + ; CHECK-NEXT: $vgpr4 = PRED_COPY [[UV4]](s32) + ; CHECK-NEXT: $vgpr5 = PRED_COPY [[UV5]](s32) + ; CHECK-NEXT: $vgpr6 = PRED_COPY [[UV6]](s32) + ; CHECK-NEXT: $vgpr7 = PRED_COPY [[UV7]](s32) + ; CHECK-NEXT: $vgpr8 = PRED_COPY [[UV8]](s32) + ; CHECK-NEXT: $vgpr9 = PRED_COPY [[UV9]](s32) + ; CHECK-NEXT: $vgpr10 = PRED_COPY [[UV10]](s32) + ; CHECK-NEXT: $vgpr11 = PRED_COPY [[UV11]](s32) + ; CHECK-NEXT: $vgpr12 = PRED_COPY [[UV12]](s32) + ; CHECK-NEXT: $vgpr13 = PRED_COPY [[UV13]](s32) + ; CHECK-NEXT: $vgpr14 = PRED_COPY [[UV14]](s32) + ; CHECK-NEXT: $vgpr15 = PRED_COPY [[UV15]](s32) + ; CHECK-NEXT: $vgpr16 = PRED_COPY [[UV16]](s32) + ; CHECK-NEXT: $vgpr17 = PRED_COPY [[UV17]](s32) + ; CHECK-NEXT: $vgpr18 = PRED_COPY [[UV18]](s32) + ; CHECK-NEXT: $vgpr19 = PRED_COPY [[UV19]](s32) + ; CHECK-NEXT: $vgpr20 = PRED_COPY [[UV20]](s32) + ; CHECK-NEXT: $vgpr21 = PRED_COPY [[UV21]](s32) + ; CHECK-NEXT: $vgpr22 = PRED_COPY [[UV22]](s32) + ; CHECK-NEXT: $vgpr23 = PRED_COPY [[UV23]](s32) + ; CHECK-NEXT: $vgpr24 = PRED_COPY [[UV24]](s32) + ; CHECK-NEXT: $vgpr25 = PRED_COPY [[UV25]](s32) + ; CHECK-NEXT: $vgpr26 = PRED_COPY [[UV26]](s32) + ; CHECK-NEXT: $vgpr27 = PRED_COPY [[UV27]](s32) + ; CHECK-NEXT: $vgpr28 = PRED_COPY [[UV28]](s32) + ; CHECK-NEXT: $vgpr29 = PRED_COPY [[UV29]](s32) + ; CHECK-NEXT: $vgpr30 = PRED_COPY [[UV30]](s32) + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY18]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = PRED_COPY 
[[PRED_COPY9]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY10]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PRED_COPY11]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY12]](s64) + ; CHECK-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY13]](s32) + ; CHECK-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY14]](s32) + ; CHECK-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY15]](s32) + ; CHECK-NEXT: $sgpr15 = PRED_COPY [[PRED_COPY16]](s32) + ; CHECK-NEXT: $vgpr31 = PRED_COPY [[PRED_COPY17]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_8xv5f32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 36, implicit-def $scc ; CHECK-NEXT: SI_RETURN @@ -5298,27 +5298,27 @@ ; CHECK-NEXT: [[C:%[0-9]+]]:_(p0) = G_CONSTANT i64 0 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY [[DEF]](p4) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY [[DEF]](p4) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(p4) = G_CONSTANT i64 0 ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p4) = COPY [[C1]](p4) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(p4) = PRED_COPY [[C1]](p4) ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[DEF2]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[DEF2]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[DEF2]](s32) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[DEF2]](s32) - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY6]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[DEF]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY1]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[DEF1]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[DEF2]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY2]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY3]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[COPY4]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[COPY5]](s32) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[DEF2]](s32) + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY [[DEF2]](s32) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY [[DEF2]](s32) + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY [[DEF2]](s32) + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY6]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = PRED_COPY [[DEF]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PRED_COPY1]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = 
PRED_COPY [[DEF1]](s64) + ; CHECK-NEXT: $sgpr12 = PRED_COPY [[DEF2]](s32) + ; CHECK-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY2]](s32) + ; CHECK-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY3]](s32) + ; CHECK-NEXT: $sgpr15 = PRED_COPY [[PRED_COPY4]](s32) + ; CHECK-NEXT: $vgpr31 = PRED_COPY [[PRED_COPY5]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[C]](p0), 0, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-constantexpr.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-constantexpr.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-constantexpr.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-constantexpr.ll @@ -11,11 +11,11 @@ ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @var ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[INTTOPTR]](p0), [[GV]] ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP]](s1) - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[ZEXT]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[ZEXT]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY3]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 ret i32 bitcast (<1 x i32> bitcast (i32 zext (i1 icmp eq (ptr @var, ptr inttoptr (i32 -1 to ptr)) to i32) to <1 x i32>), i64 0)> to i32) } @@ -71,7 +71,7 @@ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 ; CHECK-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oeq), [[UITOFP]](s32), [[C1]] ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FCMP]](s1) - ; CHECK-NEXT: $vgpr0 = COPY [[ZEXT]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[ZEXT]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 entry: ret i32 zext (i1 fcmp oeq (float uitofp (i1 icmp eq (ptr getelementptr inbounds ([2 x i32], ptr @a, i64 0, i64 1), ptr @var) to float), float 0.000000e+00) to i32) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-constrained-fp.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-constrained-fp.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-constrained-fp.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-constrained-fp.ll @@ -6,10 +6,10 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[STRICT_FADD:%[0-9]+]]:_(s32) = G_STRICT_FADD [[COPY]], [[COPY1]] - ; CHECK-NEXT: $vgpr0 = COPY [[STRICT_FADD]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[STRICT_FADD:%[0-9]+]]:_(s32) = G_STRICT_FADD [[PRED_COPY]], [[PRED_COPY1]] + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[STRICT_FADD]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = call float 
@llvm.experimental.constrained.fadd.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.strict") ret float %val @@ -20,10 +20,10 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[STRICT_FADD:%[0-9]+]]:_(s32) = nsz G_STRICT_FADD [[COPY]], [[COPY1]] - ; CHECK-NEXT: $vgpr0 = COPY [[STRICT_FADD]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[STRICT_FADD:%[0-9]+]]:_(s32) = nsz G_STRICT_FADD [[PRED_COPY]], [[PRED_COPY1]] + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[STRICT_FADD]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = call nsz float @llvm.experimental.constrained.fadd.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.strict") ret float %val @@ -34,10 +34,10 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: %2:_(s32) = nofpexcept G_STRICT_FADD [[COPY]], [[COPY1]] - ; CHECK-NEXT: $vgpr0 = COPY %2(s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[STRICT_FADD:%[0-9]+]]:_(s32) = nofpexcept G_STRICT_FADD [[PRED_COPY]], [[PRED_COPY1]] + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[STRICT_FADD]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = call float @llvm.experimental.constrained.fadd.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.ignore") ret float %val @@ -48,10 +48,10 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: %2:_(s32) = nsz nofpexcept G_STRICT_FADD [[COPY]], [[COPY1]] - ; CHECK-NEXT: $vgpr0 = COPY %2(s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[STRICT_FADD:%[0-9]+]]:_(s32) = nsz nofpexcept G_STRICT_FADD [[PRED_COPY]], [[PRED_COPY1]] + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[STRICT_FADD]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = call nsz float @llvm.experimental.constrained.fadd.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.ignore") ret float %val @@ -62,10 +62,10 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[STRICT_FADD:%[0-9]+]]:_(s32) = G_STRICT_FADD [[COPY]], [[COPY1]] - ; CHECK-NEXT: $vgpr0 = COPY [[STRICT_FADD]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[STRICT_FADD:%[0-9]+]]:_(s32) = G_STRICT_FADD [[PRED_COPY]], [[PRED_COPY1]] + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[STRICT_FADD]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = call float @llvm.experimental.constrained.fadd.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.maytrap") ret float %val @@ -76,16 +76,16 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: 
[[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) ; CHECK-NEXT: [[STRICT_FADD:%[0-9]+]]:_(<2 x s32>) = G_STRICT_FADD [[BUILD_VECTOR]], [[BUILD_VECTOR1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[STRICT_FADD]](<2 x s32>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %val = call <2 x float> @llvm.experimental.constrained.fadd.v2f32(<2 x float> %x, <2 x float> %y, metadata !"round.tonearest", metadata !"fpexcept.strict") ret <2 x float> %val @@ -96,16 +96,16 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: %6:_(<2 x s32>) = nofpexcept G_STRICT_FADD [[BUILD_VECTOR]], [[BUILD_VECTOR1]] - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES %6(<2 x s32>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; CHECK-NEXT: [[STRICT_FADD:%[0-9]+]]:_(<2 x s32>) = nofpexcept G_STRICT_FADD [[BUILD_VECTOR]], [[BUILD_VECTOR1]] + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[STRICT_FADD]](<2 x s32>) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %val = call <2 x float> @llvm.experimental.constrained.fadd.v2f32(<2 x float> %x, <2 x float> %y, metadata !"round.tonearest", metadata !"fpexcept.ignore") ret <2 x float> %val @@ -116,16 +116,16 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; 
CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) ; CHECK-NEXT: [[STRICT_FADD:%[0-9]+]]:_(<2 x s32>) = G_STRICT_FADD [[BUILD_VECTOR]], [[BUILD_VECTOR1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[STRICT_FADD]](<2 x s32>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %val = call <2 x float> @llvm.experimental.constrained.fadd.v2f32(<2 x float> %x, <2 x float> %y, metadata !"round.tonearest", metadata !"fpexcept.maytrap") ret <2 x float> %val @@ -136,10 +136,10 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: %2:_(s32) = nsz nofpexcept G_STRICT_FSUB [[COPY]], [[COPY1]] - ; CHECK-NEXT: $vgpr0 = COPY %2(s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[STRICT_FSUB:%[0-9]+]]:_(s32) = nsz nofpexcept G_STRICT_FSUB [[PRED_COPY]], [[PRED_COPY1]] + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[STRICT_FSUB]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = call nsz float @llvm.experimental.constrained.fsub.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.ignore") ret float %val @@ -150,10 +150,10 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: %2:_(s32) = nsz nofpexcept G_STRICT_FMUL [[COPY]], [[COPY1]] - ; CHECK-NEXT: $vgpr0 = COPY %2(s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[STRICT_FMUL:%[0-9]+]]:_(s32) = nsz nofpexcept G_STRICT_FMUL [[PRED_COPY]], [[PRED_COPY1]] + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[STRICT_FMUL]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = call nsz float @llvm.experimental.constrained.fmul.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.ignore") ret float %val @@ -164,10 +164,10 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: %2:_(s32) = nsz nofpexcept G_STRICT_FDIV [[COPY]], [[COPY1]] - ; CHECK-NEXT: $vgpr0 = COPY %2(s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: 
[[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[STRICT_FDIV:%[0-9]+]]:_(s32) = nsz nofpexcept G_STRICT_FDIV [[PRED_COPY]], [[PRED_COPY1]] + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[STRICT_FDIV]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = call nsz float @llvm.experimental.constrained.fdiv.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.ignore") ret float %val @@ -178,10 +178,10 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: %2:_(s32) = nsz nofpexcept G_STRICT_FREM [[COPY]], [[COPY1]] - ; CHECK-NEXT: $vgpr0 = COPY %2(s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[STRICT_FREM:%[0-9]+]]:_(s32) = nsz nofpexcept G_STRICT_FREM [[PRED_COPY]], [[PRED_COPY1]] + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[STRICT_FREM]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = call nsz float @llvm.experimental.constrained.frem.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.ignore") ret float %val @@ -192,11 +192,11 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: %3:_(s32) = nsz nofpexcept G_STRICT_FMA [[COPY]], [[COPY1]], [[COPY2]] - ; CHECK-NEXT: $vgpr0 = COPY %3(s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[STRICT_FMA:%[0-9]+]]:_(s32) = nsz nofpexcept G_STRICT_FMA [[PRED_COPY]], [[PRED_COPY1]], [[PRED_COPY2]] + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[STRICT_FMA]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = call nsz float @llvm.experimental.constrained.fma.f32(float %x, float %y, float %z, metadata !"round.tonearest", metadata !"fpexcept.ignore") ret float %val @@ -207,9 +207,9 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[STRICT_FSQRT:%[0-9]+]]:_(s32) = G_STRICT_FSQRT [[COPY]] - ; CHECK-NEXT: $vgpr0 = COPY [[STRICT_FSQRT]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[STRICT_FSQRT:%[0-9]+]]:_(s32) = G_STRICT_FSQRT [[PRED_COPY]] + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[STRICT_FSQRT]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = call float @llvm.experimental.constrained.sqrt.f32(float %x, metadata !"round.tonearest", metadata !"fpexcept.strict") ret float %val diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-fixed-function-abi-vgpr-args.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-fixed-function-abi-vgpr-args.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-fixed-function-abi-vgpr-args.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-fixed-function-abi-vgpr-args.ll @@ -9,37 +9,37 @@ ; FIXED: bb.1 (%ir-block.0): ; FIXED-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, 
$vgpr30 ; FIXED-NEXT: {{ $}} - ; FIXED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; FIXED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; FIXED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; FIXED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; FIXED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; FIXED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; FIXED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; FIXED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; FIXED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; FIXED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; FIXED-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; FIXED-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; FIXED-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; FIXED-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; FIXED-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; FIXED-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; FIXED-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; FIXED-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; FIXED-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 - ; FIXED-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 - ; FIXED-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 - ; FIXED-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 - ; FIXED-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 - ; FIXED-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 - ; FIXED-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24 - ; FIXED-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25 - ; FIXED-NEXT: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26 - ; FIXED-NEXT: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27 - ; FIXED-NEXT: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28 - ; FIXED-NEXT: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29 - ; FIXED-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30 + ; FIXED-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; FIXED-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; FIXED-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; FIXED-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; FIXED-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; FIXED-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; FIXED-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $vgpr6 + ; FIXED-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $vgpr7 + ; FIXED-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr8 + ; FIXED-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr9 + ; FIXED-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr10 + ; FIXED-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $vgpr11 + ; FIXED-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr12 + ; FIXED-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr13 + ; FIXED-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr14 + ; FIXED-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr15 + ; FIXED-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY $vgpr16 + ; FIXED-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY $vgpr17 + ; FIXED-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY $vgpr18 + ; FIXED-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY $vgpr19 + ; FIXED-NEXT: [[PRED_COPY20:%[0-9]+]]:_(s32) = PRED_COPY $vgpr20 + ; FIXED-NEXT: [[PRED_COPY21:%[0-9]+]]:_(s32) = PRED_COPY $vgpr21 + ; FIXED-NEXT: [[PRED_COPY22:%[0-9]+]]:_(s32) = PRED_COPY $vgpr22 + ; FIXED-NEXT: [[PRED_COPY23:%[0-9]+]]:_(s32) = PRED_COPY $vgpr23 + ; FIXED-NEXT: [[PRED_COPY24:%[0-9]+]]:_(s32) = PRED_COPY $vgpr24 + ; FIXED-NEXT: [[PRED_COPY25:%[0-9]+]]:_(s32) = PRED_COPY $vgpr25 + ; FIXED-NEXT: [[PRED_COPY26:%[0-9]+]]:_(s32) = PRED_COPY 
$vgpr26 + ; FIXED-NEXT: [[PRED_COPY27:%[0-9]+]]:_(s32) = PRED_COPY $vgpr27 + ; FIXED-NEXT: [[PRED_COPY28:%[0-9]+]]:_(s32) = PRED_COPY $vgpr28 + ; FIXED-NEXT: [[PRED_COPY29:%[0-9]+]]:_(s32) = PRED_COPY $vgpr29 + ; FIXED-NEXT: [[PRED_COPY30:%[0-9]+]]:_(s32) = PRED_COPY $vgpr30 ; FIXED-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 ; FIXED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.0, align 16, addrspace 5) ; FIXED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-function-args.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-function-args.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-function-args.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-function-args.ll @@ -10,9 +10,9 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[COPY]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[PRED_COPY]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN store i32 %arg1, ptr addrspace(1) undef ret void @@ -23,9 +23,9 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[COPY]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[PRED_COPY]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN store i32 %arg1, ptr addrspace(1) undef ret void @@ -36,8 +36,8 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[PRED_COPY]](s32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[TRUNC]](s1), [[DEF]](p1) :: (store (s1) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN @@ -50,8 +50,8 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[COPY]], 1 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[PRED_COPY]], 1 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[ASSERT_ZEXT]](s32) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF @@ -70,8 +70,8 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[ASSERT_SEXT:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[COPY]], 1 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[ASSERT_SEXT:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[PRED_COPY]], 1 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[ASSERT_SEXT]](s32) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; 
CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF @@ -91,8 +91,8 @@ ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000) ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[PRED_COPY]](s32) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s1) = G_CONSTANT i1 true ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF @@ -126,8 +126,8 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY]](s32) ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC]](s16) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[TRUNC1]](s8), [[DEF]](p1) :: (store (s8) into `ptr addrspace(1) undef`, addrspace 1) @@ -141,8 +141,8 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[COPY]], 8 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[PRED_COPY]], 8 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[ASSERT_ZEXT]](s32) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF @@ -161,8 +161,8 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[ASSERT_SEXT:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[COPY]], 8 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[ASSERT_SEXT:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[PRED_COPY]], 8 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[ASSERT_SEXT]](s32) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF @@ -181,8 +181,8 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY]](s32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[TRUNC]](s16), [[DEF]](p1) :: (store (s16) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN @@ -195,8 +195,8 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[COPY]], 16 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[PRED_COPY]], 16 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[ASSERT_ZEXT]](s32) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF @@ -215,8 +215,8 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: 
[[ASSERT_SEXT:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[COPY]], 16 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[ASSERT_SEXT:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[PRED_COPY]], 16 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[ASSERT_SEXT]](s32) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF @@ -235,8 +235,8 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s24) = G_TRUNC [[COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s24) = G_TRUNC [[PRED_COPY]](s32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[TRUNC]](s24), [[DEF]](p1) :: (store (s24) into `ptr addrspace(1) undef`, align 4, addrspace 1) ; CHECK-NEXT: SI_RETURN @@ -249,8 +249,8 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[COPY]], 24 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[PRED_COPY]], 24 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s24) = G_TRUNC [[ASSERT_ZEXT]](s32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[TRUNC]](s24), [[DEF]](p1) :: (store (s24) into `ptr addrspace(1) undef`, align 4, addrspace 1) @@ -264,8 +264,8 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[ASSERT_SEXT:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[COPY]], 24 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[ASSERT_SEXT:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[PRED_COPY]], 24 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s24) = G_TRUNC [[ASSERT_SEXT]](s32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[TRUNC]](s24), [[DEF]](p1) :: (store (s24) into `ptr addrspace(1) undef`, align 4, addrspace 1) @@ -279,9 +279,9 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[COPY]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[PRED_COPY]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN store i32 %arg0, ptr addrspace(1) undef ret void @@ -293,9 +293,9 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[COPY]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[PRED_COPY]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN store i32 %arg0, ptr addrspace(1) undef ret void @@ -307,9 +307,9 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 ; CHECK-NEXT: 
[[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[COPY]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[PRED_COPY]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN store i32 %arg0, ptr addrspace(1) undef ret void @@ -320,9 +320,9 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(p3) = PRED_COPY $vgpr0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[COPY]](p3), [[DEF]](p1) :: (store (p3) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[PRED_COPY]](p3), [[DEF]](p1) :: (store (p3) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN store ptr addrspace(3) %arg0, ptr addrspace(1) undef ret void @@ -333,9 +333,9 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY]](s32), [[PRED_COPY1]](s32) ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[MV]](s64) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[TRUNC]](s48), [[DEF]](p1) :: (store (s48) into `ptr addrspace(1) undef`, align 8, addrspace 1) @@ -349,9 +349,9 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY]](s32), [[PRED_COPY1]](s32) ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[MV]](s64) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF @@ -370,9 +370,9 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY]](s32), [[PRED_COPY1]](s32) ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[MV]](s64) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF @@ -391,9 +391,9 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES 
[[PRED_COPY]](s32), [[PRED_COPY1]](s32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[MV]](s64), [[DEF]](p1) :: (store (s64) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN @@ -406,10 +406,10 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32) ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s95) = G_TRUNC [[MV]](s96) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[TRUNC]](s95), [[DEF]](p1) :: (store (s95) into `ptr addrspace(1) undef`, align 8, addrspace 1) @@ -423,10 +423,10 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32) ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s95) = G_TRUNC [[MV]](s96) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s96) = G_CONSTANT i96 12 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF @@ -445,10 +445,10 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32) ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s95) = G_TRUNC [[MV]](s96) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s96) = G_CONSTANT i96 12 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF @@ -467,10 +467,10 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[PRED_COPY]](s32), [[PRED_COPY1]](s32), 
[[PRED_COPY2]](s32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[MV]](s96), [[DEF]](p1) :: (store (s96) into `ptr addrspace(1) undef`, align 8, addrspace 1) ; CHECK-NEXT: SI_RETURN @@ -483,9 +483,9 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[PRED_COPY]](s32), [[PRED_COPY1]](s32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[MV]](p0), [[DEF]](p1) :: (store (p0) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN @@ -498,9 +498,9 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[PRED_COPY]](s32), [[PRED_COPY1]](s32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[MV]](p1), [[DEF]](p1) :: (store (p1) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN @@ -513,8 +513,8 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY]](s32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[TRUNC]](s16), [[DEF]](p1) :: (store (s16) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN @@ -527,9 +527,9 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[COPY]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[PRED_COPY]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN store float %arg0, ptr addrspace(1) undef ret void @@ -540,9 +540,9 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY]](s32), [[PRED_COPY1]](s32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[MV]](s64), [[DEF]](p1) :: (store (s64) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN @@ -555,9 +555,9 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ 
$}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<2 x s32>), [[DEF]](p1) :: (store (<2 x s32>) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN @@ -570,9 +570,9 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32) ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<2 x s24>) = G_TRUNC [[BUILD_VECTOR]](<2 x s32>) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[TRUNC]](<2 x s24>), [[DEF]](p1) :: (store (<2 x s24>) into `ptr addrspace(1) undef`, align 8, addrspace 1) @@ -586,10 +586,10 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32) ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<3 x s24>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[TRUNC]](<3 x s24>), [[DEF]](p1) :: (store (<3 x s24>) into `ptr addrspace(1) undef`, align 16, addrspace 1) @@ -603,10 +603,10 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY1]](s32) ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(<2 x s8>) = G_TRUNC [[BUILD_VECTOR]](<2 x s16>) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF @@ -621,12 +621,12 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = 
G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY2]](s32) ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16) ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[BUILD_VECTOR]](<3 x s16>) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF @@ -641,14 +641,14 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY3]](s32) ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16) ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s16>) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF @@ -663,9 +663,9 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p3) = COPY $vgpr1 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[COPY]](p3), [[COPY1]](p3) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(p3) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(p3) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[PRED_COPY]](p3), [[PRED_COPY1]](p3) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<2 x p3>), [[DEF]](p1) :: (store (<2 x p3>) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN @@ -678,10 +678,10 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = 
PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<3 x s32>), [[DEF]](p1) :: (store (<3 x s32>) into `ptr addrspace(1) undef`, align 16, addrspace 1) ; CHECK-NEXT: SI_RETURN @@ -694,11 +694,11 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[DEF]](p1) :: (store (<4 x s32>) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN @@ -711,12 +711,12 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<5 x s32>), [[DEF]](p1) :: (store (<5 x s32>) into `ptr addrspace(1) undef`, align 32, addrspace 1) ; CHECK-NEXT: SI_RETURN @@ -729,15 +729,15 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), 
[[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $vgpr6 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $vgpr7 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<8 x s32>), [[DEF]](p1) :: (store (<8 x s32>) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN @@ -750,23 +750,23 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $vgpr6 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $vgpr7 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr8 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr9 + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr10 + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $vgpr11 + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr12 + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr13 + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr14 + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) 
= PRED_COPY $vgpr15 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32), [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32), [[PRED_COPY12]](s32), [[PRED_COPY13]](s32), [[PRED_COPY14]](s32), [[PRED_COPY15]](s32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<16 x s32>), [[DEF]](p1) :: (store (<16 x s32>) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN @@ -779,40 +779,40 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 - ; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 - ; CHECK-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 - ; CHECK-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 - ; CHECK-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24 - ; CHECK-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25 - ; CHECK-NEXT: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26 - ; CHECK-NEXT: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27 - ; CHECK-NEXT: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28 - ; CHECK-NEXT: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29 - ; CHECK-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $vgpr6 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $vgpr7 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr8 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr9 + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr10 + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $vgpr11 + ; CHECK-NEXT: 
[[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr12 + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr13 + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr14 + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr15 + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY $vgpr16 + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY $vgpr17 + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY $vgpr18 + ; CHECK-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY $vgpr19 + ; CHECK-NEXT: [[PRED_COPY20:%[0-9]+]]:_(s32) = PRED_COPY $vgpr20 + ; CHECK-NEXT: [[PRED_COPY21:%[0-9]+]]:_(s32) = PRED_COPY $vgpr21 + ; CHECK-NEXT: [[PRED_COPY22:%[0-9]+]]:_(s32) = PRED_COPY $vgpr22 + ; CHECK-NEXT: [[PRED_COPY23:%[0-9]+]]:_(s32) = PRED_COPY $vgpr23 + ; CHECK-NEXT: [[PRED_COPY24:%[0-9]+]]:_(s32) = PRED_COPY $vgpr24 + ; CHECK-NEXT: [[PRED_COPY25:%[0-9]+]]:_(s32) = PRED_COPY $vgpr25 + ; CHECK-NEXT: [[PRED_COPY26:%[0-9]+]]:_(s32) = PRED_COPY $vgpr26 + ; CHECK-NEXT: [[PRED_COPY27:%[0-9]+]]:_(s32) = PRED_COPY $vgpr27 + ; CHECK-NEXT: [[PRED_COPY28:%[0-9]+]]:_(s32) = PRED_COPY $vgpr28 + ; CHECK-NEXT: [[PRED_COPY29:%[0-9]+]]:_(s32) = PRED_COPY $vgpr29 + ; CHECK-NEXT: [[PRED_COPY30:%[0-9]+]]:_(s32) = PRED_COPY $vgpr30 ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.0, align 16, addrspace 5) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[LOAD]](s32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32), [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32), [[PRED_COPY12]](s32), [[PRED_COPY13]](s32), [[PRED_COPY14]](s32), [[PRED_COPY15]](s32), [[PRED_COPY16]](s32), [[PRED_COPY17]](s32), [[PRED_COPY18]](s32), [[PRED_COPY19]](s32), [[PRED_COPY20]](s32), [[PRED_COPY21]](s32), [[PRED_COPY22]](s32), [[PRED_COPY23]](s32), [[PRED_COPY24]](s32), [[PRED_COPY25]](s32), [[PRED_COPY26]](s32), [[PRED_COPY27]](s32), [[PRED_COPY28]](s32), [[PRED_COPY29]](s32), [[PRED_COPY30]](s32), [[LOAD]](s32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (store (<32 x s32>) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN @@ -826,42 +826,42 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY 
$vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 - ; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 - ; CHECK-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 - ; CHECK-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 - ; CHECK-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24 - ; CHECK-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25 - ; CHECK-NEXT: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26 - ; CHECK-NEXT: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27 - ; CHECK-NEXT: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28 - ; CHECK-NEXT: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29 - ; CHECK-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $vgpr6 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $vgpr7 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr8 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr9 + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr10 + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $vgpr11 + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr12 + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr13 + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr14 + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr15 + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY $vgpr16 + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY $vgpr17 + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY $vgpr18 + ; CHECK-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY $vgpr19 + ; CHECK-NEXT: [[PRED_COPY20:%[0-9]+]]:_(s32) = PRED_COPY $vgpr20 + ; CHECK-NEXT: [[PRED_COPY21:%[0-9]+]]:_(s32) = PRED_COPY $vgpr21 + ; CHECK-NEXT: [[PRED_COPY22:%[0-9]+]]:_(s32) = PRED_COPY $vgpr22 + ; CHECK-NEXT: [[PRED_COPY23:%[0-9]+]]:_(s32) = PRED_COPY $vgpr23 + ; CHECK-NEXT: [[PRED_COPY24:%[0-9]+]]:_(s32) = PRED_COPY $vgpr24 + ; CHECK-NEXT: [[PRED_COPY25:%[0-9]+]]:_(s32) = PRED_COPY $vgpr25 + ; CHECK-NEXT: [[PRED_COPY26:%[0-9]+]]:_(s32) = PRED_COPY $vgpr26 + ; CHECK-NEXT: [[PRED_COPY27:%[0-9]+]]:_(s32) = PRED_COPY $vgpr27 + ; CHECK-NEXT: [[PRED_COPY28:%[0-9]+]]:_(s32) = PRED_COPY $vgpr28 + ; CHECK-NEXT: 
[[PRED_COPY29:%[0-9]+]]:_(s32) = PRED_COPY $vgpr29 + ; CHECK-NEXT: [[PRED_COPY30:%[0-9]+]]:_(s32) = PRED_COPY $vgpr30 ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.1, align 16, addrspace 5) ; CHECK-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (s32) from %fixed-stack.0, addrspace 5) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<33 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[LOAD]](s32), [[LOAD1]](s32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<33 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32), [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32), [[PRED_COPY12]](s32), [[PRED_COPY13]](s32), [[PRED_COPY14]](s32), [[PRED_COPY15]](s32), [[PRED_COPY16]](s32), [[PRED_COPY17]](s32), [[PRED_COPY18]](s32), [[PRED_COPY19]](s32), [[PRED_COPY20]](s32), [[PRED_COPY21]](s32), [[PRED_COPY22]](s32), [[PRED_COPY23]](s32), [[PRED_COPY24]](s32), [[PRED_COPY25]](s32), [[PRED_COPY26]](s32), [[PRED_COPY27]](s32), [[PRED_COPY28]](s32), [[PRED_COPY29]](s32), [[PRED_COPY30]](s32), [[LOAD]](s32), [[LOAD1]](s32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<33 x s32>), [[DEF]](p1) :: (store (<33 x s32>) into `ptr addrspace(1) undef`, align 256, addrspace 1) ; CHECK-NEXT: SI_RETURN @@ -874,12 +874,12 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY]](s32), [[PRED_COPY1]](s32) + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<2 x s64>), [[DEF]](p1) :: (store (<2 x s64>) into `ptr addrspace(1) undef`, addrspace 1) @@ -893,12 +893,12 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) 
= COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[PRED_COPY]](s32), [[PRED_COPY1]](s32) + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p0>) = G_BUILD_VECTOR [[MV]](p0), [[MV1]](p0) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<2 x p0>), [[DEF]](p1) :: (store (<2 x p0>) into `ptr addrspace(1) undef`, addrspace 1) @@ -912,12 +912,12 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[PRED_COPY]](s32), [[PRED_COPY1]](s32) + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p1>) = G_BUILD_VECTOR [[MV]](p1), [[MV1]](p1) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<2 x p1>), [[DEF]](p1) :: (store (<2 x p1>) into `ptr addrspace(1) undef`, addrspace 1) @@ -931,15 +931,15 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY]](s32), [[PRED_COPY1]](s32) + ; CHECK-NEXT: 
[[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY4]](s32), [[PRED_COPY5]](s32) ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<3 x s64>), [[DEF]](p1) :: (store (<3 x s64>) into `ptr addrspace(1) undef`, align 32, addrspace 1) @@ -953,18 +953,18 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; CHECK-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $vgpr6 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $vgpr7 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY]](s32), [[PRED_COPY1]](s32) + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY4]](s32), [[PRED_COPY5]](s32) + ; CHECK-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s64>), [[DEF]](p1) :: (store (<4 x s64>) into `ptr addrspace(1) undef`, addrspace 1) @@ -978,21 +978,21 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: 
[[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; CHECK-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) - ; CHECK-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $vgpr6 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $vgpr7 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr8 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr9 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY]](s32), [[PRED_COPY1]](s32) + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY4]](s32), [[PRED_COPY5]](s32) + ; CHECK-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; CHECK-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64), [[MV4]](s64) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<5 x s64>), [[DEF]](p1) :: (store (<5 x s64>) into `ptr addrspace(1) undef`, align 64, addrspace 1) @@ -1006,30 +1006,30 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; CHECK-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) - ; CHECK-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) - ; CHECK-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32) - ; CHECK-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32) - ; CHECK-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) + ; CHECK-NEXT: 
[[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $vgpr6 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $vgpr7 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr8 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr9 + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr10 + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $vgpr11 + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr12 + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr13 + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr14 + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr15 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY]](s32), [[PRED_COPY1]](s32) + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY4]](s32), [[PRED_COPY5]](s32) + ; CHECK-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; CHECK-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) + ; CHECK-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; CHECK-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY12]](s32), [[PRED_COPY13]](s32) + ; CHECK-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY14]](s32), [[PRED_COPY15]](s32) ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64), [[MV4]](s64), [[MV5]](s64), [[MV6]](s64), [[MV7]](s64) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<8 x s64>), [[DEF]](p1) :: (store (<8 x s64>) into `ptr addrspace(1) undef`, addrspace 1) @@ -1043,55 +1043,55 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; 
CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 - ; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 - ; CHECK-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 - ; CHECK-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 - ; CHECK-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24 - ; CHECK-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25 - ; CHECK-NEXT: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26 - ; CHECK-NEXT: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27 - ; CHECK-NEXT: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28 - ; CHECK-NEXT: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29 - ; CHECK-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $vgpr6 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $vgpr7 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr8 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr9 + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr10 + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $vgpr11 + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr12 + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr13 + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr14 + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr15 + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY $vgpr16 + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY $vgpr17 + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY $vgpr18 + ; CHECK-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY $vgpr19 + ; CHECK-NEXT: [[PRED_COPY20:%[0-9]+]]:_(s32) = PRED_COPY $vgpr20 + ; CHECK-NEXT: [[PRED_COPY21:%[0-9]+]]:_(s32) = PRED_COPY $vgpr21 + ; CHECK-NEXT: [[PRED_COPY22:%[0-9]+]]:_(s32) = PRED_COPY $vgpr22 + ; CHECK-NEXT: [[PRED_COPY23:%[0-9]+]]:_(s32) = PRED_COPY $vgpr23 + ; CHECK-NEXT: [[PRED_COPY24:%[0-9]+]]:_(s32) = PRED_COPY $vgpr24 + ; CHECK-NEXT: [[PRED_COPY25:%[0-9]+]]:_(s32) = PRED_COPY $vgpr25 + ; CHECK-NEXT: [[PRED_COPY26:%[0-9]+]]:_(s32) = PRED_COPY $vgpr26 + ; CHECK-NEXT: [[PRED_COPY27:%[0-9]+]]:_(s32) = PRED_COPY $vgpr27 + ; CHECK-NEXT: [[PRED_COPY28:%[0-9]+]]:_(s32) = PRED_COPY $vgpr28 + ; CHECK-NEXT: [[PRED_COPY29:%[0-9]+]]:_(s32) = PRED_COPY $vgpr29 + ; CHECK-NEXT: [[PRED_COPY30:%[0-9]+]]:_(s32) = PRED_COPY $vgpr30 ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.0, align 16, addrspace 5) - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; CHECK-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) - ; CHECK-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) - ; CHECK-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32) - ; CHECK-NEXT: 
[[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32) - ; CHECK-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) - ; CHECK-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32) - ; CHECK-NEXT: [[MV9:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY18]](s32), [[COPY19]](s32) - ; CHECK-NEXT: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY20]](s32), [[COPY21]](s32) - ; CHECK-NEXT: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY22]](s32), [[COPY23]](s32) - ; CHECK-NEXT: [[MV12:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY24]](s32), [[COPY25]](s32) - ; CHECK-NEXT: [[MV13:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY26]](s32), [[COPY27]](s32) - ; CHECK-NEXT: [[MV14:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY28]](s32), [[COPY29]](s32) - ; CHECK-NEXT: [[MV15:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY30]](s32), [[LOAD]](s32) + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY]](s32), [[PRED_COPY1]](s32) + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY4]](s32), [[PRED_COPY5]](s32) + ; CHECK-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; CHECK-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) + ; CHECK-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; CHECK-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY12]](s32), [[PRED_COPY13]](s32) + ; CHECK-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY14]](s32), [[PRED_COPY15]](s32) + ; CHECK-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY16]](s32), [[PRED_COPY17]](s32) + ; CHECK-NEXT: [[MV9:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY18]](s32), [[PRED_COPY19]](s32) + ; CHECK-NEXT: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY20]](s32), [[PRED_COPY21]](s32) + ; CHECK-NEXT: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY22]](s32), [[PRED_COPY23]](s32) + ; CHECK-NEXT: [[MV12:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY24]](s32), [[PRED_COPY25]](s32) + ; CHECK-NEXT: [[MV13:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY26]](s32), [[PRED_COPY27]](s32) + ; CHECK-NEXT: [[MV14:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY28]](s32), [[PRED_COPY29]](s32) + ; CHECK-NEXT: [[MV15:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY30]](s32), [[LOAD]](s32) ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64), [[MV4]](s64), [[MV5]](s64), [[MV6]](s64), [[MV7]](s64), [[MV8]](s64), [[MV9]](s64), [[MV10]](s64), [[MV11]](s64), [[MV12]](s64), [[MV13]](s64), [[MV14]](s64), [[MV15]](s64) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<16 x s64>), [[DEF]](p1) :: (store (<16 x s64>) into `ptr addrspace(1) undef`, addrspace 1) @@ -1105,9 +1105,9 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[COPY]](<2 x s16>), [[DEF]](p1) :: (store (<2 x s16>) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[PRED_COPY]](<2 x s16>), [[DEF]](p1) :: (store (<2 x s16>) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN store <2 x i16> %arg0, ptr addrspace(1) undef ret void @@ 
-1118,9 +1118,9 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[PRED_COPY]](<2 x s16>), [[PRED_COPY1]](<2 x s16>) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<4 x s16>) ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF @@ -1135,9 +1135,9 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[PRED_COPY]](<2 x s16>), [[PRED_COPY1]](<2 x s16>) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[CONCAT_VECTORS]](<4 x s16>), [[DEF]](p1) :: (store (<4 x s16>) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN @@ -1150,10 +1150,10 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<2 x s16>) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[PRED_COPY]](<2 x s16>), [[PRED_COPY1]](<2 x s16>), [[PRED_COPY2]](<2 x s16>) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>) ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16), [[UV3]](s16), [[UV4]](s16) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF @@ -1168,11 +1168,11 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(<2 x s16>) = PRED_COPY 
$vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[PRED_COPY]](<2 x s16>), [[PRED_COPY1]](<2 x s16>), [[PRED_COPY2]](<2 x s16>), [[PRED_COPY3]](<2 x s16>) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[CONCAT_VECTORS]](<8 x s16>), [[DEF]](p1) :: (store (<8 x s16>) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN @@ -1185,15 +1185,15 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr6 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr7 - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>), [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>), [[COPY6]](<2 x s16>), [[COPY7]](<2 x s16>) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr4 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr5 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr6 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr7 + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s16>) = G_CONCAT_VECTORS [[PRED_COPY]](<2 x s16>), [[PRED_COPY1]](<2 x s16>), [[PRED_COPY2]](<2 x s16>), [[PRED_COPY3]](<2 x s16>), [[PRED_COPY4]](<2 x s16>), [[PRED_COPY5]](<2 x s16>), [[PRED_COPY6]](<2 x s16>), [[PRED_COPY7]](<2 x s16>) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[CONCAT_VECTORS]](<16 x s16>), [[DEF]](p1) :: (store (<16 x s16>) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN @@ -1208,42 +1208,42 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr6 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr8 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr9 - ; 
CHECK-NEXT: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr10 - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr11 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr12 - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr13 - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr14 - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr15 - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr16 - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr17 - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr18 - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr19 - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr20 - ; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr21 - ; CHECK-NEXT: [[COPY22:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr22 - ; CHECK-NEXT: [[COPY23:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr23 - ; CHECK-NEXT: [[COPY24:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr24 - ; CHECK-NEXT: [[COPY25:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr25 - ; CHECK-NEXT: [[COPY26:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr26 - ; CHECK-NEXT: [[COPY27:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr27 - ; CHECK-NEXT: [[COPY28:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr28 - ; CHECK-NEXT: [[COPY29:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr29 - ; CHECK-NEXT: [[COPY30:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr30 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr4 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr5 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr6 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr7 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr8 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr9 + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr10 + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr11 + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr12 + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr13 + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr14 + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr15 + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr16 + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr17 + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr18 + ; CHECK-NEXT: [[PRED_COPY19:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr19 + ; CHECK-NEXT: [[PRED_COPY20:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr20 + ; CHECK-NEXT: [[PRED_COPY21:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr21 + ; CHECK-NEXT: [[PRED_COPY22:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr22 + ; CHECK-NEXT: [[PRED_COPY23:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr23 + ; CHECK-NEXT: [[PRED_COPY24:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr24 + ; CHECK-NEXT: [[PRED_COPY25:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr25 + ; CHECK-NEXT: [[PRED_COPY26:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr26 + ; CHECK-NEXT: [[PRED_COPY27:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr27 + ; CHECK-NEXT: [[PRED_COPY28:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr28 + ; CHECK-NEXT: [[PRED_COPY29:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr29 + ; CHECK-NEXT: [[PRED_COPY30:%[0-9]+]]:_(<2 x s16>) = PRED_COPY 
$vgpr30 ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (<2 x s16>) from %fixed-stack.1, align 16, addrspace 5) ; CHECK-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (<2 x s16>) from %fixed-stack.0, addrspace 5) - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<66 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>), [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>), [[COPY6]](<2 x s16>), [[COPY7]](<2 x s16>), [[COPY8]](<2 x s16>), [[COPY9]](<2 x s16>), [[COPY10]](<2 x s16>), [[COPY11]](<2 x s16>), [[COPY12]](<2 x s16>), [[COPY13]](<2 x s16>), [[COPY14]](<2 x s16>), [[COPY15]](<2 x s16>), [[COPY16]](<2 x s16>), [[COPY17]](<2 x s16>), [[COPY18]](<2 x s16>), [[COPY19]](<2 x s16>), [[COPY20]](<2 x s16>), [[COPY21]](<2 x s16>), [[COPY22]](<2 x s16>), [[COPY23]](<2 x s16>), [[COPY24]](<2 x s16>), [[COPY25]](<2 x s16>), [[COPY26]](<2 x s16>), [[COPY27]](<2 x s16>), [[COPY28]](<2 x s16>), [[COPY29]](<2 x s16>), [[COPY30]](<2 x s16>), [[LOAD]](<2 x s16>), [[LOAD1]](<2 x s16>) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<66 x s16>) = G_CONCAT_VECTORS [[PRED_COPY]](<2 x s16>), [[PRED_COPY1]](<2 x s16>), [[PRED_COPY2]](<2 x s16>), [[PRED_COPY3]](<2 x s16>), [[PRED_COPY4]](<2 x s16>), [[PRED_COPY5]](<2 x s16>), [[PRED_COPY6]](<2 x s16>), [[PRED_COPY7]](<2 x s16>), [[PRED_COPY8]](<2 x s16>), [[PRED_COPY9]](<2 x s16>), [[PRED_COPY10]](<2 x s16>), [[PRED_COPY11]](<2 x s16>), [[PRED_COPY12]](<2 x s16>), [[PRED_COPY13]](<2 x s16>), [[PRED_COPY14]](<2 x s16>), [[PRED_COPY15]](<2 x s16>), [[PRED_COPY16]](<2 x s16>), [[PRED_COPY17]](<2 x s16>), [[PRED_COPY18]](<2 x s16>), [[PRED_COPY19]](<2 x s16>), [[PRED_COPY20]](<2 x s16>), [[PRED_COPY21]](<2 x s16>), [[PRED_COPY22]](<2 x s16>), [[PRED_COPY23]](<2 x s16>), [[PRED_COPY24]](<2 x s16>), [[PRED_COPY25]](<2 x s16>), [[PRED_COPY26]](<2 x s16>), [[PRED_COPY27]](<2 x s16>), [[PRED_COPY28]](<2 x s16>), [[PRED_COPY29]](<2 x s16>), [[PRED_COPY30]](<2 x s16>), [[LOAD]](<2 x s16>), [[LOAD1]](<2 x s16>) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16), [[UV8:%[0-9]+]]:_(s16), [[UV9:%[0-9]+]]:_(s16), [[UV10:%[0-9]+]]:_(s16), [[UV11:%[0-9]+]]:_(s16), [[UV12:%[0-9]+]]:_(s16), [[UV13:%[0-9]+]]:_(s16), [[UV14:%[0-9]+]]:_(s16), [[UV15:%[0-9]+]]:_(s16), [[UV16:%[0-9]+]]:_(s16), [[UV17:%[0-9]+]]:_(s16), [[UV18:%[0-9]+]]:_(s16), [[UV19:%[0-9]+]]:_(s16), [[UV20:%[0-9]+]]:_(s16), [[UV21:%[0-9]+]]:_(s16), [[UV22:%[0-9]+]]:_(s16), [[UV23:%[0-9]+]]:_(s16), [[UV24:%[0-9]+]]:_(s16), [[UV25:%[0-9]+]]:_(s16), [[UV26:%[0-9]+]]:_(s16), [[UV27:%[0-9]+]]:_(s16), [[UV28:%[0-9]+]]:_(s16), [[UV29:%[0-9]+]]:_(s16), [[UV30:%[0-9]+]]:_(s16), [[UV31:%[0-9]+]]:_(s16), [[UV32:%[0-9]+]]:_(s16), [[UV33:%[0-9]+]]:_(s16), [[UV34:%[0-9]+]]:_(s16), [[UV35:%[0-9]+]]:_(s16), [[UV36:%[0-9]+]]:_(s16), [[UV37:%[0-9]+]]:_(s16), [[UV38:%[0-9]+]]:_(s16), [[UV39:%[0-9]+]]:_(s16), [[UV40:%[0-9]+]]:_(s16), [[UV41:%[0-9]+]]:_(s16), [[UV42:%[0-9]+]]:_(s16), [[UV43:%[0-9]+]]:_(s16), [[UV44:%[0-9]+]]:_(s16), [[UV45:%[0-9]+]]:_(s16), [[UV46:%[0-9]+]]:_(s16), [[UV47:%[0-9]+]]:_(s16), [[UV48:%[0-9]+]]:_(s16), [[UV49:%[0-9]+]]:_(s16), [[UV50:%[0-9]+]]:_(s16), [[UV51:%[0-9]+]]:_(s16), 
[[UV52:%[0-9]+]]:_(s16), [[UV53:%[0-9]+]]:_(s16), [[UV54:%[0-9]+]]:_(s16), [[UV55:%[0-9]+]]:_(s16), [[UV56:%[0-9]+]]:_(s16), [[UV57:%[0-9]+]]:_(s16), [[UV58:%[0-9]+]]:_(s16), [[UV59:%[0-9]+]]:_(s16), [[UV60:%[0-9]+]]:_(s16), [[UV61:%[0-9]+]]:_(s16), [[UV62:%[0-9]+]]:_(s16), [[UV63:%[0-9]+]]:_(s16), [[UV64:%[0-9]+]]:_(s16), [[UV65:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<66 x s16>) ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<65 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16), [[UV3]](s16), [[UV4]](s16), [[UV5]](s16), [[UV6]](s16), [[UV7]](s16), [[UV8]](s16), [[UV9]](s16), [[UV10]](s16), [[UV11]](s16), [[UV12]](s16), [[UV13]](s16), [[UV14]](s16), [[UV15]](s16), [[UV16]](s16), [[UV17]](s16), [[UV18]](s16), [[UV19]](s16), [[UV20]](s16), [[UV21]](s16), [[UV22]](s16), [[UV23]](s16), [[UV24]](s16), [[UV25]](s16), [[UV26]](s16), [[UV27]](s16), [[UV28]](s16), [[UV29]](s16), [[UV30]](s16), [[UV31]](s16), [[UV32]](s16), [[UV33]](s16), [[UV34]](s16), [[UV35]](s16), [[UV36]](s16), [[UV37]](s16), [[UV38]](s16), [[UV39]](s16), [[UV40]](s16), [[UV41]](s16), [[UV42]](s16), [[UV43]](s16), [[UV44]](s16), [[UV45]](s16), [[UV46]](s16), [[UV47]](s16), [[UV48]](s16), [[UV49]](s16), [[UV50]](s16), [[UV51]](s16), [[UV52]](s16), [[UV53]](s16), [[UV54]](s16), [[UV55]](s16), [[UV56]](s16), [[UV57]](s16), [[UV58]](s16), [[UV59]](s16), [[UV60]](s16), [[UV61]](s16), [[UV62]](s16), [[UV63]](s16), [[UV64]](s16) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF @@ -1258,9 +1258,9 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<2 x s32>), [[DEF]](p1) :: (store (<2 x s32>) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN @@ -1273,10 +1273,10 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<3 x s32>), [[DEF]](p1) :: (store (<3 x s32>) into `ptr addrspace(1) undef`, align 16, addrspace 1) ; CHECK-NEXT: SI_RETURN @@ -1289,11 +1289,11 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; 
CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[DEF]](p1) :: (store (<4 x s32>) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN @@ -1306,15 +1306,15 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $vgpr6 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $vgpr7 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<8 x s32>), [[DEF]](p1) :: (store (<8 x s32>) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN @@ -1327,23 +1327,23 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) 
= COPY $vgpr14 - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $vgpr6 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $vgpr7 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr8 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr9 + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr10 + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $vgpr11 + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr12 + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr13 + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr14 + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr15 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32), [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32), [[PRED_COPY12]](s32), [[PRED_COPY13]](s32), [[PRED_COPY14]](s32), [[PRED_COPY15]](s32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<16 x s32>), [[DEF]](p1) :: (store (<16 x s32>) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN @@ -1356,12 +1356,12 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY]](s32), [[PRED_COPY1]](s32) + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<2 x s64>), [[DEF]](p1) :: (store (<2 x s64>) into `ptr addrspace(1) undef`, addrspace 1) @@ -1375,15 +1375,15 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: 
[[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY]](s32), [[PRED_COPY1]](s32) + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY4]](s32), [[PRED_COPY5]](s32) ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<3 x s64>), [[DEF]](p1) :: (store (<3 x s64>) into `ptr addrspace(1) undef`, align 32, addrspace 1) @@ -1397,18 +1397,18 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; CHECK-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $vgpr6 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $vgpr7 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY]](s32), [[PRED_COPY1]](s32) + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY4]](s32), [[PRED_COPY5]](s32) + ; CHECK-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE 
[[BUILD_VECTOR]](<4 x s64>), [[DEF]](p1) :: (store (<4 x s64>) into `ptr addrspace(1) undef`, addrspace 1) @@ -1422,30 +1422,30 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; CHECK-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) - ; CHECK-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) - ; CHECK-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32) - ; CHECK-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32) - ; CHECK-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $vgpr6 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $vgpr7 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr8 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr9 + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr10 + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $vgpr11 + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr12 + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr13 + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr14 + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr15 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY]](s32), [[PRED_COPY1]](s32) + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY4]](s32), [[PRED_COPY5]](s32) + ; CHECK-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; CHECK-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) + ; CHECK-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY10]](s32), 
[[PRED_COPY11]](s32) + ; CHECK-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY12]](s32), [[PRED_COPY13]](s32) + ; CHECK-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY14]](s32), [[PRED_COPY15]](s32) ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64), [[MV4]](s64), [[MV5]](s64), [[MV6]](s64), [[MV7]](s64) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<8 x s64>), [[DEF]](p1) :: (store (<8 x s64>) into `ptr addrspace(1) undef`, addrspace 1) @@ -1459,55 +1459,55 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 - ; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 - ; CHECK-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 - ; CHECK-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 - ; CHECK-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24 - ; CHECK-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25 - ; CHECK-NEXT: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26 - ; CHECK-NEXT: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27 - ; CHECK-NEXT: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28 - ; CHECK-NEXT: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29 - ; CHECK-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $vgpr6 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $vgpr7 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr8 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr9 + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr10 + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $vgpr11 + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr12 + ; 
CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr13 + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr14 + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr15 + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY $vgpr16 + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY $vgpr17 + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY $vgpr18 + ; CHECK-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY $vgpr19 + ; CHECK-NEXT: [[PRED_COPY20:%[0-9]+]]:_(s32) = PRED_COPY $vgpr20 + ; CHECK-NEXT: [[PRED_COPY21:%[0-9]+]]:_(s32) = PRED_COPY $vgpr21 + ; CHECK-NEXT: [[PRED_COPY22:%[0-9]+]]:_(s32) = PRED_COPY $vgpr22 + ; CHECK-NEXT: [[PRED_COPY23:%[0-9]+]]:_(s32) = PRED_COPY $vgpr23 + ; CHECK-NEXT: [[PRED_COPY24:%[0-9]+]]:_(s32) = PRED_COPY $vgpr24 + ; CHECK-NEXT: [[PRED_COPY25:%[0-9]+]]:_(s32) = PRED_COPY $vgpr25 + ; CHECK-NEXT: [[PRED_COPY26:%[0-9]+]]:_(s32) = PRED_COPY $vgpr26 + ; CHECK-NEXT: [[PRED_COPY27:%[0-9]+]]:_(s32) = PRED_COPY $vgpr27 + ; CHECK-NEXT: [[PRED_COPY28:%[0-9]+]]:_(s32) = PRED_COPY $vgpr28 + ; CHECK-NEXT: [[PRED_COPY29:%[0-9]+]]:_(s32) = PRED_COPY $vgpr29 + ; CHECK-NEXT: [[PRED_COPY30:%[0-9]+]]:_(s32) = PRED_COPY $vgpr30 ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.0, align 16, addrspace 5) - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; CHECK-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) - ; CHECK-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) - ; CHECK-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32) - ; CHECK-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32) - ; CHECK-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) - ; CHECK-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32) - ; CHECK-NEXT: [[MV9:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY18]](s32), [[COPY19]](s32) - ; CHECK-NEXT: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY20]](s32), [[COPY21]](s32) - ; CHECK-NEXT: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY22]](s32), [[COPY23]](s32) - ; CHECK-NEXT: [[MV12:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY24]](s32), [[COPY25]](s32) - ; CHECK-NEXT: [[MV13:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY26]](s32), [[COPY27]](s32) - ; CHECK-NEXT: [[MV14:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY28]](s32), [[COPY29]](s32) - ; CHECK-NEXT: [[MV15:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY30]](s32), [[LOAD]](s32) + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY]](s32), [[PRED_COPY1]](s32) + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY4]](s32), [[PRED_COPY5]](s32) + ; CHECK-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; CHECK-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) + ; CHECK-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; CHECK-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY12]](s32), [[PRED_COPY13]](s32) + ; CHECK-NEXT: [[MV7:%[0-9]+]]:_(s64) = 
G_MERGE_VALUES [[PRED_COPY14]](s32), [[PRED_COPY15]](s32) + ; CHECK-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY16]](s32), [[PRED_COPY17]](s32) + ; CHECK-NEXT: [[MV9:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY18]](s32), [[PRED_COPY19]](s32) + ; CHECK-NEXT: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY20]](s32), [[PRED_COPY21]](s32) + ; CHECK-NEXT: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY22]](s32), [[PRED_COPY23]](s32) + ; CHECK-NEXT: [[MV12:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY24]](s32), [[PRED_COPY25]](s32) + ; CHECK-NEXT: [[MV13:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY26]](s32), [[PRED_COPY27]](s32) + ; CHECK-NEXT: [[MV14:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY28]](s32), [[PRED_COPY29]](s32) + ; CHECK-NEXT: [[MV15:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY30]](s32), [[LOAD]](s32) ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64), [[MV4]](s64), [[MV5]](s64), [[MV6]](s64), [[MV7]](s64), [[MV8]](s64), [[MV9]](s64), [[MV10]](s64), [[MV11]](s64), [[MV12]](s64), [[MV13]](s64), [[MV14]](s64), [[MV15]](s64) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<16 x s64>), [[DEF]](p1) :: (store (<16 x s64>) into `ptr addrspace(1) undef`, addrspace 1) @@ -1521,9 +1521,9 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[COPY]](<2 x s16>), [[DEF]](p1) :: (store (<2 x s16>) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[PRED_COPY]](<2 x s16>), [[DEF]](p1) :: (store (<2 x s16>) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN store <2 x half> %arg0, ptr addrspace(1) undef ret void @@ -1534,9 +1534,9 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[PRED_COPY]](<2 x s16>), [[PRED_COPY1]](<2 x s16>) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<4 x s16>) ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF @@ -1551,9 +1551,9 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[PRED_COPY]](<2 x s16>), [[PRED_COPY1]](<2 x s16>) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; 
CHECK-NEXT: G_STORE [[CONCAT_VECTORS]](<4 x s16>), [[DEF]](p1) :: (store (<4 x s16>) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN @@ -1566,11 +1566,11 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[PRED_COPY]](<2 x s16>), [[PRED_COPY1]](<2 x s16>), [[PRED_COPY2]](<2 x s16>), [[PRED_COPY3]](<2 x s16>) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[CONCAT_VECTORS]](<8 x s16>), [[DEF]](p1) :: (store (<8 x s16>) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN @@ -1583,15 +1583,15 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr6 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr7 - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>), [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>), [[COPY6]](<2 x s16>), [[COPY7]](<2 x s16>) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr4 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr5 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr6 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr7 + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s16>) = G_CONCAT_VECTORS [[PRED_COPY]](<2 x s16>), [[PRED_COPY1]](<2 x s16>), [[PRED_COPY2]](<2 x s16>), [[PRED_COPY3]](<2 x s16>), [[PRED_COPY4]](<2 x s16>), [[PRED_COPY5]](<2 x s16>), [[PRED_COPY6]](<2 x s16>), [[PRED_COPY7]](<2 x s16>) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[CONCAT_VECTORS]](<16 x s16>), [[DEF]](p1) :: (store (<16 x s16>) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN @@ -1605,15 +1605,15 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY 
$vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY1]](s32), [[PRED_COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[COPY]](s32), [[DEF]](p1) :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[PRED_COPY]](s32), [[DEF]](p1) :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: G_STORE [[MV]](s64), [[DEF]](p1) :: (volatile store (s64) into `ptr addrspace(1) undef`, addrspace 1) - ; CHECK-NEXT: G_STORE [[COPY3]](s32), [[DEF]](p1) :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[PRED_COPY3]](s32), [[DEF]](p1) :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN store volatile i32 %arg0, ptr addrspace(1) undef store volatile i64 %arg1, ptr addrspace(1) undef @@ -1626,9 +1626,9 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[COPY]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[PRED_COPY]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN store { i32 } %arg0, ptr addrspace(1) undef ret void @@ -1639,15 +1639,15 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY]](s32) ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC]](s16) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[TRUNC1]](s8), [[DEF]](p1) :: (store (s8) into `ptr addrspace(1) undef`, align 4, addrspace 1) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], [[C]](s64) - ; CHECK-NEXT: G_STORE [[COPY1]](s32), [[PTR_ADD]](p1) :: (store (s32) into `ptr addrspace(1) undef` + 4, addrspace 1) + ; CHECK-NEXT: G_STORE [[PRED_COPY1]](s32), [[PTR_ADD]](p1) :: (store (s32) into `ptr addrspace(1) undef` + 4, addrspace 1) ; CHECK-NEXT: SI_RETURN store { i8, i32 } %arg0, ptr addrspace(1) undef ret void @@ -1657,11 +1657,11 @@ ; CHECK-LABEL: name: void_func_byval_struct_i8_i32 ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX]](p5) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(p5) = PRED_COPY [[FRAME_INDEX]](p5) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[COPY]](p5) :: (load (s8) from %ir.arg0, 
align 4, addrspace 5) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[PRED_COPY]](p5) :: (load (s8) from %ir.arg0, align 4, addrspace 5) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[PRED_COPY]], [[C]](s32) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from %ir.arg0 + 4, addrspace 5) ; CHECK-NEXT: G_STORE [[LOAD]](s8), [[DEF]](p1) :: (store (s8) into `ptr addrspace(1) undef`, align 4, addrspace 1) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 @@ -1679,18 +1679,18 @@ ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX]](p5) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(p5) = PRED_COPY [[FRAME_INDEX]](p5) ; CHECK-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX1]](p5) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(p5) = PRED_COPY [[FRAME_INDEX1]](p5) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(p3) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[COPY]](p5) :: (volatile load (s8) from %ir.arg0, align 4, addrspace 5) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[PRED_COPY]](p5) :: (volatile load (s8) from %ir.arg0, align 4, addrspace 5) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[PRED_COPY]], [[C]](s32) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (volatile load (s32) from %ir.arg0 + 4, addrspace 5) - ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(s8) = G_LOAD [[COPY1]](p5) :: (volatile load (s8) from %ir.arg1, align 4, addrspace 5) - ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY1]], [[C]](s32) + ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(s8) = G_LOAD [[PRED_COPY1]](p5) :: (volatile load (s8) from %ir.arg1, align 4, addrspace 5) + ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[PRED_COPY1]], [[C]](s32) ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (volatile load (s32) from %ir.arg1 + 4, addrspace 5) ; CHECK-NEXT: G_STORE [[LOAD]](s8), [[DEF]](p1) :: (volatile store (s8) into `ptr addrspace(1) undef`, align 4, addrspace 1) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 @@ -1699,7 +1699,7 @@ ; CHECK-NEXT: G_STORE [[LOAD2]](s8), [[DEF]](p1) :: (volatile store (s8) into `ptr addrspace(1) undef`, align 4, addrspace 1) ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], [[C1]](s64) ; CHECK-NEXT: G_STORE [[LOAD3]](s32), [[PTR_ADD3]](p1) :: (volatile store (s32) into `ptr addrspace(1) undef` + 4, addrspace 1) - ; CHECK-NEXT: G_STORE [[COPY2]](s32), [[DEF1]](p3) :: (volatile store (s32) into `ptr addrspace(3) undef`, addrspace 3) + ; CHECK-NEXT: G_STORE [[PRED_COPY2]](s32), [[DEF1]](p3) :: (volatile store (s32) into `ptr addrspace(3) undef`, addrspace 3) ; CHECK-NEXT: SI_RETURN %arg0.load = load volatile { i8, i32 }, ptr addrspace(5) %arg0 %arg1.load = load volatile { i8, i32 }, ptr addrspace(5) %arg1 @@ -1713,12 +1713,12 @@ ; CHECK-LABEL: name: void_func_byval_i32_byval_i64 ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) 
= G_FRAME_INDEX %fixed-stack.1 - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX]](p5) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(p5) = PRED_COPY [[FRAME_INDEX]](p5) ; CHECK-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX1]](p5) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(p5) = PRED_COPY [[FRAME_INDEX1]](p5) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32) from %ir.arg0, addrspace 5) - ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[COPY1]](p5) :: (load (s64) from %ir.arg1, addrspace 5) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PRED_COPY]](p5) :: (load (s32) from %ir.arg0, addrspace 5) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PRED_COPY1]](p5) :: (load (s64) from %ir.arg1, addrspace 5) ; CHECK-NEXT: G_STORE [[LOAD]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: G_STORE [[LOAD1]](s64), [[DEF]](p1) :: (store (s64) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN @@ -1733,12 +1733,12 @@ ; CHECK-LABEL: name: void_func_byval_i8_align32_i16_align64 ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX]](p5) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(p5) = PRED_COPY [[FRAME_INDEX]](p5) ; CHECK-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX1]](p5) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(p5) = PRED_COPY [[FRAME_INDEX1]](p5) ; CHECK-NEXT: [[C:%[0-9]+]]:_(p1) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[COPY]](p5) :: (dereferenceable load (s8) from %ir.arg0, addrspace 5) - ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[COPY1]](p5) :: (dereferenceable load (s16) from %ir.arg1, addrspace 5) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[PRED_COPY]](p5) :: (dereferenceable load (s8) from %ir.arg0, addrspace 5) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[PRED_COPY1]](p5) :: (dereferenceable load (s16) from %ir.arg1, addrspace 5) ; CHECK-NEXT: G_STORE [[LOAD]](s8), [[C]](p1) :: (store (s8) into `ptr addrspace(1) null`, addrspace 1) ; CHECK-NEXT: G_STORE [[LOAD1]](s16), [[C]](p1) :: (store (s16) into `ptr addrspace(1) null`, addrspace 1) ; CHECK-NEXT: SI_RETURN @@ -1754,18 +1754,18 @@ ; CHECK-LABEL: name: byval_a3i32_align128_byval_i16_align64 ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX]](p5) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(p5) = PRED_COPY [[FRAME_INDEX]](p5) ; CHECK-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX1]](p5) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(p5) = PRED_COPY [[FRAME_INDEX1]](p5) ; CHECK-NEXT: [[C:%[0-9]+]]:_(p1) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (dereferenceable load (s32) from %ir.arg0, addrspace 5) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PRED_COPY]](p5) :: (dereferenceable load (s32) from %ir.arg0, addrspace 5) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[PRED_COPY]], [[C1]](s32) ; CHECK-NEXT: 
[[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (dereferenceable load (s32) from %ir.arg0 + 4, addrspace 5) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[PRED_COPY]], [[C2]](s32) ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (dereferenceable load (s32) from %ir.arg0 + 8, addrspace 5) - ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:_(s16) = G_LOAD [[COPY1]](p5) :: (dereferenceable load (s16) from %ir.arg1, addrspace 5) + ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:_(s16) = G_LOAD [[PRED_COPY1]](p5) :: (dereferenceable load (s16) from %ir.arg1, addrspace 5) ; CHECK-NEXT: G_STORE [[LOAD]](s32), [[C]](p1) :: (store (s32) into `ptr addrspace(1) null`, addrspace 1) ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CHECK-NEXT: [[C4:%[0-9]+]]:_(p1) = G_CONSTANT i64 4 @@ -1788,47 +1788,47 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 - ; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 - ; CHECK-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 - ; CHECK-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 - ; CHECK-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24 - ; CHECK-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25 - ; CHECK-NEXT: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26 - ; CHECK-NEXT: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27 - ; CHECK-NEXT: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28 - ; CHECK-NEXT: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29 - ; CHECK-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $vgpr6 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $vgpr7 + ; 
CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr8 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr9 + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr10 + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $vgpr11 + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr12 + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr13 + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr14 + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr15 + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY $vgpr16 + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY $vgpr17 + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY $vgpr18 + ; CHECK-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY $vgpr19 + ; CHECK-NEXT: [[PRED_COPY20:%[0-9]+]]:_(s32) = PRED_COPY $vgpr20 + ; CHECK-NEXT: [[PRED_COPY21:%[0-9]+]]:_(s32) = PRED_COPY $vgpr21 + ; CHECK-NEXT: [[PRED_COPY22:%[0-9]+]]:_(s32) = PRED_COPY $vgpr22 + ; CHECK-NEXT: [[PRED_COPY23:%[0-9]+]]:_(s32) = PRED_COPY $vgpr23 + ; CHECK-NEXT: [[PRED_COPY24:%[0-9]+]]:_(s32) = PRED_COPY $vgpr24 + ; CHECK-NEXT: [[PRED_COPY25:%[0-9]+]]:_(s32) = PRED_COPY $vgpr25 + ; CHECK-NEXT: [[PRED_COPY26:%[0-9]+]]:_(s32) = PRED_COPY $vgpr26 + ; CHECK-NEXT: [[PRED_COPY27:%[0-9]+]]:_(s32) = PRED_COPY $vgpr27 + ; CHECK-NEXT: [[PRED_COPY28:%[0-9]+]]:_(s32) = PRED_COPY $vgpr28 + ; CHECK-NEXT: [[PRED_COPY29:%[0-9]+]]:_(s32) = PRED_COPY $vgpr29 + ; CHECK-NEXT: [[PRED_COPY30:%[0-9]+]]:_(s32) = PRED_COPY $vgpr30 ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2 ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.2, align 16, addrspace 5) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[LOAD]](s32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32), [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32), [[PRED_COPY12]](s32), [[PRED_COPY13]](s32), [[PRED_COPY14]](s32), [[PRED_COPY15]](s32), [[PRED_COPY16]](s32), [[PRED_COPY17]](s32), [[PRED_COPY18]](s32), [[PRED_COPY19]](s32), [[PRED_COPY20]](s32), [[PRED_COPY21]](s32), [[PRED_COPY22]](s32), [[PRED_COPY23]](s32), [[PRED_COPY24]](s32), [[PRED_COPY25]](s32), [[PRED_COPY26]](s32), [[PRED_COPY27]](s32), [[PRED_COPY28]](s32), [[PRED_COPY29]](s32), [[PRED_COPY30]](s32), [[LOAD]](s32) ; CHECK-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (s32) from %fixed-stack.1, addrspace 5) ; CHECK-NEXT: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 - ; CHECK-NEXT: [[COPY31:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX2]](p5) + ; CHECK-NEXT: [[PRED_COPY31:%[0-9]+]]:_(p5) = PRED_COPY [[FRAME_INDEX2]](p5) ; CHECK-NEXT: 
[[C:%[0-9]+]]:_(p1) = G_CONSTANT i64 0 ; CHECK-NEXT: G_STORE [[LOAD1]](s32), [[C]](p1) :: (store (s32) into `ptr addrspace(1) null`, addrspace 1) - ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(s8) = G_LOAD [[COPY31]](p5) :: (dereferenceable load (s8) from %ir.arg2, addrspace 5) + ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(s8) = G_LOAD [[PRED_COPY31]](p5) :: (dereferenceable load (s8) from %ir.arg2, addrspace 5) ; CHECK-NEXT: G_STORE [[LOAD2]](s8), [[C]](p1) :: (store (s8) into `ptr addrspace(1) null`, addrspace 1) ; CHECK-NEXT: SI_RETURN store i32 %arg1, ptr addrspace(1) null @@ -1843,47 +1843,47 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 - ; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 - ; CHECK-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 - ; CHECK-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 - ; CHECK-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24 - ; CHECK-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25 - ; CHECK-NEXT: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26 - ; CHECK-NEXT: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27 - ; CHECK-NEXT: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28 - ; CHECK-NEXT: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29 - ; CHECK-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $vgpr6 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $vgpr7 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr8 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr9 + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr10 + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $vgpr11 + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr12 + ; CHECK-NEXT: 
[[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr13 + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr14 + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr15 + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY $vgpr16 + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY $vgpr17 + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY $vgpr18 + ; CHECK-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY $vgpr19 + ; CHECK-NEXT: [[PRED_COPY20:%[0-9]+]]:_(s32) = PRED_COPY $vgpr20 + ; CHECK-NEXT: [[PRED_COPY21:%[0-9]+]]:_(s32) = PRED_COPY $vgpr21 + ; CHECK-NEXT: [[PRED_COPY22:%[0-9]+]]:_(s32) = PRED_COPY $vgpr22 + ; CHECK-NEXT: [[PRED_COPY23:%[0-9]+]]:_(s32) = PRED_COPY $vgpr23 + ; CHECK-NEXT: [[PRED_COPY24:%[0-9]+]]:_(s32) = PRED_COPY $vgpr24 + ; CHECK-NEXT: [[PRED_COPY25:%[0-9]+]]:_(s32) = PRED_COPY $vgpr25 + ; CHECK-NEXT: [[PRED_COPY26:%[0-9]+]]:_(s32) = PRED_COPY $vgpr26 + ; CHECK-NEXT: [[PRED_COPY27:%[0-9]+]]:_(s32) = PRED_COPY $vgpr27 + ; CHECK-NEXT: [[PRED_COPY28:%[0-9]+]]:_(s32) = PRED_COPY $vgpr28 + ; CHECK-NEXT: [[PRED_COPY29:%[0-9]+]]:_(s32) = PRED_COPY $vgpr29 + ; CHECK-NEXT: [[PRED_COPY30:%[0-9]+]]:_(s32) = PRED_COPY $vgpr30 ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2 ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.2, align 16, addrspace 5) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[LOAD]](s32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32), [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32), [[PRED_COPY12]](s32), [[PRED_COPY13]](s32), [[PRED_COPY14]](s32), [[PRED_COPY15]](s32), [[PRED_COPY16]](s32), [[PRED_COPY17]](s32), [[PRED_COPY18]](s32), [[PRED_COPY19]](s32), [[PRED_COPY20]](s32), [[PRED_COPY21]](s32), [[PRED_COPY22]](s32), [[PRED_COPY23]](s32), [[PRED_COPY24]](s32), [[PRED_COPY25]](s32), [[PRED_COPY26]](s32), [[PRED_COPY27]](s32), [[PRED_COPY28]](s32), [[PRED_COPY29]](s32), [[PRED_COPY30]](s32), [[LOAD]](s32) ; CHECK-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 - ; CHECK-NEXT: [[COPY31:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX1]](p5) + ; CHECK-NEXT: [[PRED_COPY31:%[0-9]+]]:_(p5) = PRED_COPY [[FRAME_INDEX1]](p5) ; CHECK-NEXT: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load (s32) from %fixed-stack.0, align 8, addrspace 5) ; CHECK-NEXT: [[C:%[0-9]+]]:_(p1) = G_CONSTANT i64 0 ; CHECK-NEXT: G_STORE [[LOAD1]](s32), [[C]](p1) :: (store (s32) into `ptr addrspace(1) null`, addrspace 1) - ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(s8) = G_LOAD [[COPY31]](p5) :: (dereferenceable load (s8) from %ir.arg1, addrspace 5) + ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(s8) = G_LOAD [[PRED_COPY31]](p5) :: 
(dereferenceable load (s8) from %ir.arg1, addrspace 5) ; CHECK-NEXT: G_STORE [[LOAD2]](s8), [[C]](p1) :: (store (s8) into `ptr addrspace(1) null`, addrspace 1) ; CHECK-NEXT: SI_RETURN store i32 %arg2, ptr addrspace(1) null @@ -1897,40 +1897,40 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 - ; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 - ; CHECK-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 - ; CHECK-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 - ; CHECK-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24 - ; CHECK-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25 - ; CHECK-NEXT: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26 - ; CHECK-NEXT: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27 - ; CHECK-NEXT: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28 - ; CHECK-NEXT: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29 - ; CHECK-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $vgpr6 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $vgpr7 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr8 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr9 + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr10 + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $vgpr11 + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr12 + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr13 + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr14 + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr15 + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY $vgpr16 + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY $vgpr17 + ; CHECK-NEXT: 
[[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY $vgpr18 + ; CHECK-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY $vgpr19 + ; CHECK-NEXT: [[PRED_COPY20:%[0-9]+]]:_(s32) = PRED_COPY $vgpr20 + ; CHECK-NEXT: [[PRED_COPY21:%[0-9]+]]:_(s32) = PRED_COPY $vgpr21 + ; CHECK-NEXT: [[PRED_COPY22:%[0-9]+]]:_(s32) = PRED_COPY $vgpr22 + ; CHECK-NEXT: [[PRED_COPY23:%[0-9]+]]:_(s32) = PRED_COPY $vgpr23 + ; CHECK-NEXT: [[PRED_COPY24:%[0-9]+]]:_(s32) = PRED_COPY $vgpr24 + ; CHECK-NEXT: [[PRED_COPY25:%[0-9]+]]:_(s32) = PRED_COPY $vgpr25 + ; CHECK-NEXT: [[PRED_COPY26:%[0-9]+]]:_(s32) = PRED_COPY $vgpr26 + ; CHECK-NEXT: [[PRED_COPY27:%[0-9]+]]:_(s32) = PRED_COPY $vgpr27 + ; CHECK-NEXT: [[PRED_COPY28:%[0-9]+]]:_(s32) = PRED_COPY $vgpr28 + ; CHECK-NEXT: [[PRED_COPY29:%[0-9]+]]:_(s32) = PRED_COPY $vgpr29 + ; CHECK-NEXT: [[PRED_COPY30:%[0-9]+]]:_(s32) = PRED_COPY $vgpr30 ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.3 ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.3, align 16, addrspace 5) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[LOAD]](s32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32), [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32), [[PRED_COPY12]](s32), [[PRED_COPY13]](s32), [[PRED_COPY14]](s32), [[PRED_COPY15]](s32), [[PRED_COPY16]](s32), [[PRED_COPY17]](s32), [[PRED_COPY18]](s32), [[PRED_COPY19]](s32), [[PRED_COPY20]](s32), [[PRED_COPY21]](s32), [[PRED_COPY22]](s32), [[PRED_COPY23]](s32), [[PRED_COPY24]](s32), [[PRED_COPY25]](s32), [[PRED_COPY26]](s32), [[PRED_COPY27]](s32), [[PRED_COPY28]](s32), [[PRED_COPY29]](s32), [[PRED_COPY30]](s32), [[LOAD]](s32) ; CHECK-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2 ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (s32) from %fixed-stack.2, addrspace 5) ; CHECK-NEXT: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 @@ -1955,40 +1955,40 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = 
COPY $vgpr8 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 - ; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 - ; CHECK-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 - ; CHECK-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 - ; CHECK-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24 - ; CHECK-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25 - ; CHECK-NEXT: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26 - ; CHECK-NEXT: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27 - ; CHECK-NEXT: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28 - ; CHECK-NEXT: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29 - ; CHECK-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $vgpr6 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $vgpr7 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr8 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr9 + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr10 + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $vgpr11 + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr12 + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr13 + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr14 + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr15 + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY $vgpr16 + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY $vgpr17 + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY $vgpr18 + ; CHECK-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY $vgpr19 + ; CHECK-NEXT: [[PRED_COPY20:%[0-9]+]]:_(s32) = PRED_COPY $vgpr20 + ; CHECK-NEXT: [[PRED_COPY21:%[0-9]+]]:_(s32) = PRED_COPY $vgpr21 + ; CHECK-NEXT: [[PRED_COPY22:%[0-9]+]]:_(s32) = PRED_COPY $vgpr22 + ; CHECK-NEXT: [[PRED_COPY23:%[0-9]+]]:_(s32) = PRED_COPY $vgpr23 + ; CHECK-NEXT: [[PRED_COPY24:%[0-9]+]]:_(s32) = PRED_COPY $vgpr24 + ; CHECK-NEXT: [[PRED_COPY25:%[0-9]+]]:_(s32) = PRED_COPY $vgpr25 + ; CHECK-NEXT: [[PRED_COPY26:%[0-9]+]]:_(s32) = PRED_COPY $vgpr26 + ; CHECK-NEXT: [[PRED_COPY27:%[0-9]+]]:_(s32) = PRED_COPY $vgpr27 + ; CHECK-NEXT: [[PRED_COPY28:%[0-9]+]]:_(s32) = PRED_COPY $vgpr28 + ; CHECK-NEXT: [[PRED_COPY29:%[0-9]+]]:_(s32) = PRED_COPY $vgpr29 + ; CHECK-NEXT: [[PRED_COPY30:%[0-9]+]]:_(s32) = PRED_COPY $vgpr30 ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.4 ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.4, align 16, addrspace 5) - ; CHECK-NEXT: 
[[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[LOAD]](s32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32), [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32), [[PRED_COPY12]](s32), [[PRED_COPY13]](s32), [[PRED_COPY14]](s32), [[PRED_COPY15]](s32), [[PRED_COPY16]](s32), [[PRED_COPY17]](s32), [[PRED_COPY18]](s32), [[PRED_COPY19]](s32), [[PRED_COPY20]](s32), [[PRED_COPY21]](s32), [[PRED_COPY22]](s32), [[PRED_COPY23]](s32), [[PRED_COPY24]](s32), [[PRED_COPY25]](s32), [[PRED_COPY26]](s32), [[PRED_COPY27]](s32), [[PRED_COPY28]](s32), [[PRED_COPY29]](s32), [[PRED_COPY30]](s32), [[LOAD]](s32) ; CHECK-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.3 ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (s1) from %fixed-stack.3, align 4, addrspace 5) ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[LOAD1]](s32) @@ -2019,40 +2019,40 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 - ; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 - ; CHECK-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 - ; CHECK-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 - ; CHECK-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24 - ; CHECK-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25 - ; CHECK-NEXT: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26 - ; CHECK-NEXT: [[COPY27:%[0-9]+]]:_(s32) = COPY 
$vgpr27 - ; CHECK-NEXT: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28 - ; CHECK-NEXT: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29 - ; CHECK-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $vgpr6 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $vgpr7 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr8 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr9 + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr10 + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $vgpr11 + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr12 + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr13 + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr14 + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr15 + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY $vgpr16 + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY $vgpr17 + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY $vgpr18 + ; CHECK-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY $vgpr19 + ; CHECK-NEXT: [[PRED_COPY20:%[0-9]+]]:_(s32) = PRED_COPY $vgpr20 + ; CHECK-NEXT: [[PRED_COPY21:%[0-9]+]]:_(s32) = PRED_COPY $vgpr21 + ; CHECK-NEXT: [[PRED_COPY22:%[0-9]+]]:_(s32) = PRED_COPY $vgpr22 + ; CHECK-NEXT: [[PRED_COPY23:%[0-9]+]]:_(s32) = PRED_COPY $vgpr23 + ; CHECK-NEXT: [[PRED_COPY24:%[0-9]+]]:_(s32) = PRED_COPY $vgpr24 + ; CHECK-NEXT: [[PRED_COPY25:%[0-9]+]]:_(s32) = PRED_COPY $vgpr25 + ; CHECK-NEXT: [[PRED_COPY26:%[0-9]+]]:_(s32) = PRED_COPY $vgpr26 + ; CHECK-NEXT: [[PRED_COPY27:%[0-9]+]]:_(s32) = PRED_COPY $vgpr27 + ; CHECK-NEXT: [[PRED_COPY28:%[0-9]+]]:_(s32) = PRED_COPY $vgpr28 + ; CHECK-NEXT: [[PRED_COPY29:%[0-9]+]]:_(s32) = PRED_COPY $vgpr29 + ; CHECK-NEXT: [[PRED_COPY30:%[0-9]+]]:_(s32) = PRED_COPY $vgpr30 ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2 ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.2, align 16, addrspace 5) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[LOAD]](s32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32), [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32), [[PRED_COPY12]](s32), [[PRED_COPY13]](s32), [[PRED_COPY14]](s32), [[PRED_COPY15]](s32), [[PRED_COPY16]](s32), [[PRED_COPY17]](s32), [[PRED_COPY18]](s32), [[PRED_COPY19]](s32), 
[[PRED_COPY20]](s32), [[PRED_COPY21]](s32), [[PRED_COPY22]](s32), [[PRED_COPY23]](s32), [[PRED_COPY24]](s32), [[PRED_COPY25]](s32), [[PRED_COPY26]](s32), [[PRED_COPY27]](s32), [[PRED_COPY28]](s32), [[PRED_COPY29]](s32), [[PRED_COPY30]](s32), [[LOAD]](s32) ; CHECK-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(p3) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (p3) from %fixed-stack.1, addrspace 5) ; CHECK-NEXT: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 @@ -2073,40 +2073,40 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 - ; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 - ; CHECK-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 - ; CHECK-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 - ; CHECK-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24 - ; CHECK-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25 - ; CHECK-NEXT: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26 - ; CHECK-NEXT: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27 - ; CHECK-NEXT: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28 - ; CHECK-NEXT: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29 - ; CHECK-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $vgpr6 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $vgpr7 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr8 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr9 + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr10 + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $vgpr11 + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr12 + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY 
$vgpr13 + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr14 + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr15 + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY $vgpr16 + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY $vgpr17 + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY $vgpr18 + ; CHECK-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY $vgpr19 + ; CHECK-NEXT: [[PRED_COPY20:%[0-9]+]]:_(s32) = PRED_COPY $vgpr20 + ; CHECK-NEXT: [[PRED_COPY21:%[0-9]+]]:_(s32) = PRED_COPY $vgpr21 + ; CHECK-NEXT: [[PRED_COPY22:%[0-9]+]]:_(s32) = PRED_COPY $vgpr22 + ; CHECK-NEXT: [[PRED_COPY23:%[0-9]+]]:_(s32) = PRED_COPY $vgpr23 + ; CHECK-NEXT: [[PRED_COPY24:%[0-9]+]]:_(s32) = PRED_COPY $vgpr24 + ; CHECK-NEXT: [[PRED_COPY25:%[0-9]+]]:_(s32) = PRED_COPY $vgpr25 + ; CHECK-NEXT: [[PRED_COPY26:%[0-9]+]]:_(s32) = PRED_COPY $vgpr26 + ; CHECK-NEXT: [[PRED_COPY27:%[0-9]+]]:_(s32) = PRED_COPY $vgpr27 + ; CHECK-NEXT: [[PRED_COPY28:%[0-9]+]]:_(s32) = PRED_COPY $vgpr28 + ; CHECK-NEXT: [[PRED_COPY29:%[0-9]+]]:_(s32) = PRED_COPY $vgpr29 + ; CHECK-NEXT: [[PRED_COPY30:%[0-9]+]]:_(s32) = PRED_COPY $vgpr30 ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.4 ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.4, align 16, addrspace 5) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[LOAD]](s32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32), [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32), [[PRED_COPY12]](s32), [[PRED_COPY13]](s32), [[PRED_COPY14]](s32), [[PRED_COPY15]](s32), [[PRED_COPY16]](s32), [[PRED_COPY17]](s32), [[PRED_COPY18]](s32), [[PRED_COPY19]](s32), [[PRED_COPY20]](s32), [[PRED_COPY21]](s32), [[PRED_COPY22]](s32), [[PRED_COPY23]](s32), [[PRED_COPY24]](s32), [[PRED_COPY25]](s32), [[PRED_COPY26]](s32), [[PRED_COPY27]](s32), [[PRED_COPY28]](s32), [[PRED_COPY29]](s32), [[PRED_COPY30]](s32), [[LOAD]](s32) ; CHECK-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.3 ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (s32) from %fixed-stack.3, addrspace 5) ; CHECK-NEXT: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2 @@ -2133,40 +2133,40 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: 
[[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 - ; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 - ; CHECK-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 - ; CHECK-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 - ; CHECK-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24 - ; CHECK-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25 - ; CHECK-NEXT: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26 - ; CHECK-NEXT: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27 - ; CHECK-NEXT: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28 - ; CHECK-NEXT: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29 - ; CHECK-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $vgpr6 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $vgpr7 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr8 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr9 + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr10 + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $vgpr11 + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr12 + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr13 + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr14 + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr15 + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY $vgpr16 + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY $vgpr17 + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY $vgpr18 + ; CHECK-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY $vgpr19 + ; CHECK-NEXT: [[PRED_COPY20:%[0-9]+]]:_(s32) = PRED_COPY $vgpr20 + ; CHECK-NEXT: [[PRED_COPY21:%[0-9]+]]:_(s32) = PRED_COPY $vgpr21 + ; CHECK-NEXT: [[PRED_COPY22:%[0-9]+]]:_(s32) = PRED_COPY $vgpr22 + ; CHECK-NEXT: [[PRED_COPY23:%[0-9]+]]:_(s32) = PRED_COPY $vgpr23 + ; CHECK-NEXT: [[PRED_COPY24:%[0-9]+]]:_(s32) = PRED_COPY $vgpr24 + ; CHECK-NEXT: [[PRED_COPY25:%[0-9]+]]:_(s32) = PRED_COPY $vgpr25 + ; CHECK-NEXT: [[PRED_COPY26:%[0-9]+]]:_(s32) = PRED_COPY $vgpr26 + ; CHECK-NEXT: [[PRED_COPY27:%[0-9]+]]:_(s32) = PRED_COPY $vgpr27 + ; CHECK-NEXT: [[PRED_COPY28:%[0-9]+]]:_(s32) = PRED_COPY $vgpr28 + ; CHECK-NEXT: [[PRED_COPY29:%[0-9]+]]:_(s32) = PRED_COPY $vgpr29 
+ ; CHECK-NEXT: [[PRED_COPY30:%[0-9]+]]:_(s32) = PRED_COPY $vgpr30 ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2 ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.2, align 16, addrspace 5) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[LOAD]](s32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32), [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32), [[PRED_COPY12]](s32), [[PRED_COPY13]](s32), [[PRED_COPY14]](s32), [[PRED_COPY15]](s32), [[PRED_COPY16]](s32), [[PRED_COPY17]](s32), [[PRED_COPY18]](s32), [[PRED_COPY19]](s32), [[PRED_COPY20]](s32), [[PRED_COPY21]](s32), [[PRED_COPY22]](s32), [[PRED_COPY23]](s32), [[PRED_COPY24]](s32), [[PRED_COPY25]](s32), [[PRED_COPY26]](s32), [[PRED_COPY27]](s32), [[PRED_COPY28]](s32), [[PRED_COPY29]](s32), [[PRED_COPY30]](s32), [[LOAD]](s32) ; CHECK-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (<2 x s16>) from %fixed-stack.1, addrspace 5) ; CHECK-NEXT: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 @@ -2187,40 +2187,40 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 - ; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 - ; CHECK-NEXT: 
[[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 - ; CHECK-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 - ; CHECK-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24 - ; CHECK-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25 - ; CHECK-NEXT: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26 - ; CHECK-NEXT: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27 - ; CHECK-NEXT: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28 - ; CHECK-NEXT: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29 - ; CHECK-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $vgpr6 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $vgpr7 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr8 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr9 + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr10 + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $vgpr11 + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr12 + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr13 + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr14 + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr15 + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY $vgpr16 + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY $vgpr17 + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY $vgpr18 + ; CHECK-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY $vgpr19 + ; CHECK-NEXT: [[PRED_COPY20:%[0-9]+]]:_(s32) = PRED_COPY $vgpr20 + ; CHECK-NEXT: [[PRED_COPY21:%[0-9]+]]:_(s32) = PRED_COPY $vgpr21 + ; CHECK-NEXT: [[PRED_COPY22:%[0-9]+]]:_(s32) = PRED_COPY $vgpr22 + ; CHECK-NEXT: [[PRED_COPY23:%[0-9]+]]:_(s32) = PRED_COPY $vgpr23 + ; CHECK-NEXT: [[PRED_COPY24:%[0-9]+]]:_(s32) = PRED_COPY $vgpr24 + ; CHECK-NEXT: [[PRED_COPY25:%[0-9]+]]:_(s32) = PRED_COPY $vgpr25 + ; CHECK-NEXT: [[PRED_COPY26:%[0-9]+]]:_(s32) = PRED_COPY $vgpr26 + ; CHECK-NEXT: [[PRED_COPY27:%[0-9]+]]:_(s32) = PRED_COPY $vgpr27 + ; CHECK-NEXT: [[PRED_COPY28:%[0-9]+]]:_(s32) = PRED_COPY $vgpr28 + ; CHECK-NEXT: [[PRED_COPY29:%[0-9]+]]:_(s32) = PRED_COPY $vgpr29 + ; CHECK-NEXT: [[PRED_COPY30:%[0-9]+]]:_(s32) = PRED_COPY $vgpr30 ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.8 ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.8, align 16, addrspace 5) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[LOAD]](s32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), 
[[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32), [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32), [[PRED_COPY12]](s32), [[PRED_COPY13]](s32), [[PRED_COPY14]](s32), [[PRED_COPY15]](s32), [[PRED_COPY16]](s32), [[PRED_COPY17]](s32), [[PRED_COPY18]](s32), [[PRED_COPY19]](s32), [[PRED_COPY20]](s32), [[PRED_COPY21]](s32), [[PRED_COPY22]](s32), [[PRED_COPY23]](s32), [[PRED_COPY24]](s32), [[PRED_COPY25]](s32), [[PRED_COPY26]](s32), [[PRED_COPY27]](s32), [[PRED_COPY28]](s32), [[PRED_COPY29]](s32), [[PRED_COPY30]](s32), [[LOAD]](s32) ; CHECK-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.7 ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (s32) from %fixed-stack.7, addrspace 5) ; CHECK-NEXT: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.6 @@ -2259,40 +2259,40 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 - ; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 - ; CHECK-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 - ; CHECK-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 - ; CHECK-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24 - ; CHECK-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25 - ; CHECK-NEXT: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26 - ; CHECK-NEXT: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27 - ; CHECK-NEXT: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28 - ; CHECK-NEXT: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29 - ; CHECK-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $vgpr6 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $vgpr7 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY 
$vgpr8 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr9 + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr10 + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $vgpr11 + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr12 + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr13 + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr14 + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr15 + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY $vgpr16 + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY $vgpr17 + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY $vgpr18 + ; CHECK-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY $vgpr19 + ; CHECK-NEXT: [[PRED_COPY20:%[0-9]+]]:_(s32) = PRED_COPY $vgpr20 + ; CHECK-NEXT: [[PRED_COPY21:%[0-9]+]]:_(s32) = PRED_COPY $vgpr21 + ; CHECK-NEXT: [[PRED_COPY22:%[0-9]+]]:_(s32) = PRED_COPY $vgpr22 + ; CHECK-NEXT: [[PRED_COPY23:%[0-9]+]]:_(s32) = PRED_COPY $vgpr23 + ; CHECK-NEXT: [[PRED_COPY24:%[0-9]+]]:_(s32) = PRED_COPY $vgpr24 + ; CHECK-NEXT: [[PRED_COPY25:%[0-9]+]]:_(s32) = PRED_COPY $vgpr25 + ; CHECK-NEXT: [[PRED_COPY26:%[0-9]+]]:_(s32) = PRED_COPY $vgpr26 + ; CHECK-NEXT: [[PRED_COPY27:%[0-9]+]]:_(s32) = PRED_COPY $vgpr27 + ; CHECK-NEXT: [[PRED_COPY28:%[0-9]+]]:_(s32) = PRED_COPY $vgpr28 + ; CHECK-NEXT: [[PRED_COPY29:%[0-9]+]]:_(s32) = PRED_COPY $vgpr29 + ; CHECK-NEXT: [[PRED_COPY30:%[0-9]+]]:_(s32) = PRED_COPY $vgpr30 ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.8 ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.8, align 16, addrspace 5) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[LOAD]](s32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32), [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32), [[PRED_COPY12]](s32), [[PRED_COPY13]](s32), [[PRED_COPY14]](s32), [[PRED_COPY15]](s32), [[PRED_COPY16]](s32), [[PRED_COPY17]](s32), [[PRED_COPY18]](s32), [[PRED_COPY19]](s32), [[PRED_COPY20]](s32), [[PRED_COPY21]](s32), [[PRED_COPY22]](s32), [[PRED_COPY23]](s32), [[PRED_COPY24]](s32), [[PRED_COPY25]](s32), [[PRED_COPY26]](s32), [[PRED_COPY27]](s32), [[PRED_COPY28]](s32), [[PRED_COPY29]](s32), [[PRED_COPY30]](s32), [[LOAD]](s32) ; CHECK-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.7 ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (s32) from %fixed-stack.7, addrspace 5) ; CHECK-NEXT: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.6 @@ -2327,40 +2327,40 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, 
$vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 - ; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 - ; CHECK-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 - ; CHECK-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 - ; CHECK-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24 - ; CHECK-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25 - ; CHECK-NEXT: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26 - ; CHECK-NEXT: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27 - ; CHECK-NEXT: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28 - ; CHECK-NEXT: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29 - ; CHECK-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $vgpr6 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $vgpr7 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr8 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr9 + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr10 + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $vgpr11 + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr12 + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr13 + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr14 + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr15 + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY $vgpr16 + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY $vgpr17 + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY $vgpr18 + ; CHECK-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY $vgpr19 + ; CHECK-NEXT: [[PRED_COPY20:%[0-9]+]]:_(s32) = PRED_COPY $vgpr20 + ; CHECK-NEXT: [[PRED_COPY21:%[0-9]+]]:_(s32) = PRED_COPY $vgpr21 + ; CHECK-NEXT: [[PRED_COPY22:%[0-9]+]]:_(s32) = PRED_COPY $vgpr22 + ; CHECK-NEXT: [[PRED_COPY23:%[0-9]+]]:_(s32) = PRED_COPY $vgpr23 + ; CHECK-NEXT: [[PRED_COPY24:%[0-9]+]]:_(s32) = PRED_COPY $vgpr24 + ; 
CHECK-NEXT: [[PRED_COPY25:%[0-9]+]]:_(s32) = PRED_COPY $vgpr25 + ; CHECK-NEXT: [[PRED_COPY26:%[0-9]+]]:_(s32) = PRED_COPY $vgpr26 + ; CHECK-NEXT: [[PRED_COPY27:%[0-9]+]]:_(s32) = PRED_COPY $vgpr27 + ; CHECK-NEXT: [[PRED_COPY28:%[0-9]+]]:_(s32) = PRED_COPY $vgpr28 + ; CHECK-NEXT: [[PRED_COPY29:%[0-9]+]]:_(s32) = PRED_COPY $vgpr29 + ; CHECK-NEXT: [[PRED_COPY30:%[0-9]+]]:_(s32) = PRED_COPY $vgpr30 ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.16 ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.16, align 16, addrspace 5) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[LOAD]](s32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32), [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32), [[PRED_COPY12]](s32), [[PRED_COPY13]](s32), [[PRED_COPY14]](s32), [[PRED_COPY15]](s32), [[PRED_COPY16]](s32), [[PRED_COPY17]](s32), [[PRED_COPY18]](s32), [[PRED_COPY19]](s32), [[PRED_COPY20]](s32), [[PRED_COPY21]](s32), [[PRED_COPY22]](s32), [[PRED_COPY23]](s32), [[PRED_COPY24]](s32), [[PRED_COPY25]](s32), [[PRED_COPY26]](s32), [[PRED_COPY27]](s32), [[PRED_COPY28]](s32), [[PRED_COPY29]](s32), [[PRED_COPY30]](s32), [[LOAD]](s32) ; CHECK-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.15 ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (s32) from %fixed-stack.15, addrspace 5) ; CHECK-NEXT: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.14 @@ -2411,40 +2411,40 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = 
COPY $vgpr16 - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 - ; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 - ; CHECK-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 - ; CHECK-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 - ; CHECK-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24 - ; CHECK-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25 - ; CHECK-NEXT: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26 - ; CHECK-NEXT: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27 - ; CHECK-NEXT: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28 - ; CHECK-NEXT: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29 - ; CHECK-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $vgpr6 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $vgpr7 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr8 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr9 + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr10 + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $vgpr11 + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr12 + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr13 + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr14 + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr15 + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY $vgpr16 + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY $vgpr17 + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY $vgpr18 + ; CHECK-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY $vgpr19 + ; CHECK-NEXT: [[PRED_COPY20:%[0-9]+]]:_(s32) = PRED_COPY $vgpr20 + ; CHECK-NEXT: [[PRED_COPY21:%[0-9]+]]:_(s32) = PRED_COPY $vgpr21 + ; CHECK-NEXT: [[PRED_COPY22:%[0-9]+]]:_(s32) = PRED_COPY $vgpr22 + ; CHECK-NEXT: [[PRED_COPY23:%[0-9]+]]:_(s32) = PRED_COPY $vgpr23 + ; CHECK-NEXT: [[PRED_COPY24:%[0-9]+]]:_(s32) = PRED_COPY $vgpr24 + ; CHECK-NEXT: [[PRED_COPY25:%[0-9]+]]:_(s32) = PRED_COPY $vgpr25 + ; CHECK-NEXT: [[PRED_COPY26:%[0-9]+]]:_(s32) = PRED_COPY $vgpr26 + ; CHECK-NEXT: [[PRED_COPY27:%[0-9]+]]:_(s32) = PRED_COPY $vgpr27 + ; CHECK-NEXT: [[PRED_COPY28:%[0-9]+]]:_(s32) = PRED_COPY $vgpr28 + ; CHECK-NEXT: [[PRED_COPY29:%[0-9]+]]:_(s32) = PRED_COPY $vgpr29 + ; CHECK-NEXT: [[PRED_COPY30:%[0-9]+]]:_(s32) = PRED_COPY $vgpr30 ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.32 ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.32, align 16, addrspace 5) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), 
[[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[LOAD]](s32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32), [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32), [[PRED_COPY12]](s32), [[PRED_COPY13]](s32), [[PRED_COPY14]](s32), [[PRED_COPY15]](s32), [[PRED_COPY16]](s32), [[PRED_COPY17]](s32), [[PRED_COPY18]](s32), [[PRED_COPY19]](s32), [[PRED_COPY20]](s32), [[PRED_COPY21]](s32), [[PRED_COPY22]](s32), [[PRED_COPY23]](s32), [[PRED_COPY24]](s32), [[PRED_COPY25]](s32), [[PRED_COPY26]](s32), [[PRED_COPY27]](s32), [[PRED_COPY28]](s32), [[PRED_COPY29]](s32), [[PRED_COPY30]](s32), [[LOAD]](s32) ; CHECK-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.31 ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (s32) from %fixed-stack.31, addrspace 5) ; CHECK-NEXT: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.30 @@ -2528,11 +2528,11 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 @@ -2543,7 +2543,7 @@ ; CHECK-NEXT: G_STORE [[EVEC]](s32), [[DEF]](p3) :: (volatile store (s32) into `ptr addrspace(3) undef`, addrspace 3) ; CHECK-NEXT: G_STORE [[EVEC1]](s32), [[DEF]](p3) :: (volatile store (s32) into `ptr addrspace(3) undef`, addrspace 3) ; CHECK-NEXT: G_STORE [[EVEC2]](s32), [[DEF]](p3) :: (volatile store (s32) into `ptr addrspace(3) undef`, addrspace 3) - ; CHECK-NEXT: G_STORE [[COPY3]](s32), [[DEF]](p3) :: (volatile store (s32) into `ptr addrspace(3) undef`, addrspace 3) + ; CHECK-NEXT: G_STORE [[PRED_COPY3]](s32), [[DEF]](p3) :: (volatile store (s32) into `ptr addrspace(3) undef`, addrspace 3) ; CHECK-NEXT: SI_RETURN %arg0.0 = extractelement <3 x float> %arg0, i32 0 %arg0.1 = extractelement <3 x float> %arg0, i32 1 @@ -2560,11 +2560,11 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: 
[[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 @@ -2575,7 +2575,7 @@ ; CHECK-NEXT: G_STORE [[EVEC]](s32), [[DEF]](p3) :: (volatile store (s32) into `ptr addrspace(3) undef`, addrspace 3) ; CHECK-NEXT: G_STORE [[EVEC1]](s32), [[DEF]](p3) :: (volatile store (s32) into `ptr addrspace(3) undef`, addrspace 3) ; CHECK-NEXT: G_STORE [[EVEC2]](s32), [[DEF]](p3) :: (volatile store (s32) into `ptr addrspace(3) undef`, addrspace 3) - ; CHECK-NEXT: G_STORE [[COPY3]](s32), [[DEF]](p3) :: (volatile store (s32) into `ptr addrspace(3) undef`, addrspace 3) + ; CHECK-NEXT: G_STORE [[PRED_COPY3]](s32), [[DEF]](p3) :: (volatile store (s32) into `ptr addrspace(3) undef`, addrspace 3) ; CHECK-NEXT: SI_RETURN %arg0.0 = extractelement <3 x i32> %arg0, i32 0 %arg0.1 = extractelement <3 x i32> %arg0, i32 1 @@ -2593,38 +2593,38 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY4]](s32) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY5]](s32) - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY6]](s32) - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; CHECK-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[COPY7]](s32) - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; CHECK-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[COPY8]](s32) - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; CHECK-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; CHECK-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32) - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; CHECK-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[COPY11]](s32) - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; CHECK-NEXT: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; CHECK-NEXT: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; CHECK-NEXT: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; CHECK-NEXT: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY 
$vgpr1 + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY4]](s32) + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY5]](s32) + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $vgpr6 + ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY6]](s32) + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $vgpr7 + ; CHECK-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY7]](s32) + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr8 + ; CHECK-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY8]](s32) + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr9 + ; CHECK-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY9]](s32) + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr10 + ; CHECK-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY10]](s32) + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $vgpr11 + ; CHECK-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY11]](s32) + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr12 + ; CHECK-NEXT: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY12]](s32) + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr13 + ; CHECK-NEXT: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr14 + ; CHECK-NEXT: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr15 + ; CHECK-NEXT: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY15]](s32) ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16), [[TRUNC4]](s16), [[TRUNC5]](s16), [[TRUNC6]](s16), [[TRUNC7]](s16), [[TRUNC8]](s16), [[TRUNC9]](s16), [[TRUNC10]](s16), [[TRUNC11]](s16), [[TRUNC12]](s16), [[TRUNC13]](s16), [[TRUNC14]](s16), [[TRUNC15]](s16) ; CHECK-NEXT: [[TRUNC16:%[0-9]+]]:_(<16 x s8>) = G_TRUNC [[BUILD_VECTOR]](<16 x s16>) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF @@ -2640,40 +2640,40 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; CHECK-NEXT: 
[[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 - ; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 - ; CHECK-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 - ; CHECK-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 - ; CHECK-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24 - ; CHECK-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25 - ; CHECK-NEXT: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26 - ; CHECK-NEXT: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27 - ; CHECK-NEXT: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28 - ; CHECK-NEXT: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29 - ; CHECK-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $vgpr6 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $vgpr7 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr8 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr9 + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr10 + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $vgpr11 + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr12 + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr13 + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr14 + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr15 + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY $vgpr16 + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY $vgpr17 + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY $vgpr18 + ; CHECK-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY $vgpr19 + ; CHECK-NEXT: [[PRED_COPY20:%[0-9]+]]:_(s32) = PRED_COPY $vgpr20 + ; CHECK-NEXT: [[PRED_COPY21:%[0-9]+]]:_(s32) = PRED_COPY $vgpr21 + ; CHECK-NEXT: [[PRED_COPY22:%[0-9]+]]:_(s32) = PRED_COPY $vgpr22 + ; CHECK-NEXT: [[PRED_COPY23:%[0-9]+]]:_(s32) = PRED_COPY $vgpr23 + ; CHECK-NEXT: [[PRED_COPY24:%[0-9]+]]:_(s32) = PRED_COPY $vgpr24 + ; CHECK-NEXT: [[PRED_COPY25:%[0-9]+]]:_(s32) = PRED_COPY $vgpr25 + ; CHECK-NEXT: [[PRED_COPY26:%[0-9]+]]:_(s32) = PRED_COPY $vgpr26 + ; CHECK-NEXT: [[PRED_COPY27:%[0-9]+]]:_(s32) = PRED_COPY $vgpr27 + ; CHECK-NEXT: [[PRED_COPY28:%[0-9]+]]:_(s32) = PRED_COPY $vgpr28 + ; CHECK-NEXT: [[PRED_COPY29:%[0-9]+]]:_(s32) = PRED_COPY $vgpr29 + ; CHECK-NEXT: [[PRED_COPY30:%[0-9]+]]:_(s32) = PRED_COPY $vgpr30 ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.16 ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.16, align 16, addrspace 5) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), 
[[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[LOAD]](s32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32), [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32), [[PRED_COPY12]](s32), [[PRED_COPY13]](s32), [[PRED_COPY14]](s32), [[PRED_COPY15]](s32), [[PRED_COPY16]](s32), [[PRED_COPY17]](s32), [[PRED_COPY18]](s32), [[PRED_COPY19]](s32), [[PRED_COPY20]](s32), [[PRED_COPY21]](s32), [[PRED_COPY22]](s32), [[PRED_COPY23]](s32), [[PRED_COPY24]](s32), [[PRED_COPY25]](s32), [[PRED_COPY26]](s32), [[PRED_COPY27]](s32), [[PRED_COPY28]](s32), [[PRED_COPY29]](s32), [[PRED_COPY30]](s32), [[LOAD]](s32) ; CHECK-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.15 ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (s16) from %fixed-stack.15, align 4, addrspace 5) ; CHECK-NEXT: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.14 @@ -2722,22 +2722,22 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(p3) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[PRED_COPY1]](s32), [[PRED_COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY3]](s32) ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC]](s16) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(p3) = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(p1234) = G_MERGE_VALUES [[COPY5]](s32), [[COPY6]](s32) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:_(p3) = PRED_COPY $vgpr4 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $vgpr6 + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(p1234) = G_MERGE_VALUES [[PRED_COPY5]](s32), [[PRED_COPY6]](s32) ; CHECK-NEXT: [[C:%[0-9]+]]:_(p1) = G_CONSTANT i64 0 - ; CHECK-NEXT: G_STORE [[COPY]](p3), [[C]](p1) :: (volatile store (p3) into `ptr addrspace(1) null`, addrspace 1) + ; CHECK-NEXT: G_STORE [[PRED_COPY]](p3), [[C]](p1) :: (volatile store (p3) into `ptr addrspace(1) null`, addrspace 1) ; CHECK-NEXT: G_STORE [[MV]](p1), [[C]](p1) :: (volatile store (p1) into `ptr addrspace(1) null`, addrspace 1) ; CHECK-NEXT: G_STORE [[TRUNC1]](s8), [[C]](p1) :: (volatile store (s8) into `ptr addrspace(1) null`, addrspace 1) - ; CHECK-NEXT: G_STORE [[COPY4]](p3), [[C]](p1) 
:: (volatile store (p3) into `ptr addrspace(1) null`, addrspace 1) + ; CHECK-NEXT: G_STORE [[PRED_COPY4]](p3), [[C]](p1) :: (volatile store (p3) into `ptr addrspace(1) null`, addrspace 1) ; CHECK-NEXT: G_STORE [[MV1]](p1234), [[C]](p1) :: (volatile store (p1234) into `ptr addrspace(1) null`, addrspace 1) ; CHECK-NEXT: SI_RETURN %val0 = extractvalue {ptr addrspace(3), ptr addrspace(1)} %arg0, 0 @@ -2757,16 +2757,16 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[PRED_COPY]](s32), [[PRED_COPY1]](s32) + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p1>) = G_BUILD_VECTOR [[MV]](p1), [[MV1]](p1) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(p3) = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(p3) = COPY $vgpr5 - ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[COPY4]](p3), [[COPY5]](p3) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:_(p3) = PRED_COPY $vgpr4 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:_(p3) = PRED_COPY $vgpr5 + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[PRED_COPY4]](p3), [[PRED_COPY5]](p3) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<2 x p1>), [[DEF]](p1) :: (store (<2 x p1>) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-getelementptr.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-getelementptr.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-getelementptr.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-getelementptr.ll @@ -7,30 +7,30 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[PRED_COPY]](s32), [[PRED_COPY1]](s32) + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p1>) = G_BUILD_VECTOR [[MV]](p1), [[MV1]](p1) - ; CHECK-NEXT: 
[[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; CHECK-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $vgpr6 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $vgpr7 + ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY4]](s32), [[PRED_COPY5]](s32) + ; CHECK-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV2]](s64), [[MV3]](s64) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C]](s64), [[C]](s64) ; CHECK-NEXT: [[MUL:%[0-9]+]]:_(<2 x s64>) = G_MUL [[BUILD_VECTOR1]], [[BUILD_VECTOR2]] ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(<2 x p1>) = G_PTR_ADD [[BUILD_VECTOR]], [[MUL]](<2 x s64>) - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(<2 x p1>) = COPY [[PTR_ADD]](<2 x p1>) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY8]](<2 x p1>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:_(<2 x p1>) = PRED_COPY [[PTR_ADD]](<2 x p1>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[PRED_COPY8]](<2 x p1>) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 %gep = getelementptr i32, <2 x ptr addrspace(1)> %ptr, <2 x i64> %idx ret <2 x ptr addrspace(1)> %gep @@ -42,20 +42,20 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p3) = COPY $vgpr1 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[COPY]](p3), [[COPY1]](p3) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(p3) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(p3) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[PRED_COPY]](p3), [[PRED_COPY1]](p3) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32) ; CHECK-NEXT: [[MUL:%[0-9]+]]:_(<2 x s32>) = G_MUL [[BUILD_VECTOR1]], [[BUILD_VECTOR2]] ; CHECK-NEXT: 
[[PTR_ADD:%[0-9]+]]:_(<2 x p3>) = G_PTR_ADD [[BUILD_VECTOR]], [[MUL]](<2 x s32>) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(<2 x p3>) = COPY [[PTR_ADD]](<2 x p3>) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY4]](<2 x p3>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:_(<2 x p3>) = PRED_COPY [[PTR_ADD]](<2 x p3>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[PRED_COPY4]](<2 x p3>) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %gep = getelementptr i32, <2 x ptr addrspace(3)> %ptr, <2 x i32> %idx ret <2 x ptr addrspace(3)> %gep @@ -67,27 +67,27 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[PRED_COPY]](s32), [[PRED_COPY1]](s32) + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p1>) = G_BUILD_VECTOR [[MV]](p1), [[MV1]](p1) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY4]](s32), [[COPY5]](s32) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY4]](s32), [[PRED_COPY5]](s32) ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(<2 x s64>) = G_SEXT [[BUILD_VECTOR1]](<2 x s32>) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C]](s64), [[C]](s64) ; CHECK-NEXT: [[MUL:%[0-9]+]]:_(<2 x s64>) = G_MUL [[SEXT]], [[BUILD_VECTOR2]] ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(<2 x p1>) = G_PTR_ADD [[BUILD_VECTOR]], [[MUL]](<2 x s64>) - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(<2 x p1>) = COPY [[PTR_ADD]](<2 x p1>) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY6]](<2 x p1>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:_(<2 x p1>) = PRED_COPY [[PTR_ADD]](<2 x p1>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[PRED_COPY6]](<2 x p1>) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; CHECK-NEXT: 
SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 %gep = getelementptr i32, <2 x ptr addrspace(1)> %ptr, <2 x i32> %idx ret <2 x ptr addrspace(1)> %gep @@ -99,28 +99,28 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[PRED_COPY]](s32), [[PRED_COPY1]](s32) + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p1>) = G_BUILD_VECTOR [[MV]](p1), [[MV1]](p1) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY4]](s32), [[PRED_COPY5]](s32) ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV2]](s64), [[MV2]](s64) - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(<2 x s64>) = COPY [[BUILD_VECTOR1]](<2 x s64>) + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:_(<2 x s64>) = PRED_COPY [[BUILD_VECTOR1]](<2 x s64>) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C]](s64), [[C]](s64) - ; CHECK-NEXT: [[MUL:%[0-9]+]]:_(<2 x s64>) = G_MUL [[COPY6]], [[BUILD_VECTOR2]] + ; CHECK-NEXT: [[MUL:%[0-9]+]]:_(<2 x s64>) = G_MUL [[PRED_COPY6]], [[BUILD_VECTOR2]] ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(<2 x p1>) = G_PTR_ADD [[BUILD_VECTOR]], [[MUL]](<2 x s64>) - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(<2 x p1>) = COPY [[PTR_ADD]](<2 x p1>) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY7]](<2 x p1>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:_(<2 x p1>) = PRED_COPY [[PTR_ADD]](<2 x p1>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[PRED_COPY7]](<2 x p1>) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 %gep = getelementptr i32, <2 x ptr addrspace(1)> %ptr, i64 %idx ret <2 x ptr addrspace(1)> %gep @@ -132,26 +132,26 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY 
$vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[PRED_COPY]](s32), [[PRED_COPY1]](s32) + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p1>) = G_BUILD_VECTOR [[MV]](p1), [[MV1]](p1) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY4]](s32), [[COPY4]](s32) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY4]](s32), [[PRED_COPY4]](s32) ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(<2 x s64>) = G_SEXT [[BUILD_VECTOR1]](<2 x s32>) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C]](s64), [[C]](s64) ; CHECK-NEXT: [[MUL:%[0-9]+]]:_(<2 x s64>) = G_MUL [[SEXT]], [[BUILD_VECTOR2]] ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(<2 x p1>) = G_PTR_ADD [[BUILD_VECTOR]], [[MUL]](<2 x s64>) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(<2 x p1>) = COPY [[PTR_ADD]](<2 x p1>) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY5]](<2 x p1>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:_(<2 x p1>) = PRED_COPY [[PTR_ADD]](<2 x p1>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[PRED_COPY5]](<2 x p1>) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 %gep = getelementptr i32, <2 x ptr addrspace(1)> %ptr, i32 %idx ret <2 x ptr addrspace(1)> %gep @@ -163,19 +163,19 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[PRED_COPY]](s32), [[PRED_COPY1]](s32) + ; CHECK-NEXT: 
[[MV1:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p1>) = G_BUILD_VECTOR [[MV]](p1), [[MV1]](p1) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; CHECK-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $vgpr6 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $vgpr7 + ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY4]](s32), [[PRED_COPY5]](s32) + ; CHECK-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV2]](s64), [[MV3]](s64) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 @@ -185,12 +185,12 @@ ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; CHECK-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C2]](s64), [[C3]](s64) ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(<2 x p1>) = G_PTR_ADD [[BUILD_VECTOR]], [[BUILD_VECTOR4]](<2 x s64>) - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(<2 x p1>) = COPY [[PTR_ADD]](<2 x p1>) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY8]](<2 x p1>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:_(<2 x p1>) = PRED_COPY [[PTR_ADD]](<2 x p1>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[PRED_COPY8]](<2 x p1>) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 %gep = getelementptr i32, <2 x ptr addrspace(1)> %ptr, <2 x i64> ret <2 x ptr addrspace(1)> %gep diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-indirect-call.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-indirect-call.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-indirect-call.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-indirect-call.ll @@ -6,49 +6,49 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; 
CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr16 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr15 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr6_sgpr7 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr) ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p0) = G_LOAD [[INT]](p4) :: (dereferenceable invariant load (p0) from %ir.fptr.kernarg.offset1, align 16, addrspace 4) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]] + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY9]](p4) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY12]], [[C]](s64) + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY18]], [[C1]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY17]], [[SHL]] + ; CHECK-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY19]], [[C2]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; CHECK-NEXT: 
$sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: [[PRED_COPY20:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY20]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY10]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY11]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY13]](s64) + ; CHECK-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY14]](s32) + ; CHECK-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY15]](s32) + ; CHECK-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY16]](s32) + ; CHECK-NEXT: $sgpr15 = PRED_COPY [[DEF]](s32) + ; CHECK-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[LOAD]](p0), 0, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -61,12 +61,12 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[PRED_COPY]](s32), [[PRED_COPY1]](s32) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY2]](<4 x s32>) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY2]](<4 x s32>) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[MV]](p0), 0, csr_amdgpu_si_gfx, implicit $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: SI_RETURN diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-inline-asm.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-inline-asm.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-inline-asm.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-inline-asm.ll @@ -52,10 +52,10 @@ ; CHECK-LABEL: name: asm_vgpr_early_clobber ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 7; v_mov_b32 $1, 7", 1 /* sideeffect attdialect */, 1769483 /* regdef-ec:VGPR_32 */, def early-clobber %0, 1769483 /* regdef-ec:VGPR_32 */, def early-clobber %1, !0 - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY %1 - ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[COPY1]] - ; CHECK-NEXT: $vgpr0 = COPY [[ADD]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY %0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY %1 + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[PRED_COPY]], [[PRED_COPY1]] + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[ADD]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 call { i32, i32 } asm sideeffect 
"v_mov_b32 $0, 7; v_mov_b32 $1, 7", "=&v,=&v"(), !srcloc !0 %asmresult = extractvalue { i32, i32 } %1, 0 @@ -68,8 +68,8 @@ ; CHECK-LABEL: name: test_specific_vgpr_output ; CHECK: bb.1.entry: ; CHECK-NEXT: INLINEASM &"v_mov_b32 v1, 7", 0 /* attdialect */, 10 /* regdef */, implicit-def $vgpr1 - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: $vgpr0 = COPY [[COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 entry: %0 = tail call i32 asm "v_mov_b32 v1, 7", "={v1}"() nounwind @@ -80,8 +80,8 @@ ; CHECK-LABEL: name: test_single_vgpr_output ; CHECK: bb.1.entry: ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 7", 0 /* attdialect */, 1769482 /* regdef:VGPR_32 */, def %0 - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %0 - ; CHECK-NEXT: $vgpr0 = COPY [[COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY %0 + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 entry: %0 = tail call i32 asm "v_mov_b32 $0, 7", "=v"() nounwind @@ -92,8 +92,8 @@ ; CHECK-LABEL: name: test_single_sgpr_output_s32 ; CHECK: bb.1.entry: ; CHECK-NEXT: INLINEASM &"s_mov_b32 $0, 7", 0 /* attdialect */, 1900554 /* regdef:SReg_32 */, def %0 - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %0 - ; CHECK-NEXT: $vgpr0 = COPY [[COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY %0 + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 entry: %0 = tail call i32 asm "s_mov_b32 $0, 7", "=s"() nounwind @@ -105,10 +105,10 @@ ; CHECK-LABEL: name: test_multiple_register_outputs_same ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 0; v_mov_b32 $1, 1", 0 /* attdialect */, 1769482 /* regdef:VGPR_32 */, def %0, 1769482 /* regdef:VGPR_32 */, def %1 - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY %1 - ; CHECK-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[COPY]], [[COPY1]] - ; CHECK-NEXT: $vgpr0 = COPY [[FADD]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY %0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY %1 + ; CHECK-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[PRED_COPY]], [[PRED_COPY1]] + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[FADD]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %1 = call { float, float } asm "v_mov_b32 $0, 0; v_mov_b32 $1, 1", "=v,=v"() %asmresult = extractvalue { float, float } %1, 0 @@ -122,11 +122,11 @@ ; CHECK-LABEL: name: test_multiple_register_outputs_mixed ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 0; v_add_f64 $1, 0, 0", 0 /* attdialect */, 1769482 /* regdef:VGPR_32 */, def %0, 3080202 /* regdef:VReg_64 */, def %1 - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY %1 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY %0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s64) = PRED_COPY %1 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[PRED_COPY1]](s64) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %1 = call { float, double } asm "v_mov_b32 $0, 0; v_add_f64 $1, 0, 0", "=v,=v"() %asmresult = extractvalue { float, 
double } %1, 1 @@ -139,9 +139,9 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: INLINEASM &"v_add_f64 $0, 0, 0", 1 /* sideeffect attdialect */, 10 /* regdef */, implicit-def $vgpr14_vgpr15 - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr14_vgpr15 - ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<2 x s32>), [[C]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[EVEC]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(<2 x s32>) = PRED_COPY $vgpr14_vgpr15 + ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[PRED_COPY]](<2 x s32>), [[C]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[EVEC]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %1 = tail call <2 x float> asm sideeffect "v_add_f64 $0, 0, 0", "={v[14:15]}"() nounwind %2 = extractelement <2 x float> %1, i32 0 @@ -152,8 +152,8 @@ ; CHECK-LABEL: name: test_input_vgpr_imm ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[C]](s32) - ; CHECK-NEXT: INLINEASM &"v_mov_b32 v0, $0", 1 /* sideeffect attdialect */, 1769481 /* reguse:VGPR_32 */, [[COPY]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY [[C]](s32) + ; CHECK-NEXT: INLINEASM &"v_mov_b32 v0, $0", 1 /* sideeffect attdialect */, 1769481 /* reguse:VGPR_32 */, [[PRED_COPY]] ; CHECK-NEXT: S_ENDPGM 0 call void asm sideeffect "v_mov_b32 v0, $0", "v"(i32 42) ret void @@ -163,8 +163,8 @@ ; CHECK-LABEL: name: test_input_sgpr_imm ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY [[C]](s32) - ; CHECK-NEXT: INLINEASM &"s_mov_b32 s0, $0", 1 /* sideeffect attdialect */, 1900553 /* reguse:SReg_32 */, [[COPY]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY [[C]](s32) + ; CHECK-NEXT: INLINEASM &"s_mov_b32 s0, $0", 1 /* sideeffect attdialect */, 1900553 /* reguse:SReg_32 */, [[PRED_COPY]] ; CHECK-NEXT: S_ENDPGM 0 call void asm sideeffect "s_mov_b32 s0, $0", "s"(i32 42) ret void @@ -186,11 +186,11 @@ ; CHECK: bb.1.entry: ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]](s32) - ; CHECK-NEXT: INLINEASM &"v_add_f32 $0, 1.0, $1", 0 /* attdialect */, 1769482 /* regdef:VGPR_32 */, def %1, 1769481 /* reguse:VGPR_32 */, [[COPY1]] - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY %1 - ; CHECK-NEXT: $vgpr0 = COPY [[COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]](s32) + ; CHECK-NEXT: INLINEASM &"v_add_f32 $0, 1.0, $1", 0 /* attdialect */, 1769482 /* regdef:VGPR_32 */, def %1, 1769481 /* reguse:VGPR_32 */, [[PRED_COPY1]] + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY %1 + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY2]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 entry: %0 = tail call float asm "v_add_f32 $0, 1.0, $1", "=v,v"(i32 %src) nounwind @@ -202,10 +202,10 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CHECK-NEXT: INLINEASM &"ds_read_b32 $0, $1", 8 /* mayload attdialect */, 1769482 /* regdef:VGPR_32 */, def %1, 262158 /* mem:m */, [[COPY]](p3) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY %1 - ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(p3) = PRED_COPY $vgpr0 + ; CHECK-NEXT: INLINEASM &"ds_read_b32 $0, 
$1", 8 /* mayload attdialect */, 1769482 /* regdef:VGPR_32 */, def %1, 262158 /* mem:m */, [[PRED_COPY]](p3) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY %1 + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY1]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %1 = tail call i32 asm "ds_read_b32 $0, $1", "=v,*m"(ptr addrspace(3) elementtype(i32) %a) ret i32 %1 @@ -216,13 +216,13 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[AND]](s32) - ; CHECK-NEXT: INLINEASM &";", 1 /* sideeffect attdialect */, 1769482 /* regdef:VGPR_32 */, def %3, 2147483657 /* reguse tiedto:$0 */, [[COPY1]](tied-def 3) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY %3 - ; CHECK-NEXT: $vgpr0 = COPY [[COPY2]](s32) + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY]], [[C]] + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[AND]](s32) + ; CHECK-NEXT: INLINEASM &";", 1 /* sideeffect attdialect */, 1769482 /* regdef:VGPR_32 */, def %3, 2147483657 /* reguse tiedto:$0 */, [[PRED_COPY1]](tied-def 3) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY %3 + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY2]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %and = and i32 %a, 1 %asm = call i32 asm sideeffect ";", "=v,0"(i32 %and) @@ -233,14 +233,14 @@ ; CHECK-LABEL: name: test_sgpr_matching_constraint ; CHECK: bb.1.entry: ; CHECK-NEXT: INLINEASM &"s_mov_b32 $0, 7", 0 /* attdialect */, 1900554 /* regdef:SReg_32 */, def %0 - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY %0 ; CHECK-NEXT: INLINEASM &"s_mov_b32 $0, 8", 0 /* attdialect */, 1900554 /* regdef:SReg_32 */, def %2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY %2 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY1]](s32) - ; CHECK-NEXT: INLINEASM &"s_add_u32 $0, $1, $2", 0 /* attdialect */, 1900554 /* regdef:SReg_32 */, def %4, 1900553 /* reguse:SReg_32 */, [[COPY2]], 2147483657 /* reguse tiedto:$0 */, [[COPY3]](tied-def 3) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY %4 - ; CHECK-NEXT: $vgpr0 = COPY [[COPY4]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY %2 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY1]](s32) + ; CHECK-NEXT: INLINEASM &"s_add_u32 $0, $1, $2", 0 /* attdialect */, 1900554 /* regdef:SReg_32 */, def %4, 1900553 /* reguse:SReg_32 */, [[PRED_COPY2]], 2147483657 /* reguse tiedto:$0 */, [[PRED_COPY3]](tied-def 3) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY %4 + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY4]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 entry: %asm0 = tail call i32 asm "s_mov_b32 $0, 7", "=s"() nounwind @@ -254,20 +254,20 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: 
[[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY1]](s32) - ; CHECK-NEXT: INLINEASM &"; ", 1 /* sideeffect attdialect */, 1769482 /* regdef:VGPR_32 */, def %3, 1769482 /* regdef:VGPR_32 */, def %4, 1769482 /* regdef:VGPR_32 */, def %5, 2147483657 /* reguse tiedto:$0 */, [[COPY3]](tied-def 3), 2147614729 /* reguse tiedto:$2 */, [[COPY4]](tied-def 7), 2147549193 /* reguse tiedto:$1 */, [[COPY5]](tied-def 5) - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY %3 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY %4 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY %5 - ; CHECK-NEXT: G_STORE [[COPY6]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) - ; CHECK-NEXT: G_STORE [[COPY7]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) - ; CHECK-NEXT: G_STORE [[COPY8]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY1]](s32) + ; CHECK-NEXT: INLINEASM &"; ", 1 /* sideeffect attdialect */, 1769482 /* regdef:VGPR_32 */, def %3, 1769482 /* regdef:VGPR_32 */, def %4, 1769482 /* regdef:VGPR_32 */, def %5, 2147483657 /* reguse tiedto:$0 */, [[PRED_COPY3]](tied-def 3), 2147614729 /* reguse tiedto:$2 */, [[PRED_COPY4]](tied-def 7), 2147549193 /* reguse tiedto:$1 */, [[PRED_COPY5]](tied-def 5) + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY %3 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY %4 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY %5 + ; CHECK-NEXT: G_STORE [[PRED_COPY6]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[PRED_COPY7]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[PRED_COPY8]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN %asm = call {i32, i32, i32} asm sideeffect "; ", "=v,=v,=v,0,2,1"(i32 %c, i32 %a, i32 %b) %asmresult0 = extractvalue {i32, i32, i32} %asm, 0 @@ -283,11 +283,11 @@ ; CHECK-LABEL: name: test_sgpr_to_vgpr_move_matching_constraint ; CHECK: bb.1.entry: ; CHECK-NEXT: INLINEASM &"s_mov_b32 $0, 7", 0 /* attdialect */, 1900554 /* regdef:SReg_32 */, def %0 - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]](s32) - ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, $1", 0 /* attdialect */, 1769482 /* regdef:VGPR_32 */, def %2, 2147483657 /* reguse tiedto:$0 */, [[COPY1]](tied-def 3) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY %2 - ; CHECK-NEXT: $vgpr0 = COPY [[COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY %0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]](s32) + ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, $1", 0 /* attdialect */, 1769482 /* regdef:VGPR_32 */, def %2, 2147483657 /* reguse tiedto:$0 */, [[PRED_COPY1]](tied-def 3) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY %2 + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY2]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 entry: %asm0 = tail call i32 asm "s_mov_b32 $0, 7", "=s"() nounwind diff --git 
a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-invariant.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-invariant.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-invariant.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-invariant.ll @@ -13,7 +13,7 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p1) = G_GLOBAL_VALUE @const_gv0 ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[GV]](p1) :: (dereferenceable invariant load (s32) from @const_gv0, addrspace 1) - ; CHECK-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[LOAD]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %load = load i32, ptr addrspace(1) @const_gv0, align 4 ret i32 %load @@ -24,13 +24,13 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[PRED_COPY]](s32) ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p1) = G_GLOBAL_VALUE @const_gv0 ; CHECK-NEXT: [[GV1:%[0-9]+]]:_(p1) = G_GLOBAL_VALUE @const_gv1 ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(p1) = G_SELECT [[TRUNC]](s1), [[GV]], [[GV1]] ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[SELECT]](p1) :: (dereferenceable invariant load (s32) from %ir.select, addrspace 1) - ; CHECK-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[LOAD]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %select = select i1 %cond, ptr addrspace(1) @const_gv0, ptr addrspace(1) @const_gv1 %load = load i32, ptr addrspace(1) %select, align 4 @@ -46,9 +46,9 @@ ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[GV]], [[C]](s64) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p1) :: (dereferenceable invariant load (s64) from @const_struct_gv + 8, addrspace 1) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](s64) - ; CHECK-NEXT: $vgpr0 = COPY [[LOAD]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[LOAD]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[UV1]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 %load = load { i32, i64 }, ptr addrspace(1) @const_struct_gv, align 8 ret { i32, i64 } %load @@ -59,12 +59,12 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[PRED_COPY]](s32), [[PRED_COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 ; CHECK-NEXT: G_MEMCPY [[MV]](p1), [[MV1]](p4), [[C]](s64), 0 :: (store (s8) into %ir.dst, addrspace 
1), (dereferenceable invariant load (s8) from %ir.src, addrspace 4) ; CHECK-NEXT: SI_RETURN @@ -77,12 +77,12 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[PRED_COPY]](s32), [[PRED_COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 ; CHECK-NEXT: G_MEMCPY_INLINE [[MV]](p1), [[MV1]](p4), [[C]](s64) :: (store (s8) into %ir.dst, addrspace 1), (dereferenceable invariant load (s8) from %ir.src, addrspace 4) ; CHECK-NEXT: SI_RETURN @@ -95,12 +95,12 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[PRED_COPY]](s32), [[PRED_COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 ; CHECK-NEXT: G_MEMMOVE [[MV]](p1), [[MV1]](p4), [[C]](s64), 0 :: (store (s8) into %ir.dst, addrspace 1), (dereferenceable invariant load (s8) from %ir.src, addrspace 4) ; CHECK-NEXT: SI_RETURN diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-memory-intrinsics.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-memory-intrinsics.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-memory-intrinsics.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-memory-intrinsics.ll @@ -8,13 +8,13 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(p3) = COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[PRED_COPY]](s32), [[PRED_COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(p3) = PRED_COPY $vgpr2 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 256 ; CHECK-NEXT: 
[[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[C]](s64) - ; CHECK-NEXT: G_MEMCPY [[MV]](p1), [[COPY2]](p3), [[TRUNC]](s32), 0 :: (store (s8) into %ir.dst, addrspace 1), (load (s8) from %ir.src, addrspace 3) + ; CHECK-NEXT: G_MEMCPY [[MV]](p1), [[PRED_COPY2]](p3), [[TRUNC]](s32), 0 :: (store (s8) into %ir.dst, addrspace 1), (load (s8) from %ir.src, addrspace 3) ; CHECK-NEXT: SI_RETURN call void @llvm.memcpy.p1.p3.i64(ptr addrspace(1) %dst, ptr addrspace(3) %src, i64 256, i1 false) ret void @@ -25,12 +25,12 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(p3) = COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[PRED_COPY]](s32), [[PRED_COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(p3) = PRED_COPY $vgpr2 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 256 - ; CHECK-NEXT: G_MEMCPY [[MV]](p1), [[COPY2]](p3), [[C]](s32), 0 :: (store (s8) into %ir.dst, addrspace 1), (load (s8) from %ir.src, addrspace 3) + ; CHECK-NEXT: G_MEMCPY [[MV]](p1), [[PRED_COPY2]](p3), [[C]](s32), 0 :: (store (s8) into %ir.dst, addrspace 1), (load (s8) from %ir.src, addrspace 3) ; CHECK-NEXT: SI_RETURN call void @llvm.memcpy.p1.p3.i32(ptr addrspace(1) %dst, ptr addrspace(3) %src, i32 256, i1 false) ret void @@ -41,13 +41,13 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(p3) = COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[PRED_COPY]](s32), [[PRED_COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(p3) = PRED_COPY $vgpr2 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 256 ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[C]](s16) - ; CHECK-NEXT: G_MEMCPY [[MV]](p1), [[COPY2]](p3), [[ZEXT]](s32), 0 :: (store (s8) into %ir.dst, addrspace 1), (load (s8) from %ir.src, addrspace 3) + ; CHECK-NEXT: G_MEMCPY [[MV]](p1), [[PRED_COPY2]](p3), [[ZEXT]](s32), 0 :: (store (s8) into %ir.dst, addrspace 1), (load (s8) from %ir.src, addrspace 3) ; CHECK-NEXT: SI_RETURN call void @llvm.memcpy.p1.p3.i16(ptr addrspace(1) %dst, ptr addrspace(3) %src, i16 256, i1 false) ret void @@ -58,13 +58,13 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(p3) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[PRED_COPY1]](s32), [[PRED_COPY2]](s32) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 256 ; CHECK-NEXT: 
[[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[C]](s64) - ; CHECK-NEXT: G_MEMCPY [[COPY]](p3), [[MV]](p1), [[TRUNC]](s32), 0 :: (store (s8) into %ir.dst, addrspace 3), (load (s8) from %ir.src, addrspace 1) + ; CHECK-NEXT: G_MEMCPY [[PRED_COPY]](p3), [[MV]](p1), [[TRUNC]](s32), 0 :: (store (s8) into %ir.dst, addrspace 3), (load (s8) from %ir.src, addrspace 1) ; CHECK-NEXT: SI_RETURN call void @llvm.memcpy.p3.p1.i64(ptr addrspace(3) %dst, ptr addrspace(1) %src, i64 256, i1 false) ret void @@ -75,12 +75,12 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(p3) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[PRED_COPY1]](s32), [[PRED_COPY2]](s32) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 256 - ; CHECK-NEXT: G_MEMCPY [[COPY]](p3), [[MV]](p1), [[C]](s32), 0 :: (store (s8) into %ir.dst, addrspace 3), (load (s8) from %ir.src, addrspace 1) + ; CHECK-NEXT: G_MEMCPY [[PRED_COPY]](p3), [[MV]](p1), [[C]](s32), 0 :: (store (s8) into %ir.dst, addrspace 3), (load (s8) from %ir.src, addrspace 1) ; CHECK-NEXT: SI_RETURN call void @llvm.memcpy.p3.p1.i32(ptr addrspace(3) %dst, ptr addrspace(1) %src, i32 256, i1 false) ret void @@ -91,13 +91,13 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(p3) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[PRED_COPY1]](s32), [[PRED_COPY2]](s32) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 256 ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[C]](s16) - ; CHECK-NEXT: G_MEMCPY [[COPY]](p3), [[MV]](p1), [[ZEXT]](s32), 0 :: (store (s8) into %ir.dst, addrspace 3), (load (s8) from %ir.src, addrspace 1) + ; CHECK-NEXT: G_MEMCPY [[PRED_COPY]](p3), [[MV]](p1), [[ZEXT]](s32), 0 :: (store (s8) into %ir.dst, addrspace 3), (load (s8) from %ir.src, addrspace 1) ; CHECK-NEXT: SI_RETURN call void @llvm.memcpy.p3.p1.i16(ptr addrspace(3) %dst, ptr addrspace(1) %src, i16 256, i1 false) ret void @@ -108,13 +108,13 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(p3) = COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[PRED_COPY]](s32), [[PRED_COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(p3) = PRED_COPY $vgpr2 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 256 ; CHECK-NEXT: 
[[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[C]](s64) - ; CHECK-NEXT: G_MEMMOVE [[MV]](p1), [[COPY2]](p3), [[TRUNC]](s32), 0 :: (store (s8) into %ir.dst, addrspace 1), (load (s8) from %ir.src, addrspace 3) + ; CHECK-NEXT: G_MEMMOVE [[MV]](p1), [[PRED_COPY2]](p3), [[TRUNC]](s32), 0 :: (store (s8) into %ir.dst, addrspace 1), (load (s8) from %ir.src, addrspace 3) ; CHECK-NEXT: SI_RETURN call void @llvm.memmove.p1.p3.i64(ptr addrspace(1) %dst, ptr addrspace(3) %src, i64 256, i1 false) ret void @@ -125,12 +125,12 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(p3) = COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[PRED_COPY]](s32), [[PRED_COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(p3) = PRED_COPY $vgpr2 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 256 - ; CHECK-NEXT: G_MEMMOVE [[MV]](p1), [[COPY2]](p3), [[C]](s32), 0 :: (store (s8) into %ir.dst, addrspace 1), (load (s8) from %ir.src, addrspace 3) + ; CHECK-NEXT: G_MEMMOVE [[MV]](p1), [[PRED_COPY2]](p3), [[C]](s32), 0 :: (store (s8) into %ir.dst, addrspace 1), (load (s8) from %ir.src, addrspace 3) ; CHECK-NEXT: SI_RETURN call void @llvm.memmove.p1.p3.i32(ptr addrspace(1) %dst, ptr addrspace(3) %src, i32 256, i1 false) ret void @@ -141,13 +141,13 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(p3) = COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[PRED_COPY]](s32), [[PRED_COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(p3) = PRED_COPY $vgpr2 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 256 ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[C]](s16) - ; CHECK-NEXT: G_MEMMOVE [[MV]](p1), [[COPY2]](p3), [[ZEXT]](s32), 0 :: (store (s8) into %ir.dst, addrspace 1), (load (s8) from %ir.src, addrspace 3) + ; CHECK-NEXT: G_MEMMOVE [[MV]](p1), [[PRED_COPY2]](p3), [[ZEXT]](s32), 0 :: (store (s8) into %ir.dst, addrspace 1), (load (s8) from %ir.src, addrspace 3) ; CHECK-NEXT: SI_RETURN call void @llvm.memmove.p1.p3.i16(ptr addrspace(1) %dst, ptr addrspace(3) %src, i16 256, i1 false) ret void @@ -158,11 +158,11 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[PRED_COPY]](s32), [[PRED_COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + 
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[PRED_COPY2]](s32) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 256 ; CHECK-NEXT: G_MEMSET [[MV]](p1), [[TRUNC]](s8), [[C]](s64), 0 :: (store (s8) into %ir.dst, addrspace 1) ; CHECK-NEXT: SI_RETURN @@ -175,11 +175,11 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[PRED_COPY]](s32), [[PRED_COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[PRED_COPY2]](s32) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 256 ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[C]](s32) ; CHECK-NEXT: G_MEMSET [[MV]](p1), [[TRUNC]](s8), [[ZEXT]](s64), 0 :: (store (s8) into %ir.dst, addrspace 1) @@ -193,11 +193,11 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[PRED_COPY]](s32), [[PRED_COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[PRED_COPY2]](s32) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 256 ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[C]](s16) ; CHECK-NEXT: G_MEMSET [[MV]](p1), [[TRUNC]](s8), [[ZEXT]](s64), 0 :: (store (s8) into %ir.dst, addrspace 1) @@ -211,12 +211,12 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(p3) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[PRED_COPY1]](s32) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 256 ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[C]](s64) - ; CHECK-NEXT: G_MEMSET [[COPY]](p3), [[TRUNC]](s8), [[TRUNC1]](s32), 0 :: (store (s8) into %ir.dst, addrspace 3) + ; CHECK-NEXT: G_MEMSET [[PRED_COPY]](p3), [[TRUNC]](s8), [[TRUNC1]](s32), 0 :: (store (s8) into %ir.dst, addrspace 3) ; CHECK-NEXT: SI_RETURN call void @llvm.memset.p3.i64(ptr addrspace(3) %dst, i8 %val, i64 256, i1 false) ret void @@ -227,11 +227,11 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(p3) = 
PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[PRED_COPY1]](s32) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 256 - ; CHECK-NEXT: G_MEMSET [[COPY]](p3), [[TRUNC]](s8), [[C]](s32), 0 :: (store (s8) into %ir.dst, addrspace 3) + ; CHECK-NEXT: G_MEMSET [[PRED_COPY]](p3), [[TRUNC]](s8), [[C]](s32), 0 :: (store (s8) into %ir.dst, addrspace 3) ; CHECK-NEXT: SI_RETURN call void @llvm.memset.p3.i32(ptr addrspace(3) %dst, i8 %val, i32 256, i1 false) ret void @@ -242,12 +242,12 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(p3) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[PRED_COPY1]](s32) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 256 ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[C]](s16) - ; CHECK-NEXT: G_MEMSET [[COPY]](p3), [[TRUNC]](s8), [[ZEXT]](s32), 0 :: (store (s8) into %ir.dst, addrspace 3) + ; CHECK-NEXT: G_MEMSET [[PRED_COPY]](p3), [[TRUNC]](s8), [[ZEXT]](s32), 0 :: (store (s8) into %ir.dst, addrspace 3) ; CHECK-NEXT: SI_RETURN call void @llvm.memset.p3.i16(ptr addrspace(3) %dst, i8 %val, i16 256, i1 false) ret void diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-metadata.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-metadata.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-metadata.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-metadata.ll @@ -9,7 +9,7 @@ ; We cannot have any specific metadata check here as ConstantAsMetadata is printed as ; CHECK: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.reloc.constant), <0x{{[0-9a-f]+}}> ; CHECK: [[SUM:%[0-9]+]]:_(s32) = G_ADD [[INT0]], [[INT1]] - ; CHECK: $vgpr0 = COPY [[SUM]](s32) + ; CHECK: $vgpr0 = PRED_COPY [[SUM]](s32) ; CHECK: SI_RETURN implicit $vgpr0 %val0 = call i32 @llvm.amdgcn.reloc.constant(metadata !0) %val1 = call i32 @llvm.amdgcn.reloc.constant(metadata i32 4) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-non-integral-address-spaces.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-non-integral-address-spaces.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-non-integral-address-spaces.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-non-integral-address-spaces.ll @@ -9,8 +9,8 @@ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 123 ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p7) = G_PTR_ADD [[C]], [[C1]](s64) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[PTR_ADD]](p7) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %gep = getelementptr i8, ptr addrspace(7) null, i64 123 ret ptr addrspace(7) %gep @@ -25,12 +25,12 @@ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 123 ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C1]](s64), [[C1]](s64) ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(<2 x p7>) = G_PTR_ADD [[BUILD_VECTOR]], [[BUILD_VECTOR1]](<2 x s64>) - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x p7>) = COPY [[PTR_ADD]](<2 x p7>) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), 
[[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x p7>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(<2 x p7>) = PRED_COPY [[PTR_ADD]](<2 x p7>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[PRED_COPY]](<2 x p7>) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 %gep = getelementptr i8, <2 x ptr addrspace(7)> zeroinitializer, <2 x i64> ret <2 x ptr addrspace(7)> %gep diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-ptrmask.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-ptrmask.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-ptrmask.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-ptrmask.ll @@ -6,16 +6,16 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[PRED_COPY]](s32), [[PRED_COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) ; CHECK-NEXT: [[PTRMASK:%[0-9]+]]:_(p0) = G_PTRMASK [[MV]], [[MV1]](s64) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[PTRMASK]](p0) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %masked = call ptr @llvm.ptrmask.p0.i64(ptr %ptr, i64 %mask) ret ptr %masked @@ -26,14 +26,14 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[PTRMASK:%[0-9]+]]:_(p0) = G_PTRMASK [[MV]], [[COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[PRED_COPY]](s32), [[PRED_COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PTRMASK:%[0-9]+]]:_(p0) = G_PTRMASK [[MV]], [[PRED_COPY2]](s32) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[PTRMASK]](p0) - ; CHECK-NEXT: $vgpr0 = COPY 
[[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %masked = call ptr @llvm.ptrmask.p0.i32(ptr %ptr, i32 %mask) ret ptr %masked @@ -44,15 +44,15 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[PRED_COPY]](s32), [[PRED_COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY2]](s32) ; CHECK-NEXT: [[PTRMASK:%[0-9]+]]:_(p0) = G_PTRMASK [[MV]], [[TRUNC]](s16) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[PTRMASK]](p0) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %masked = call ptr @llvm.ptrmask.p0.i16(ptr %ptr, i16 %mask) ret ptr %masked @@ -63,15 +63,15 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[PRED_COPY]](s32), [[PRED_COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[PRED_COPY2]](s32) ; CHECK-NEXT: [[PTRMASK:%[0-9]+]]:_(p0) = G_PTRMASK [[MV]], [[TRUNC]](s1) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[PTRMASK]](p0) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %masked = call ptr @llvm.ptrmask.p0.i1(ptr %ptr, i1 %mask) ret ptr %masked @@ -82,12 +82,12 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) - ; CHECK-NEXT: [[PTRMASK:%[0-9]+]]:_(p3) = G_PTRMASK [[COPY]], [[MV]](s64) - ; CHECK-NEXT: $vgpr0 = COPY [[PTRMASK]](p3) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(p3) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = 
G_MERGE_VALUES [[PRED_COPY1]](s32), [[PRED_COPY2]](s32) + ; CHECK-NEXT: [[PTRMASK:%[0-9]+]]:_(p3) = G_PTRMASK [[PRED_COPY]], [[MV]](s64) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[PTRMASK]](p3) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %masked = call ptr addrspace(3) @llvm.ptrmask.p3.i64(ptr addrspace(3) %ptr, i64 %mask) ret ptr addrspace(3) %masked @@ -98,10 +98,10 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[PTRMASK:%[0-9]+]]:_(p3) = G_PTRMASK [[COPY]], [[COPY1]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[PTRMASK]](p3) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(p3) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PTRMASK:%[0-9]+]]:_(p3) = G_PTRMASK [[PRED_COPY]], [[PRED_COPY1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[PTRMASK]](p3) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %masked = call ptr addrspace(3) @llvm.ptrmask.p3.i32(ptr addrspace(3) %ptr, i32 %mask) ret ptr addrspace(3) %masked @@ -112,11 +112,11 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; CHECK-NEXT: [[PTRMASK:%[0-9]+]]:_(p3) = G_PTRMASK [[COPY]], [[TRUNC]](s16) - ; CHECK-NEXT: $vgpr0 = COPY [[PTRMASK]](p3) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(p3) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY1]](s32) + ; CHECK-NEXT: [[PTRMASK:%[0-9]+]]:_(p3) = G_PTRMASK [[PRED_COPY]], [[TRUNC]](s16) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[PTRMASK]](p3) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %masked = call ptr addrspace(3) @llvm.ptrmask.p3.i16(ptr addrspace(3) %ptr, i16 %mask) ret ptr addrspace(3) %masked @@ -127,11 +127,11 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY1]](s32) - ; CHECK-NEXT: [[PTRMASK:%[0-9]+]]:_(p3) = G_PTRMASK [[COPY]], [[TRUNC]](s1) - ; CHECK-NEXT: $vgpr0 = COPY [[PTRMASK]](p3) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(p3) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[PRED_COPY1]](s32) + ; CHECK-NEXT: [[PTRMASK:%[0-9]+]]:_(p3) = G_PTRMASK [[PRED_COPY]], [[TRUNC]](s1) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[PTRMASK]](p3) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %masked = call ptr addrspace(3) @llvm.ptrmask.p3.i1(ptr addrspace(3) %ptr, i1 %mask) ret ptr addrspace(3) %masked diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sat.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sat.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sat.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sat.ll @@ -6,13 +6,13 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = 
PRED_COPY $vgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY1]](s32) ; CHECK-NEXT: [[UADDSAT:%[0-9]+]]:_(s16) = G_UADDSAT [[TRUNC]], [[TRUNC1]] ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UADDSAT]](s16) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[ANYEXT]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %res = call i16 @llvm.uadd.sat.i16(i16 %lhs, i16 %rhs) ret i16 %res @@ -24,10 +24,10 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[UADDSAT:%[0-9]+]]:_(s32) = G_UADDSAT [[COPY]], [[COPY1]] - ; CHECK-NEXT: $vgpr0 = COPY [[UADDSAT]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[UADDSAT:%[0-9]+]]:_(s32) = G_UADDSAT [[PRED_COPY]], [[PRED_COPY1]] + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UADDSAT]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %res = call i32 @llvm.uadd.sat.i32(i32 %lhs, i32 %rhs) ret i32 %res @@ -39,16 +39,16 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY]](s32), [[PRED_COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) ; CHECK-NEXT: [[UADDSAT:%[0-9]+]]:_(s64) = G_UADDSAT [[MV]], [[MV1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UADDSAT]](s64) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %res = call i64 @llvm.uadd.sat.i64(i64 %lhs, i64 %rhs) ret i64 %res @@ -60,16 +60,16 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) 
= PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) ; CHECK-NEXT: [[UADDSAT:%[0-9]+]]:_(<2 x s32>) = G_UADDSAT [[BUILD_VECTOR]], [[BUILD_VECTOR1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UADDSAT]](<2 x s32>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %res = call <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32> %lhs, <2 x i32> %rhs) ret <2 x i32> %res @@ -81,13 +81,13 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY1]](s32) ; CHECK-NEXT: [[SADDSAT:%[0-9]+]]:_(s16) = G_SADDSAT [[TRUNC]], [[TRUNC1]] ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SADDSAT]](s16) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[ANYEXT]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %res = call i16 @llvm.sadd.sat.i16(i16 %lhs, i16 %rhs) ret i16 %res @@ -99,10 +99,10 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[SADDSAT:%[0-9]+]]:_(s32) = G_SADDSAT [[COPY]], [[COPY1]] - ; CHECK-NEXT: $vgpr0 = COPY [[SADDSAT]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[SADDSAT:%[0-9]+]]:_(s32) = G_SADDSAT [[PRED_COPY]], [[PRED_COPY1]] + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[SADDSAT]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %res = call i32 @llvm.sadd.sat.i32(i32 %lhs, i32 %rhs) ret i32 %res @@ -114,16 +114,16 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY]](s32), [[PRED_COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) ; CHECK-NEXT: [[SADDSAT:%[0-9]+]]:_(s64) = G_SADDSAT [[MV]], [[MV1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SADDSAT]](s64) - ; CHECK-NEXT: 
$vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %res = call i64 @llvm.sadd.sat.i64(i64 %lhs, i64 %rhs) ret i64 %res @@ -135,16 +135,16 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) ; CHECK-NEXT: [[SADDSAT:%[0-9]+]]:_(<2 x s32>) = G_SADDSAT [[BUILD_VECTOR]], [[BUILD_VECTOR1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SADDSAT]](<2 x s32>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %res = call <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32> %lhs, <2 x i32> %rhs) ret <2 x i32> %res @@ -156,13 +156,13 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY1]](s32) ; CHECK-NEXT: [[USUBSAT:%[0-9]+]]:_(s16) = G_USUBSAT [[TRUNC]], [[TRUNC1]] ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[USUBSAT]](s16) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[ANYEXT]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %res = call i16 @llvm.usub.sat.i16(i16 %lhs, i16 %rhs) ret i16 %res @@ -174,10 +174,10 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[USUBSAT:%[0-9]+]]:_(s32) = G_USUBSAT [[COPY]], [[COPY1]] - ; CHECK-NEXT: $vgpr0 = COPY [[USUBSAT]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[USUBSAT:%[0-9]+]]:_(s32) = G_USUBSAT [[PRED_COPY]], [[PRED_COPY1]] + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[USUBSAT]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %res = call i32 @llvm.usub.sat.i32(i32 %lhs, i32 %rhs) ret i32 %res @@ -189,16 +189,16 
@@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY]](s32), [[PRED_COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) ; CHECK-NEXT: [[USUBSAT:%[0-9]+]]:_(s64) = G_USUBSAT [[MV]], [[MV1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[USUBSAT]](s64) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %res = call i64 @llvm.usub.sat.i64(i64 %lhs, i64 %rhs) ret i64 %res @@ -210,16 +210,16 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) ; CHECK-NEXT: [[USUBSAT:%[0-9]+]]:_(<2 x s32>) = G_USUBSAT [[BUILD_VECTOR]], [[BUILD_VECTOR1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[USUBSAT]](<2 x s32>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %res = call <2 x i32> @llvm.usub.sat.v2i32(<2 x i32> %lhs, <2 x i32> %rhs) ret <2 x i32> %res @@ -231,13 +231,13 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: 
[[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY1]](s32) ; CHECK-NEXT: [[SSUBSAT:%[0-9]+]]:_(s16) = G_SSUBSAT [[TRUNC]], [[TRUNC1]] ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SSUBSAT]](s16) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[ANYEXT]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %res = call i16 @llvm.ssub.sat.i16(i16 %lhs, i16 %rhs) ret i16 %res @@ -249,10 +249,10 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[SSUBSAT:%[0-9]+]]:_(s32) = G_SSUBSAT [[COPY]], [[COPY1]] - ; CHECK-NEXT: $vgpr0 = COPY [[SSUBSAT]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[SSUBSAT:%[0-9]+]]:_(s32) = G_SSUBSAT [[PRED_COPY]], [[PRED_COPY1]] + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[SSUBSAT]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %res = call i32 @llvm.ssub.sat.i32(i32 %lhs, i32 %rhs) ret i32 %res @@ -264,16 +264,16 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY]](s32), [[PRED_COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) ; CHECK-NEXT: [[SSUBSAT:%[0-9]+]]:_(s64) = G_SSUBSAT [[MV]], [[MV1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SSUBSAT]](s64) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %res = call i64 @llvm.ssub.sat.i64(i64 %lhs, i64 %rhs) ret i64 %res @@ -285,16 +285,16 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR 
[[PRED_COPY2]](s32), [[PRED_COPY3]](s32) ; CHECK-NEXT: [[SSUBSAT:%[0-9]+]]:_(<2 x s32>) = G_SSUBSAT [[BUILD_VECTOR]], [[BUILD_VECTOR1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SSUBSAT]](<2 x s32>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %res = call <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32> %lhs, <2 x i32> %rhs) ret <2 x i32> %res @@ -306,13 +306,13 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY1]](s32) ; CHECK-NEXT: [[USHLSAT:%[0-9]+]]:_(s16) = G_USHLSAT [[TRUNC]], [[TRUNC1]](s16) ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[USHLSAT]](s16) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[ANYEXT]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %res = call i16 @llvm.ushl.sat.i16(i16 %lhs, i16 %rhs) ret i16 %res @@ -324,10 +324,10 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[USHLSAT:%[0-9]+]]:_(s32) = G_USHLSAT [[COPY]], [[COPY1]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[USHLSAT]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[USHLSAT:%[0-9]+]]:_(s32) = G_USHLSAT [[PRED_COPY]], [[PRED_COPY1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[USHLSAT]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %res = call i32 @llvm.ushl.sat.i32(i32 %lhs, i32 %rhs) ret i32 %res @@ -339,16 +339,16 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY]](s32), [[PRED_COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) ; CHECK-NEXT: [[USHLSAT:%[0-9]+]]:_(s64) = G_USHLSAT [[MV]], [[MV1]](s64) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[USHLSAT]](s64) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: 
$vgpr1 = PRED_COPY [[UV1]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %res = call i64 @llvm.ushl.sat.i64(i64 %lhs, i64 %rhs) ret i64 %res @@ -360,16 +360,16 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) ; CHECK-NEXT: [[USHLSAT:%[0-9]+]]:_(<2 x s32>) = G_USHLSAT [[BUILD_VECTOR]], [[BUILD_VECTOR1]](<2 x s32>) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[USHLSAT]](<2 x s32>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %res = call <2 x i32> @llvm.ushl.sat.v2i32(<2 x i32> %lhs, <2 x i32> %rhs) ret <2 x i32> %res @@ -381,13 +381,13 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY1]](s32) ; CHECK-NEXT: [[SSHLSAT:%[0-9]+]]:_(s16) = G_SSHLSAT [[TRUNC]], [[TRUNC1]](s16) ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SSHLSAT]](s16) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[ANYEXT]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %res = call i16 @llvm.sshl.sat.i16(i16 %lhs, i16 %rhs) ret i16 %res @@ -399,10 +399,10 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[SSHLSAT:%[0-9]+]]:_(s32) = G_SSHLSAT [[COPY]], [[COPY1]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[SSHLSAT]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[SSHLSAT:%[0-9]+]]:_(s32) = G_SSHLSAT [[PRED_COPY]], [[PRED_COPY1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[SSHLSAT]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %res = call i32 @llvm.sshl.sat.i32(i32 %lhs, i32 %rhs) ret i32 %res @@ -414,16 +414,16 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} 
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY]](s32), [[PRED_COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) ; CHECK-NEXT: [[SSHLSAT:%[0-9]+]]:_(s64) = G_SSHLSAT [[MV]], [[MV1]](s64) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SSHLSAT]](s64) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %res = call i64 @llvm.sshl.sat.i64(i64 %lhs, i64 %rhs) ret i64 %res @@ -435,16 +435,16 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) ; CHECK-NEXT: [[SSHLSAT:%[0-9]+]]:_(<2 x s32>) = G_SSHLSAT [[BUILD_VECTOR]], [[BUILD_VECTOR1]](<2 x s32>) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SSHLSAT]](<2 x s32>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %res = call <2 x i32> @llvm.sshl.sat.v2i32(<2 x i32> %lhs, <2 x i32> %rhs) ret <2 x i32> %res diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll @@ -7,10 +7,10 @@ ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[COPY1]] - ; GCN-NEXT: $vgpr0 = COPY [[ADD]](s32) + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; 
GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[PRED_COPY]], [[PRED_COPY1]] + ; GCN-NEXT: $vgpr0 = PRED_COPY [[ADD]](s32) ; GCN-NEXT: SI_RETURN implicit $vgpr0 %add0 = add i32 %arg0, %arg1 ret i32 %add0 @@ -21,15 +21,15 @@ ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 9 ; GCN-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0.alloca ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C1]](s32) ; GCN-NEXT: G_STORE [[C]](s32), [[PTR_ADD]](p5) :: (volatile store (s32) into %ir.gep, addrspace 5) - ; GCN-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[COPY1]] - ; GCN-NEXT: $vgpr0 = COPY [[ADD]](s32) + ; GCN-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[PRED_COPY]], [[PRED_COPY1]] + ; GCN-NEXT: $vgpr0 = PRED_COPY [[ADD]](s32) ; GCN-NEXT: SI_RETURN implicit $vgpr0 %alloca = alloca [16 x i32], align 4, addrspace(5) %gep = getelementptr inbounds [16 x i32], ptr addrspace(5) %alloca, i32 0, i32 5 @@ -43,14 +43,14 @@ ; GCN: bb.1.entry: ; GCN-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 ; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_i32 - ; GCN-NEXT: $vgpr0 = COPY [[COPY]](s32) - ; GCN-NEXT: $vgpr1 = COPY [[COPY1]](s32) - ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY3]](<4 x s32>) + ; GCN-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY]](s32) + ; GCN-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY1]](s32) + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY3]](<4 x s32>) ; GCN-NEXT: SI_TCRETURN [[GV]](p0), @i32_fastcc_i32_i32, 0, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3 entry: %ret = tail call fastcc i32 @i32_fastcc_i32_i32(i32 %a, i32 %b) @@ -62,19 +62,19 @@ ; GCN: bb.1.entry: ; GCN-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 9 ; GCN-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0.alloca ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C1]](s32) ; GCN-NEXT: G_STORE [[C]](s32), [[PTR_ADD]](p5) :: (volatile store (s32) into %ir.gep, addrspace 5) ; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_i32 - ; GCN-NEXT: $vgpr0 = COPY [[COPY]](s32) - ; GCN-NEXT: $vgpr1 = COPY [[COPY1]](s32) - ; 
GCN-NEXT: [[COPY3:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY3]](<4 x s32>) + ; GCN-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY]](s32) + ; GCN-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY1]](s32) + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY3]](<4 x s32>) ; GCN-NEXT: SI_TCRETURN [[GV]](p0), @i32_fastcc_i32_i32, 0, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3 entry: %alloca = alloca [16 x i32], align 4, addrspace(5) @@ -89,19 +89,19 @@ ; GCN: bb.1.entry: ; GCN-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 9 ; GCN-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0.alloca ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C1]](s32) ; GCN-NEXT: G_STORE [[C]](s32), [[PTR_ADD]](p5) :: (volatile store (s32) into %ir.gep, addrspace 5) ; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_i32_stack_object - ; GCN-NEXT: $vgpr0 = COPY [[COPY]](s32) - ; GCN-NEXT: $vgpr1 = COPY [[COPY1]](s32) - ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY3]](<4 x s32>) + ; GCN-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY]](s32) + ; GCN-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY1]](s32) + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY3]](<4 x s32>) ; GCN-NEXT: SI_TCRETURN [[GV]](p0), @i32_fastcc_i32_i32_stack_object, 0, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3 entry: %alloca = alloca [16 x i32], align 4, addrspace(5) @@ -116,14 +116,14 @@ ; GCN: bb.1.entry: ; GCN-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 ; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_i32 - ; GCN-NEXT: $vgpr0 = COPY [[COPY]](s32) - ; GCN-NEXT: $vgpr1 = COPY [[COPY1]](s32) - ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY3]](<4 x s32>) + ; GCN-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY]](s32) + ; GCN-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY1]](s32) + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY3]](<4 x s32>) ; GCN-NEXT: SI_TCRETURN [[GV]](p0), @i32_fastcc_i32_i32, 0, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3 entry: %ret = tail call fastcc i32 @i32_fastcc_i32_i32(i32 %a, i32 %b) @@ -136,7 +136,7 @@ ; GCN: bb.1.entry: ; GCN-NEXT: liveins: $sgpr4_sgpr5 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: 
[[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr4_sgpr5 ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GCN-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr) @@ -145,12 +145,12 @@ ; GCN-NEXT: [[EVEC1:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[LOAD]](<2 x s32>), [[C1]](s32) ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_i32 - ; GCN-NEXT: $vgpr0 = COPY [[EVEC]](s32) - ; GCN-NEXT: $vgpr1 = COPY [[EVEC1]](s32) - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY1]](<4 x s32>) + ; GCN-NEXT: $vgpr0 = PRED_COPY [[EVEC]](s32) + ; GCN-NEXT: $vgpr1 = PRED_COPY [[EVEC1]](s32) + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY1]](<4 x s32>) ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @i32_fastcc_i32_i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN-NEXT: S_ENDPGM 0 entry: @@ -163,12 +163,12 @@ ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 ; GCN-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX]](p5) - ; GCN-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY1]](p5) :: (dereferenceable load (s32) from %ir.arg1, addrspace 5) - ; GCN-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[LOAD]] - ; GCN-NEXT: $vgpr0 = COPY [[ADD]](s32) + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:_(p5) = PRED_COPY [[FRAME_INDEX]](p5) + ; GCN-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PRED_COPY1]](p5) :: (dereferenceable load (s32) from %ir.arg1, addrspace 5) + ; GCN-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[PRED_COPY]], [[LOAD]] + ; GCN-NEXT: $vgpr0 = PRED_COPY [[ADD]](s32) ; GCN-NEXT: SI_RETURN implicit $vgpr0 %arg1.load = load i32, ptr addrspace(5) %arg1, align 4 %add0 = add i32 %arg0, %arg1.load @@ -181,24 +181,24 @@ ; GCN: bb.1.entry: ; GCN-NEXT: liveins: $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 ; GCN-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX]](p5) - ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:_(p5) = PRED_COPY [[FRAME_INDEX]](p5) + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_byval_i32 ; GCN-NEXT: [[AMDGPU_WAVE_ADDRESS:%[0-9]+]]:_(p5) = G_AMDGPU_WAVE_ADDRESS $sgpr32 ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GCN-NEXT: G_MEMCPY [[PTR_ADD]](p5), [[COPY1]](p5), [[C1]](s32), 0 :: (dereferenceable store (s32) into stack, addrspace 5), (dereferenceable load (s32) from %ir.b.byval, addrspace 5) - ; GCN-NEXT: $vgpr0 = 
COPY [[COPY]](s32) - ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY3]](<4 x s32>) + ; GCN-NEXT: G_MEMCPY [[PTR_ADD]](p5), [[PRED_COPY1]](p5), [[C1]](s32), 0 :: (dereferenceable store (s32) into stack, addrspace 5), (dereferenceable load (s32) from %ir.b.byval, addrspace 5) + ; GCN-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY]](s32) + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY3]](<4 x s32>) ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @i32_fastcc_i32_byval_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 ; GCN-NEXT: ADJCALLSTACKDOWN 0, 4, implicit-def $scc - ; GCN-NEXT: $vgpr0 = COPY [[COPY4]](s32) + ; GCN-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY4]](s32) ; GCN-NEXT: SI_RETURN implicit $vgpr0 entry: %ret = tail call fastcc i32 @i32_fastcc_i32_byval_i32(i32 %a, ptr addrspace(5) byval(i32) %b.byval) @@ -213,37 +213,37 @@ ; GCN: bb.1.entry: ; GCN-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 - ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 - ; GCN-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 - ; GCN-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24 - ; GCN-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25 - ; GCN-NEXT: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26 - ; GCN-NEXT: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27 - ; GCN-NEXT: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28 - ; GCN-NEXT: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29 - ; GCN-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + 
; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $vgpr6 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $vgpr7 + ; GCN-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr8 + ; GCN-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr9 + ; GCN-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr10 + ; GCN-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $vgpr11 + ; GCN-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr12 + ; GCN-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr13 + ; GCN-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr14 + ; GCN-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr15 + ; GCN-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY $vgpr16 + ; GCN-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY $vgpr17 + ; GCN-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY $vgpr18 + ; GCN-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY $vgpr19 + ; GCN-NEXT: [[PRED_COPY20:%[0-9]+]]:_(s32) = PRED_COPY $vgpr20 + ; GCN-NEXT: [[PRED_COPY21:%[0-9]+]]:_(s32) = PRED_COPY $vgpr21 + ; GCN-NEXT: [[PRED_COPY22:%[0-9]+]]:_(s32) = PRED_COPY $vgpr22 + ; GCN-NEXT: [[PRED_COPY23:%[0-9]+]]:_(s32) = PRED_COPY $vgpr23 + ; GCN-NEXT: [[PRED_COPY24:%[0-9]+]]:_(s32) = PRED_COPY $vgpr24 + ; GCN-NEXT: [[PRED_COPY25:%[0-9]+]]:_(s32) = PRED_COPY $vgpr25 + ; GCN-NEXT: [[PRED_COPY26:%[0-9]+]]:_(s32) = PRED_COPY $vgpr26 + ; GCN-NEXT: [[PRED_COPY27:%[0-9]+]]:_(s32) = PRED_COPY $vgpr27 + ; GCN-NEXT: [[PRED_COPY28:%[0-9]+]]:_(s32) = PRED_COPY $vgpr28 + ; GCN-NEXT: [[PRED_COPY29:%[0-9]+]]:_(s32) = PRED_COPY $vgpr29 + ; GCN-NEXT: [[PRED_COPY30:%[0-9]+]]:_(s32) = PRED_COPY $vgpr30 ; GCN-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2 ; GCN-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.2, align 16, addrspace 5) ; GCN-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 @@ -254,9 +254,9 @@ ; GCN-NEXT: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GCN-NEXT: G_MEMCPY [[FRAME_INDEX2]](p5), [[INTTOPTR]](p5), [[C1]](s32), 0 :: (dereferenceable store (s32) into %fixed-stack.0, align 16, addrspace 5), (dereferenceable load (s32) from `ptr addrspace(5) inttoptr (i32 16 to ptr addrspace(5))`, align 16, addrspace 5) - ; GCN-NEXT: $vgpr0 = COPY [[COPY]](s32) - ; GCN-NEXT: [[COPY31:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY31]](<4 x s32>) + ; GCN-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY]](s32) + ; GCN-NEXT: [[PRED_COPY31:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY31]](<4 x s32>) ; GCN-NEXT: SI_TCRETURN [[GV]](p0), @i32_fastcc_i32_byval_i32, 0, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3 entry: %ret = tail call fastcc i32 @i32_fastcc_i32_byval_i32(i32 %a, ptr addrspace(5) byval(i32) inttoptr (i32 16 to ptr addrspace(5))) @@ -268,47 +268,47 @@ ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; 
GCN-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 - ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 - ; GCN-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 - ; GCN-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24 - ; GCN-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25 - ; GCN-NEXT: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26 - ; GCN-NEXT: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27 - ; GCN-NEXT: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28 - ; GCN-NEXT: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29 - ; GCN-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $vgpr6 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $vgpr7 + ; GCN-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr8 + ; GCN-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr9 + ; GCN-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr10 + ; GCN-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $vgpr11 + ; GCN-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr12 + ; GCN-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr13 + ; GCN-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr14 + ; GCN-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr15 + ; GCN-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY $vgpr16 + ; GCN-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY $vgpr17 + ; GCN-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY $vgpr18 + ; GCN-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY $vgpr19 + ; GCN-NEXT: [[PRED_COPY20:%[0-9]+]]:_(s32) = PRED_COPY $vgpr20 + ; GCN-NEXT: [[PRED_COPY21:%[0-9]+]]:_(s32) = PRED_COPY $vgpr21 + ; GCN-NEXT: [[PRED_COPY22:%[0-9]+]]:_(s32) = PRED_COPY $vgpr22 + ; GCN-NEXT: [[PRED_COPY23:%[0-9]+]]:_(s32) = PRED_COPY $vgpr23 + ; GCN-NEXT: [[PRED_COPY24:%[0-9]+]]:_(s32) = PRED_COPY $vgpr24 + ; GCN-NEXT: [[PRED_COPY25:%[0-9]+]]:_(s32) = PRED_COPY $vgpr25 + ; GCN-NEXT: [[PRED_COPY26:%[0-9]+]]:_(s32) = PRED_COPY $vgpr26 + ; GCN-NEXT: [[PRED_COPY27:%[0-9]+]]:_(s32) = PRED_COPY $vgpr27 + ; GCN-NEXT: [[PRED_COPY28:%[0-9]+]]:_(s32) = PRED_COPY $vgpr28 + ; GCN-NEXT: [[PRED_COPY29:%[0-9]+]]:_(s32) = PRED_COPY $vgpr29 + ; GCN-NEXT: [[PRED_COPY30:%[0-9]+]]:_(s32) = PRED_COPY $vgpr30 ; GCN-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2 ; GCN-NEXT: 
[[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.2, align 16, addrspace 5) ; GCN-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 ; GCN-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (s32) from %fixed-stack.1, addrspace 5) ; GCN-NEXT: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 ; GCN-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load (s32) from %fixed-stack.0, align 8, addrspace 5) - ; GCN-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[COPY1]] + ; GCN-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[PRED_COPY]], [[PRED_COPY1]] ; GCN-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[LOAD1]] ; GCN-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[LOAD2]] - ; GCN-NEXT: $vgpr0 = COPY [[ADD2]](s32) + ; GCN-NEXT: $vgpr0 = PRED_COPY [[ADD2]](s32) ; GCN-NEXT: SI_RETURN implicit $vgpr0 %val_firststack = extractvalue [32 x i32] %large, 30 %val_laststack = extractvalue [32 x i32] %large, 31 @@ -323,37 +323,37 @@ ; GCN: bb.1.entry: ; GCN-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 - ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 - ; GCN-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 - ; GCN-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24 - ; GCN-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25 - ; GCN-NEXT: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26 - ; GCN-NEXT: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27 - ; GCN-NEXT: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28 - ; GCN-NEXT: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29 - ; GCN-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $vgpr6 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = 
PRED_COPY $vgpr7 + ; GCN-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr8 + ; GCN-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr9 + ; GCN-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr10 + ; GCN-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $vgpr11 + ; GCN-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr12 + ; GCN-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr13 + ; GCN-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr14 + ; GCN-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr15 + ; GCN-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY $vgpr16 + ; GCN-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY $vgpr17 + ; GCN-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY $vgpr18 + ; GCN-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY $vgpr19 + ; GCN-NEXT: [[PRED_COPY20:%[0-9]+]]:_(s32) = PRED_COPY $vgpr20 + ; GCN-NEXT: [[PRED_COPY21:%[0-9]+]]:_(s32) = PRED_COPY $vgpr21 + ; GCN-NEXT: [[PRED_COPY22:%[0-9]+]]:_(s32) = PRED_COPY $vgpr22 + ; GCN-NEXT: [[PRED_COPY23:%[0-9]+]]:_(s32) = PRED_COPY $vgpr23 + ; GCN-NEXT: [[PRED_COPY24:%[0-9]+]]:_(s32) = PRED_COPY $vgpr24 + ; GCN-NEXT: [[PRED_COPY25:%[0-9]+]]:_(s32) = PRED_COPY $vgpr25 + ; GCN-NEXT: [[PRED_COPY26:%[0-9]+]]:_(s32) = PRED_COPY $vgpr26 + ; GCN-NEXT: [[PRED_COPY27:%[0-9]+]]:_(s32) = PRED_COPY $vgpr27 + ; GCN-NEXT: [[PRED_COPY28:%[0-9]+]]:_(s32) = PRED_COPY $vgpr28 + ; GCN-NEXT: [[PRED_COPY29:%[0-9]+]]:_(s32) = PRED_COPY $vgpr29 + ; GCN-NEXT: [[PRED_COPY30:%[0-9]+]]:_(s32) = PRED_COPY $vgpr30 ; GCN-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.5 ; GCN-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.5, align 16, addrspace 5) ; GCN-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.4 @@ -367,39 +367,39 @@ ; GCN-NEXT: G_STORE [[LOAD1]](s32), [[FRAME_INDEX4]](p5) :: (store (s32) into %fixed-stack.1, addrspace 5) ; GCN-NEXT: [[FRAME_INDEX5:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 ; GCN-NEXT: G_STORE [[LOAD2]](s32), [[FRAME_INDEX5]](p5) :: (store (s32) into %fixed-stack.0, align 8, addrspace 5) - ; GCN-NEXT: $vgpr0 = COPY [[COPY]](s32) - ; GCN-NEXT: $vgpr1 = COPY [[COPY1]](s32) - ; GCN-NEXT: $vgpr2 = COPY [[COPY2]](s32) - ; GCN-NEXT: $vgpr3 = COPY [[COPY3]](s32) - ; GCN-NEXT: $vgpr4 = COPY [[COPY4]](s32) - ; GCN-NEXT: $vgpr5 = COPY [[COPY5]](s32) - ; GCN-NEXT: $vgpr6 = COPY [[COPY6]](s32) - ; GCN-NEXT: $vgpr7 = COPY [[COPY7]](s32) - ; GCN-NEXT: $vgpr8 = COPY [[COPY8]](s32) - ; GCN-NEXT: $vgpr9 = COPY [[COPY9]](s32) - ; GCN-NEXT: $vgpr10 = COPY [[COPY10]](s32) - ; GCN-NEXT: $vgpr11 = COPY [[COPY11]](s32) - ; GCN-NEXT: $vgpr12 = COPY [[COPY12]](s32) - ; GCN-NEXT: $vgpr13 = COPY [[COPY13]](s32) - ; GCN-NEXT: $vgpr14 = COPY [[COPY14]](s32) - ; GCN-NEXT: $vgpr15 = COPY [[COPY15]](s32) - ; GCN-NEXT: $vgpr16 = COPY [[COPY16]](s32) - ; GCN-NEXT: $vgpr17 = COPY [[COPY17]](s32) - ; GCN-NEXT: $vgpr18 = COPY [[COPY18]](s32) - ; GCN-NEXT: $vgpr19 = COPY [[COPY19]](s32) - ; GCN-NEXT: $vgpr20 = COPY [[COPY20]](s32) - ; GCN-NEXT: $vgpr21 = COPY [[COPY21]](s32) - ; GCN-NEXT: $vgpr22 = COPY [[COPY22]](s32) - ; GCN-NEXT: $vgpr23 = COPY [[COPY23]](s32) - ; GCN-NEXT: $vgpr24 = COPY [[COPY24]](s32) - ; GCN-NEXT: $vgpr25 = COPY [[COPY25]](s32) - ; GCN-NEXT: $vgpr26 = COPY [[COPY26]](s32) - ; GCN-NEXT: $vgpr27 = COPY [[COPY27]](s32) - ; GCN-NEXT: $vgpr28 = COPY [[COPY28]](s32) - ; GCN-NEXT: $vgpr29 = COPY [[COPY29]](s32) - ; GCN-NEXT: $vgpr30 = COPY [[COPY30]](s32) - ; GCN-NEXT: [[COPY31:%[0-9]+]]:_(<4 x s32>) = COPY 
$sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY31]](<4 x s32>) + ; GCN-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY]](s32) + ; GCN-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY1]](s32) + ; GCN-NEXT: $vgpr2 = PRED_COPY [[PRED_COPY2]](s32) + ; GCN-NEXT: $vgpr3 = PRED_COPY [[PRED_COPY3]](s32) + ; GCN-NEXT: $vgpr4 = PRED_COPY [[PRED_COPY4]](s32) + ; GCN-NEXT: $vgpr5 = PRED_COPY [[PRED_COPY5]](s32) + ; GCN-NEXT: $vgpr6 = PRED_COPY [[PRED_COPY6]](s32) + ; GCN-NEXT: $vgpr7 = PRED_COPY [[PRED_COPY7]](s32) + ; GCN-NEXT: $vgpr8 = PRED_COPY [[PRED_COPY8]](s32) + ; GCN-NEXT: $vgpr9 = PRED_COPY [[PRED_COPY9]](s32) + ; GCN-NEXT: $vgpr10 = PRED_COPY [[PRED_COPY10]](s32) + ; GCN-NEXT: $vgpr11 = PRED_COPY [[PRED_COPY11]](s32) + ; GCN-NEXT: $vgpr12 = PRED_COPY [[PRED_COPY12]](s32) + ; GCN-NEXT: $vgpr13 = PRED_COPY [[PRED_COPY13]](s32) + ; GCN-NEXT: $vgpr14 = PRED_COPY [[PRED_COPY14]](s32) + ; GCN-NEXT: $vgpr15 = PRED_COPY [[PRED_COPY15]](s32) + ; GCN-NEXT: $vgpr16 = PRED_COPY [[PRED_COPY16]](s32) + ; GCN-NEXT: $vgpr17 = PRED_COPY [[PRED_COPY17]](s32) + ; GCN-NEXT: $vgpr18 = PRED_COPY [[PRED_COPY18]](s32) + ; GCN-NEXT: $vgpr19 = PRED_COPY [[PRED_COPY19]](s32) + ; GCN-NEXT: $vgpr20 = PRED_COPY [[PRED_COPY20]](s32) + ; GCN-NEXT: $vgpr21 = PRED_COPY [[PRED_COPY21]](s32) + ; GCN-NEXT: $vgpr22 = PRED_COPY [[PRED_COPY22]](s32) + ; GCN-NEXT: $vgpr23 = PRED_COPY [[PRED_COPY23]](s32) + ; GCN-NEXT: $vgpr24 = PRED_COPY [[PRED_COPY24]](s32) + ; GCN-NEXT: $vgpr25 = PRED_COPY [[PRED_COPY25]](s32) + ; GCN-NEXT: $vgpr26 = PRED_COPY [[PRED_COPY26]](s32) + ; GCN-NEXT: $vgpr27 = PRED_COPY [[PRED_COPY27]](s32) + ; GCN-NEXT: $vgpr28 = PRED_COPY [[PRED_COPY28]](s32) + ; GCN-NEXT: $vgpr29 = PRED_COPY [[PRED_COPY29]](s32) + ; GCN-NEXT: $vgpr30 = PRED_COPY [[PRED_COPY30]](s32) + ; GCN-NEXT: [[PRED_COPY31:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY31]](<4 x s32>) ; GCN-NEXT: SI_TCRETURN [[GV]](p0), @i32_fastcc_i32_i32_a32i32, 0, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3 entry: %ret = tail call fastcc i32 @i32_fastcc_i32_i32_a32i32(i32 %a, i32 %b, [32 x i32] %c) @@ -411,37 +411,37 @@ ; GCN: bb.1.entry: ; GCN-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GCN-NEXT: 
[[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 - ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 - ; GCN-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 - ; GCN-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24 - ; GCN-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25 - ; GCN-NEXT: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26 - ; GCN-NEXT: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27 - ; GCN-NEXT: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28 - ; GCN-NEXT: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29 - ; GCN-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $vgpr6 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $vgpr7 + ; GCN-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr8 + ; GCN-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr9 + ; GCN-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr10 + ; GCN-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $vgpr11 + ; GCN-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr12 + ; GCN-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr13 + ; GCN-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr14 + ; GCN-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr15 + ; GCN-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY $vgpr16 + ; GCN-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY $vgpr17 + ; GCN-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY $vgpr18 + ; GCN-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY $vgpr19 + ; GCN-NEXT: [[PRED_COPY20:%[0-9]+]]:_(s32) = PRED_COPY $vgpr20 + ; GCN-NEXT: [[PRED_COPY21:%[0-9]+]]:_(s32) = PRED_COPY $vgpr21 + ; GCN-NEXT: [[PRED_COPY22:%[0-9]+]]:_(s32) = PRED_COPY $vgpr22 + ; GCN-NEXT: [[PRED_COPY23:%[0-9]+]]:_(s32) = PRED_COPY $vgpr23 + ; GCN-NEXT: [[PRED_COPY24:%[0-9]+]]:_(s32) = PRED_COPY $vgpr24 + ; GCN-NEXT: [[PRED_COPY25:%[0-9]+]]:_(s32) = PRED_COPY $vgpr25 + ; GCN-NEXT: [[PRED_COPY26:%[0-9]+]]:_(s32) = PRED_COPY $vgpr26 + ; GCN-NEXT: [[PRED_COPY27:%[0-9]+]]:_(s32) = PRED_COPY $vgpr27 + ; GCN-NEXT: [[PRED_COPY28:%[0-9]+]]:_(s32) = PRED_COPY $vgpr28 + ; GCN-NEXT: [[PRED_COPY29:%[0-9]+]]:_(s32) = PRED_COPY $vgpr29 + ; GCN-NEXT: [[PRED_COPY30:%[0-9]+]]:_(s32) = PRED_COPY $vgpr30 ; GCN-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.5 ; GCN-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.5, align 16, addrspace 5) ; GCN-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.4 @@ -460,39 +460,39 @@ ; GCN-NEXT: G_STORE [[LOAD1]](s32), [[FRAME_INDEX5]](p5) :: 
(store (s32) into %fixed-stack.1, addrspace 5) ; GCN-NEXT: [[FRAME_INDEX6:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 ; GCN-NEXT: G_STORE [[LOAD2]](s32), [[FRAME_INDEX6]](p5) :: (store (s32) into %fixed-stack.0, align 8, addrspace 5) - ; GCN-NEXT: $vgpr0 = COPY [[COPY]](s32) - ; GCN-NEXT: $vgpr1 = COPY [[COPY1]](s32) - ; GCN-NEXT: $vgpr2 = COPY [[COPY2]](s32) - ; GCN-NEXT: $vgpr3 = COPY [[COPY3]](s32) - ; GCN-NEXT: $vgpr4 = COPY [[COPY4]](s32) - ; GCN-NEXT: $vgpr5 = COPY [[COPY5]](s32) - ; GCN-NEXT: $vgpr6 = COPY [[COPY6]](s32) - ; GCN-NEXT: $vgpr7 = COPY [[COPY7]](s32) - ; GCN-NEXT: $vgpr8 = COPY [[COPY8]](s32) - ; GCN-NEXT: $vgpr9 = COPY [[COPY9]](s32) - ; GCN-NEXT: $vgpr10 = COPY [[COPY10]](s32) - ; GCN-NEXT: $vgpr11 = COPY [[COPY11]](s32) - ; GCN-NEXT: $vgpr12 = COPY [[COPY12]](s32) - ; GCN-NEXT: $vgpr13 = COPY [[COPY13]](s32) - ; GCN-NEXT: $vgpr14 = COPY [[COPY14]](s32) - ; GCN-NEXT: $vgpr15 = COPY [[COPY15]](s32) - ; GCN-NEXT: $vgpr16 = COPY [[COPY16]](s32) - ; GCN-NEXT: $vgpr17 = COPY [[COPY17]](s32) - ; GCN-NEXT: $vgpr18 = COPY [[COPY18]](s32) - ; GCN-NEXT: $vgpr19 = COPY [[COPY19]](s32) - ; GCN-NEXT: $vgpr20 = COPY [[COPY20]](s32) - ; GCN-NEXT: $vgpr21 = COPY [[COPY21]](s32) - ; GCN-NEXT: $vgpr22 = COPY [[COPY22]](s32) - ; GCN-NEXT: $vgpr23 = COPY [[COPY23]](s32) - ; GCN-NEXT: $vgpr24 = COPY [[COPY24]](s32) - ; GCN-NEXT: $vgpr25 = COPY [[COPY25]](s32) - ; GCN-NEXT: $vgpr26 = COPY [[COPY26]](s32) - ; GCN-NEXT: $vgpr27 = COPY [[COPY27]](s32) - ; GCN-NEXT: $vgpr28 = COPY [[COPY28]](s32) - ; GCN-NEXT: $vgpr29 = COPY [[COPY29]](s32) - ; GCN-NEXT: $vgpr30 = COPY [[COPY30]](s32) - ; GCN-NEXT: [[COPY31:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY31]](<4 x s32>) + ; GCN-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY]](s32) + ; GCN-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY1]](s32) + ; GCN-NEXT: $vgpr2 = PRED_COPY [[PRED_COPY2]](s32) + ; GCN-NEXT: $vgpr3 = PRED_COPY [[PRED_COPY3]](s32) + ; GCN-NEXT: $vgpr4 = PRED_COPY [[PRED_COPY4]](s32) + ; GCN-NEXT: $vgpr5 = PRED_COPY [[PRED_COPY5]](s32) + ; GCN-NEXT: $vgpr6 = PRED_COPY [[PRED_COPY6]](s32) + ; GCN-NEXT: $vgpr7 = PRED_COPY [[PRED_COPY7]](s32) + ; GCN-NEXT: $vgpr8 = PRED_COPY [[PRED_COPY8]](s32) + ; GCN-NEXT: $vgpr9 = PRED_COPY [[PRED_COPY9]](s32) + ; GCN-NEXT: $vgpr10 = PRED_COPY [[PRED_COPY10]](s32) + ; GCN-NEXT: $vgpr11 = PRED_COPY [[PRED_COPY11]](s32) + ; GCN-NEXT: $vgpr12 = PRED_COPY [[PRED_COPY12]](s32) + ; GCN-NEXT: $vgpr13 = PRED_COPY [[PRED_COPY13]](s32) + ; GCN-NEXT: $vgpr14 = PRED_COPY [[PRED_COPY14]](s32) + ; GCN-NEXT: $vgpr15 = PRED_COPY [[PRED_COPY15]](s32) + ; GCN-NEXT: $vgpr16 = PRED_COPY [[PRED_COPY16]](s32) + ; GCN-NEXT: $vgpr17 = PRED_COPY [[PRED_COPY17]](s32) + ; GCN-NEXT: $vgpr18 = PRED_COPY [[PRED_COPY18]](s32) + ; GCN-NEXT: $vgpr19 = PRED_COPY [[PRED_COPY19]](s32) + ; GCN-NEXT: $vgpr20 = PRED_COPY [[PRED_COPY20]](s32) + ; GCN-NEXT: $vgpr21 = PRED_COPY [[PRED_COPY21]](s32) + ; GCN-NEXT: $vgpr22 = PRED_COPY [[PRED_COPY22]](s32) + ; GCN-NEXT: $vgpr23 = PRED_COPY [[PRED_COPY23]](s32) + ; GCN-NEXT: $vgpr24 = PRED_COPY [[PRED_COPY24]](s32) + ; GCN-NEXT: $vgpr25 = PRED_COPY [[PRED_COPY25]](s32) + ; GCN-NEXT: $vgpr26 = PRED_COPY [[PRED_COPY26]](s32) + ; GCN-NEXT: $vgpr27 = PRED_COPY [[PRED_COPY27]](s32) + ; GCN-NEXT: $vgpr28 = PRED_COPY [[PRED_COPY28]](s32) + ; GCN-NEXT: $vgpr29 = PRED_COPY [[PRED_COPY29]](s32) + ; GCN-NEXT: $vgpr30 = PRED_COPY [[PRED_COPY30]](s32) + ; GCN-NEXT: [[PRED_COPY31:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: 
$sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY31]](<4 x s32>) ; GCN-NEXT: SI_TCRETURN [[GV]](p0), @i32_fastcc_i32_i32_a32i32, 0, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3 entry: %alloca = alloca [16 x i32], align 4, addrspace(5) @@ -510,8 +510,8 @@ ; GCN: bb.1.entry: ; GCN-NEXT: liveins: $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_i32_a32i32 @@ -525,43 +525,43 @@ ; GCN-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; GCN-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C3]](s32) ; GCN-NEXT: G_STORE [[C]](s32), [[PTR_ADD2]](p5) :: (store (s32) into stack + 8, align 8, addrspace 5) - ; GCN-NEXT: $vgpr0 = COPY [[COPY]](s32) - ; GCN-NEXT: $vgpr1 = COPY [[COPY1]](s32) - ; GCN-NEXT: $vgpr2 = COPY [[C]](s32) - ; GCN-NEXT: $vgpr3 = COPY [[C]](s32) - ; GCN-NEXT: $vgpr4 = COPY [[C]](s32) - ; GCN-NEXT: $vgpr5 = COPY [[C]](s32) - ; GCN-NEXT: $vgpr6 = COPY [[C]](s32) - ; GCN-NEXT: $vgpr7 = COPY [[C]](s32) - ; GCN-NEXT: $vgpr8 = COPY [[C]](s32) - ; GCN-NEXT: $vgpr9 = COPY [[C]](s32) - ; GCN-NEXT: $vgpr10 = COPY [[C]](s32) - ; GCN-NEXT: $vgpr11 = COPY [[C]](s32) - ; GCN-NEXT: $vgpr12 = COPY [[C]](s32) - ; GCN-NEXT: $vgpr13 = COPY [[C]](s32) - ; GCN-NEXT: $vgpr14 = COPY [[C]](s32) - ; GCN-NEXT: $vgpr15 = COPY [[C]](s32) - ; GCN-NEXT: $vgpr16 = COPY [[C]](s32) - ; GCN-NEXT: $vgpr17 = COPY [[C]](s32) - ; GCN-NEXT: $vgpr18 = COPY [[C]](s32) - ; GCN-NEXT: $vgpr19 = COPY [[C]](s32) - ; GCN-NEXT: $vgpr20 = COPY [[C]](s32) - ; GCN-NEXT: $vgpr21 = COPY [[C]](s32) - ; GCN-NEXT: $vgpr22 = COPY [[C]](s32) - ; GCN-NEXT: $vgpr23 = COPY [[C]](s32) - ; GCN-NEXT: $vgpr24 = COPY [[C]](s32) - ; GCN-NEXT: $vgpr25 = COPY [[C]](s32) - ; GCN-NEXT: $vgpr26 = COPY [[C]](s32) - ; GCN-NEXT: $vgpr27 = COPY [[C]](s32) - ; GCN-NEXT: $vgpr28 = COPY [[C]](s32) - ; GCN-NEXT: $vgpr29 = COPY [[C]](s32) - ; GCN-NEXT: $vgpr30 = COPY [[C]](s32) - ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY2]](<4 x s32>) + ; GCN-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY]](s32) + ; GCN-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY1]](s32) + ; GCN-NEXT: $vgpr2 = PRED_COPY [[C]](s32) + ; GCN-NEXT: $vgpr3 = PRED_COPY [[C]](s32) + ; GCN-NEXT: $vgpr4 = PRED_COPY [[C]](s32) + ; GCN-NEXT: $vgpr5 = PRED_COPY [[C]](s32) + ; GCN-NEXT: $vgpr6 = PRED_COPY [[C]](s32) + ; GCN-NEXT: $vgpr7 = PRED_COPY [[C]](s32) + ; GCN-NEXT: $vgpr8 = PRED_COPY [[C]](s32) + ; GCN-NEXT: $vgpr9 = PRED_COPY [[C]](s32) + ; GCN-NEXT: $vgpr10 = PRED_COPY [[C]](s32) + ; GCN-NEXT: $vgpr11 = PRED_COPY [[C]](s32) + ; GCN-NEXT: $vgpr12 = PRED_COPY [[C]](s32) + ; GCN-NEXT: $vgpr13 = 
PRED_COPY [[C]](s32) + ; GCN-NEXT: $vgpr14 = PRED_COPY [[C]](s32) + ; GCN-NEXT: $vgpr15 = PRED_COPY [[C]](s32) + ; GCN-NEXT: $vgpr16 = PRED_COPY [[C]](s32) + ; GCN-NEXT: $vgpr17 = PRED_COPY [[C]](s32) + ; GCN-NEXT: $vgpr18 = PRED_COPY [[C]](s32) + ; GCN-NEXT: $vgpr19 = PRED_COPY [[C]](s32) + ; GCN-NEXT: $vgpr20 = PRED_COPY [[C]](s32) + ; GCN-NEXT: $vgpr21 = PRED_COPY [[C]](s32) + ; GCN-NEXT: $vgpr22 = PRED_COPY [[C]](s32) + ; GCN-NEXT: $vgpr23 = PRED_COPY [[C]](s32) + ; GCN-NEXT: $vgpr24 = PRED_COPY [[C]](s32) + ; GCN-NEXT: $vgpr25 = PRED_COPY [[C]](s32) + ; GCN-NEXT: $vgpr26 = PRED_COPY [[C]](s32) + ; GCN-NEXT: $vgpr27 = PRED_COPY [[C]](s32) + ; GCN-NEXT: $vgpr28 = PRED_COPY [[C]](s32) + ; GCN-NEXT: $vgpr29 = PRED_COPY [[C]](s32) + ; GCN-NEXT: $vgpr30 = PRED_COPY [[C]](s32) + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY2]](<4 x s32>) ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @i32_fastcc_i32_i32_a32i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 ; GCN-NEXT: ADJCALLSTACKDOWN 0, 12, implicit-def $scc - ; GCN-NEXT: $vgpr0 = COPY [[COPY3]](s32) + ; GCN-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY3]](s32) ; GCN-NEXT: SI_RETURN implicit $vgpr0 entry: %ret = tail call fastcc i32 @i32_fastcc_i32_i32_a32i32(i32 %a, i32 %b, [32 x i32] zeroinitializer) @@ -574,24 +574,24 @@ ; GCN: bb.1.entry: ; GCN-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_i32 - ; GCN-NEXT: $vgpr0 = COPY [[COPY]](s32) - ; GCN-NEXT: $vgpr1 = COPY [[COPY1]](s32) - ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY3]](<4 x s32>) + ; GCN-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY]](s32) + ; GCN-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY1]](s32) + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY3]](<4 x s32>) ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @i32_fastcc_i32_i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN-NEXT: [[GV1:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @sibling_call_i32_fastcc_i32_i32 
- ; GCN-NEXT: $vgpr0 = COPY [[COPY]](s32) - ; GCN-NEXT: $vgpr1 = COPY [[COPY1]](s32) - ; GCN-NEXT: $vgpr2 = COPY [[COPY4]](s32) - ; GCN-NEXT: [[COPY5:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY5]](<4 x s32>) + ; GCN-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY]](s32) + ; GCN-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY1]](s32) + ; GCN-NEXT: $vgpr2 = PRED_COPY [[PRED_COPY4]](s32) + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY5]](<4 x s32>) ; GCN-NEXT: SI_TCRETURN [[GV1]](p0), @sibling_call_i32_fastcc_i32_i32, 0, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $sgpr0_sgpr1_sgpr2_sgpr3 entry: %other.call = tail call fastcc i32 @i32_fastcc_i32_i32(i32 %a, i32 %b) @@ -606,37 +606,37 @@ ; GCN: bb.1.entry: ; GCN-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 - ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 - ; GCN-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 - ; GCN-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24 - ; GCN-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25 - ; GCN-NEXT: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26 - ; GCN-NEXT: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27 - ; GCN-NEXT: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28 - ; GCN-NEXT: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29 - ; GCN-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $vgpr6 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $vgpr7 + ; GCN-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr8 + ; GCN-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr9 + ; GCN-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = 
PRED_COPY $vgpr10 + ; GCN-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $vgpr11 + ; GCN-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr12 + ; GCN-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr13 + ; GCN-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr14 + ; GCN-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr15 + ; GCN-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY $vgpr16 + ; GCN-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY $vgpr17 + ; GCN-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY $vgpr18 + ; GCN-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY $vgpr19 + ; GCN-NEXT: [[PRED_COPY20:%[0-9]+]]:_(s32) = PRED_COPY $vgpr20 + ; GCN-NEXT: [[PRED_COPY21:%[0-9]+]]:_(s32) = PRED_COPY $vgpr21 + ; GCN-NEXT: [[PRED_COPY22:%[0-9]+]]:_(s32) = PRED_COPY $vgpr22 + ; GCN-NEXT: [[PRED_COPY23:%[0-9]+]]:_(s32) = PRED_COPY $vgpr23 + ; GCN-NEXT: [[PRED_COPY24:%[0-9]+]]:_(s32) = PRED_COPY $vgpr24 + ; GCN-NEXT: [[PRED_COPY25:%[0-9]+]]:_(s32) = PRED_COPY $vgpr25 + ; GCN-NEXT: [[PRED_COPY26:%[0-9]+]]:_(s32) = PRED_COPY $vgpr26 + ; GCN-NEXT: [[PRED_COPY27:%[0-9]+]]:_(s32) = PRED_COPY $vgpr27 + ; GCN-NEXT: [[PRED_COPY28:%[0-9]+]]:_(s32) = PRED_COPY $vgpr28 + ; GCN-NEXT: [[PRED_COPY29:%[0-9]+]]:_(s32) = PRED_COPY $vgpr29 + ; GCN-NEXT: [[PRED_COPY30:%[0-9]+]]:_(s32) = PRED_COPY $vgpr30 ; GCN-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.5 ; GCN-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.5, align 16, addrspace 5) ; GCN-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.4 @@ -655,39 +655,39 @@ ; GCN-NEXT: G_STORE [[LOAD1]](s32), [[FRAME_INDEX5]](p5) :: (store (s32) into %fixed-stack.1, addrspace 5) ; GCN-NEXT: [[FRAME_INDEX6:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 ; GCN-NEXT: G_STORE [[LOAD2]](s32), [[FRAME_INDEX6]](p5) :: (store (s32) into %fixed-stack.0, align 8, addrspace 5) - ; GCN-NEXT: $vgpr0 = COPY [[COPY]](s32) - ; GCN-NEXT: $vgpr1 = COPY [[COPY1]](s32) - ; GCN-NEXT: $vgpr2 = COPY [[COPY2]](s32) - ; GCN-NEXT: $vgpr3 = COPY [[COPY3]](s32) - ; GCN-NEXT: $vgpr4 = COPY [[COPY4]](s32) - ; GCN-NEXT: $vgpr5 = COPY [[COPY5]](s32) - ; GCN-NEXT: $vgpr6 = COPY [[COPY6]](s32) - ; GCN-NEXT: $vgpr7 = COPY [[COPY7]](s32) - ; GCN-NEXT: $vgpr8 = COPY [[COPY8]](s32) - ; GCN-NEXT: $vgpr9 = COPY [[COPY9]](s32) - ; GCN-NEXT: $vgpr10 = COPY [[COPY10]](s32) - ; GCN-NEXT: $vgpr11 = COPY [[COPY11]](s32) - ; GCN-NEXT: $vgpr12 = COPY [[COPY12]](s32) - ; GCN-NEXT: $vgpr13 = COPY [[COPY13]](s32) - ; GCN-NEXT: $vgpr14 = COPY [[COPY14]](s32) - ; GCN-NEXT: $vgpr15 = COPY [[COPY15]](s32) - ; GCN-NEXT: $vgpr16 = COPY [[COPY16]](s32) - ; GCN-NEXT: $vgpr17 = COPY [[COPY17]](s32) - ; GCN-NEXT: $vgpr18 = COPY [[COPY18]](s32) - ; GCN-NEXT: $vgpr19 = COPY [[COPY19]](s32) - ; GCN-NEXT: $vgpr20 = COPY [[COPY20]](s32) - ; GCN-NEXT: $vgpr21 = COPY [[COPY21]](s32) - ; GCN-NEXT: $vgpr22 = COPY [[COPY22]](s32) - ; GCN-NEXT: $vgpr23 = COPY [[COPY23]](s32) - ; GCN-NEXT: $vgpr24 = COPY [[COPY24]](s32) - ; GCN-NEXT: $vgpr25 = COPY [[COPY25]](s32) - ; GCN-NEXT: $vgpr26 = COPY [[COPY26]](s32) - ; GCN-NEXT: $vgpr27 = COPY [[COPY27]](s32) - ; GCN-NEXT: $vgpr28 = COPY [[COPY28]](s32) - ; GCN-NEXT: $vgpr29 = COPY [[COPY29]](s32) - ; GCN-NEXT: $vgpr30 = COPY [[COPY30]](s32) - ; GCN-NEXT: [[COPY31:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY31]](<4 x s32>) + ; GCN-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY]](s32) + ; GCN-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY1]](s32) + ; 
GCN-NEXT: $vgpr2 = PRED_COPY [[PRED_COPY2]](s32) + ; GCN-NEXT: $vgpr3 = PRED_COPY [[PRED_COPY3]](s32) + ; GCN-NEXT: $vgpr4 = PRED_COPY [[PRED_COPY4]](s32) + ; GCN-NEXT: $vgpr5 = PRED_COPY [[PRED_COPY5]](s32) + ; GCN-NEXT: $vgpr6 = PRED_COPY [[PRED_COPY6]](s32) + ; GCN-NEXT: $vgpr7 = PRED_COPY [[PRED_COPY7]](s32) + ; GCN-NEXT: $vgpr8 = PRED_COPY [[PRED_COPY8]](s32) + ; GCN-NEXT: $vgpr9 = PRED_COPY [[PRED_COPY9]](s32) + ; GCN-NEXT: $vgpr10 = PRED_COPY [[PRED_COPY10]](s32) + ; GCN-NEXT: $vgpr11 = PRED_COPY [[PRED_COPY11]](s32) + ; GCN-NEXT: $vgpr12 = PRED_COPY [[PRED_COPY12]](s32) + ; GCN-NEXT: $vgpr13 = PRED_COPY [[PRED_COPY13]](s32) + ; GCN-NEXT: $vgpr14 = PRED_COPY [[PRED_COPY14]](s32) + ; GCN-NEXT: $vgpr15 = PRED_COPY [[PRED_COPY15]](s32) + ; GCN-NEXT: $vgpr16 = PRED_COPY [[PRED_COPY16]](s32) + ; GCN-NEXT: $vgpr17 = PRED_COPY [[PRED_COPY17]](s32) + ; GCN-NEXT: $vgpr18 = PRED_COPY [[PRED_COPY18]](s32) + ; GCN-NEXT: $vgpr19 = PRED_COPY [[PRED_COPY19]](s32) + ; GCN-NEXT: $vgpr20 = PRED_COPY [[PRED_COPY20]](s32) + ; GCN-NEXT: $vgpr21 = PRED_COPY [[PRED_COPY21]](s32) + ; GCN-NEXT: $vgpr22 = PRED_COPY [[PRED_COPY22]](s32) + ; GCN-NEXT: $vgpr23 = PRED_COPY [[PRED_COPY23]](s32) + ; GCN-NEXT: $vgpr24 = PRED_COPY [[PRED_COPY24]](s32) + ; GCN-NEXT: $vgpr25 = PRED_COPY [[PRED_COPY25]](s32) + ; GCN-NEXT: $vgpr26 = PRED_COPY [[PRED_COPY26]](s32) + ; GCN-NEXT: $vgpr27 = PRED_COPY [[PRED_COPY27]](s32) + ; GCN-NEXT: $vgpr28 = PRED_COPY [[PRED_COPY28]](s32) + ; GCN-NEXT: $vgpr29 = PRED_COPY [[PRED_COPY29]](s32) + ; GCN-NEXT: $vgpr30 = PRED_COPY [[PRED_COPY30]](s32) + ; GCN-NEXT: [[PRED_COPY31:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY31]](<4 x s32>) ; GCN-NEXT: SI_TCRETURN [[GV]](p0), @i32_fastcc_i32_i32_a32i32, 0, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3 entry: %alloca = alloca [16 x i32], align 4, addrspace(5) @@ -702,37 +702,37 @@ ; GCN: bb.1.entry: ; GCN-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY 
$vgpr13 - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 - ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 - ; GCN-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 - ; GCN-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24 - ; GCN-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25 - ; GCN-NEXT: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26 - ; GCN-NEXT: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27 - ; GCN-NEXT: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28 - ; GCN-NEXT: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29 - ; GCN-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $vgpr6 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $vgpr7 + ; GCN-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr8 + ; GCN-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr9 + ; GCN-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr10 + ; GCN-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $vgpr11 + ; GCN-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr12 + ; GCN-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr13 + ; GCN-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr14 + ; GCN-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr15 + ; GCN-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY $vgpr16 + ; GCN-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY $vgpr17 + ; GCN-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY $vgpr18 + ; GCN-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY $vgpr19 + ; GCN-NEXT: [[PRED_COPY20:%[0-9]+]]:_(s32) = PRED_COPY $vgpr20 + ; GCN-NEXT: [[PRED_COPY21:%[0-9]+]]:_(s32) = PRED_COPY $vgpr21 + ; GCN-NEXT: [[PRED_COPY22:%[0-9]+]]:_(s32) = PRED_COPY $vgpr22 + ; GCN-NEXT: [[PRED_COPY23:%[0-9]+]]:_(s32) = PRED_COPY $vgpr23 + ; GCN-NEXT: [[PRED_COPY24:%[0-9]+]]:_(s32) = PRED_COPY $vgpr24 + ; GCN-NEXT: [[PRED_COPY25:%[0-9]+]]:_(s32) = PRED_COPY $vgpr25 + ; GCN-NEXT: [[PRED_COPY26:%[0-9]+]]:_(s32) = PRED_COPY $vgpr26 + ; GCN-NEXT: [[PRED_COPY27:%[0-9]+]]:_(s32) = PRED_COPY $vgpr27 + ; GCN-NEXT: [[PRED_COPY28:%[0-9]+]]:_(s32) = PRED_COPY $vgpr28 + ; GCN-NEXT: [[PRED_COPY29:%[0-9]+]]:_(s32) = PRED_COPY $vgpr29 + ; GCN-NEXT: [[PRED_COPY30:%[0-9]+]]:_(s32) = PRED_COPY $vgpr30 ; GCN-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.9 ; GCN-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.9, align 16, addrspace 5) ; GCN-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.8 @@ -760,39 +760,39 @@ ; GCN-NEXT: G_STORE [[C1]](s32), [[FRAME_INDEX9]](p5) :: (store (s32) into %fixed-stack.1, addrspace 5) ; GCN-NEXT: [[FRAME_INDEX10:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 ; GCN-NEXT: G_STORE [[C1]](s32), [[FRAME_INDEX10]](p5) :: (store (s32) into %fixed-stack.0, align 8, addrspace 5) - ; GCN-NEXT: $vgpr0 
= COPY [[COPY]](s32) - ; GCN-NEXT: $vgpr1 = COPY [[COPY1]](s32) - ; GCN-NEXT: $vgpr2 = COPY [[C1]](s32) - ; GCN-NEXT: $vgpr3 = COPY [[C1]](s32) - ; GCN-NEXT: $vgpr4 = COPY [[C1]](s32) - ; GCN-NEXT: $vgpr5 = COPY [[C1]](s32) - ; GCN-NEXT: $vgpr6 = COPY [[C1]](s32) - ; GCN-NEXT: $vgpr7 = COPY [[C1]](s32) - ; GCN-NEXT: $vgpr8 = COPY [[C1]](s32) - ; GCN-NEXT: $vgpr9 = COPY [[C1]](s32) - ; GCN-NEXT: $vgpr10 = COPY [[C1]](s32) - ; GCN-NEXT: $vgpr11 = COPY [[C1]](s32) - ; GCN-NEXT: $vgpr12 = COPY [[C1]](s32) - ; GCN-NEXT: $vgpr13 = COPY [[C1]](s32) - ; GCN-NEXT: $vgpr14 = COPY [[C1]](s32) - ; GCN-NEXT: $vgpr15 = COPY [[C1]](s32) - ; GCN-NEXT: $vgpr16 = COPY [[C1]](s32) - ; GCN-NEXT: $vgpr17 = COPY [[C1]](s32) - ; GCN-NEXT: $vgpr18 = COPY [[C1]](s32) - ; GCN-NEXT: $vgpr19 = COPY [[C1]](s32) - ; GCN-NEXT: $vgpr20 = COPY [[C1]](s32) - ; GCN-NEXT: $vgpr21 = COPY [[C1]](s32) - ; GCN-NEXT: $vgpr22 = COPY [[C1]](s32) - ; GCN-NEXT: $vgpr23 = COPY [[C1]](s32) - ; GCN-NEXT: $vgpr24 = COPY [[C1]](s32) - ; GCN-NEXT: $vgpr25 = COPY [[C1]](s32) - ; GCN-NEXT: $vgpr26 = COPY [[C1]](s32) - ; GCN-NEXT: $vgpr27 = COPY [[C1]](s32) - ; GCN-NEXT: $vgpr28 = COPY [[C1]](s32) - ; GCN-NEXT: $vgpr29 = COPY [[C1]](s32) - ; GCN-NEXT: $vgpr30 = COPY [[C1]](s32) - ; GCN-NEXT: [[COPY31:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY31]](<4 x s32>) + ; GCN-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY]](s32) + ; GCN-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY1]](s32) + ; GCN-NEXT: $vgpr2 = PRED_COPY [[C1]](s32) + ; GCN-NEXT: $vgpr3 = PRED_COPY [[C1]](s32) + ; GCN-NEXT: $vgpr4 = PRED_COPY [[C1]](s32) + ; GCN-NEXT: $vgpr5 = PRED_COPY [[C1]](s32) + ; GCN-NEXT: $vgpr6 = PRED_COPY [[C1]](s32) + ; GCN-NEXT: $vgpr7 = PRED_COPY [[C1]](s32) + ; GCN-NEXT: $vgpr8 = PRED_COPY [[C1]](s32) + ; GCN-NEXT: $vgpr9 = PRED_COPY [[C1]](s32) + ; GCN-NEXT: $vgpr10 = PRED_COPY [[C1]](s32) + ; GCN-NEXT: $vgpr11 = PRED_COPY [[C1]](s32) + ; GCN-NEXT: $vgpr12 = PRED_COPY [[C1]](s32) + ; GCN-NEXT: $vgpr13 = PRED_COPY [[C1]](s32) + ; GCN-NEXT: $vgpr14 = PRED_COPY [[C1]](s32) + ; GCN-NEXT: $vgpr15 = PRED_COPY [[C1]](s32) + ; GCN-NEXT: $vgpr16 = PRED_COPY [[C1]](s32) + ; GCN-NEXT: $vgpr17 = PRED_COPY [[C1]](s32) + ; GCN-NEXT: $vgpr18 = PRED_COPY [[C1]](s32) + ; GCN-NEXT: $vgpr19 = PRED_COPY [[C1]](s32) + ; GCN-NEXT: $vgpr20 = PRED_COPY [[C1]](s32) + ; GCN-NEXT: $vgpr21 = PRED_COPY [[C1]](s32) + ; GCN-NEXT: $vgpr22 = PRED_COPY [[C1]](s32) + ; GCN-NEXT: $vgpr23 = PRED_COPY [[C1]](s32) + ; GCN-NEXT: $vgpr24 = PRED_COPY [[C1]](s32) + ; GCN-NEXT: $vgpr25 = PRED_COPY [[C1]](s32) + ; GCN-NEXT: $vgpr26 = PRED_COPY [[C1]](s32) + ; GCN-NEXT: $vgpr27 = PRED_COPY [[C1]](s32) + ; GCN-NEXT: $vgpr28 = PRED_COPY [[C1]](s32) + ; GCN-NEXT: $vgpr29 = PRED_COPY [[C1]](s32) + ; GCN-NEXT: $vgpr30 = PRED_COPY [[C1]](s32) + ; GCN-NEXT: [[PRED_COPY31:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY31]](<4 x s32>) ; GCN-NEXT: SI_TCRETURN [[GV]](p0), @i32_fastcc_i32_i32_a32i32, 0, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit 
$vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3 entry: %alloca = alloca [16 x i32], align 4, addrspace(5) @@ -809,46 +809,46 @@ ; GCN: bb.1.entry: ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr13 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; GCN-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; GCN-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; GCN-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; GCN-NEXT: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; GCN-NEXT: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr18 - ; GCN-NEXT: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr19 - ; GCN-NEXT: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr20 - ; GCN-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr21 - ; GCN-NEXT: [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr22 - ; GCN-NEXT: [[COPY32:%[0-9]+]]:_(s32) = COPY $vgpr23 - ; GCN-NEXT: [[COPY33:%[0-9]+]]:_(s32) = COPY $vgpr24 - ; GCN-NEXT: [[COPY34:%[0-9]+]]:_(s32) = COPY $vgpr25 - ; GCN-NEXT: [[COPY35:%[0-9]+]]:_(s32) = COPY $vgpr26 - ; GCN-NEXT: [[COPY36:%[0-9]+]]:_(s32) = COPY $vgpr27 - ; GCN-NEXT: [[COPY37:%[0-9]+]]:_(s32) = COPY $vgpr28 - ; GCN-NEXT: [[COPY38:%[0-9]+]]:_(s32) = COPY $vgpr29 - ; GCN-NEXT: [[COPY39:%[0-9]+]]:_(s32) = COPY $vgpr30 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr31 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr15 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr6_sgpr7 + ; GCN-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY 
$vgpr1 + ; GCN-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GCN-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GCN-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; GCN-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; GCN-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr6 + ; GCN-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY $vgpr7 + ; GCN-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY $vgpr8 + ; GCN-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY $vgpr9 + ; GCN-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY $vgpr10 + ; GCN-NEXT: [[PRED_COPY20:%[0-9]+]]:_(s32) = PRED_COPY $vgpr11 + ; GCN-NEXT: [[PRED_COPY21:%[0-9]+]]:_(s32) = PRED_COPY $vgpr12 + ; GCN-NEXT: [[PRED_COPY22:%[0-9]+]]:_(s32) = PRED_COPY $vgpr13 + ; GCN-NEXT: [[PRED_COPY23:%[0-9]+]]:_(s32) = PRED_COPY $vgpr14 + ; GCN-NEXT: [[PRED_COPY24:%[0-9]+]]:_(s32) = PRED_COPY $vgpr15 + ; GCN-NEXT: [[PRED_COPY25:%[0-9]+]]:_(s32) = PRED_COPY $vgpr16 + ; GCN-NEXT: [[PRED_COPY26:%[0-9]+]]:_(s32) = PRED_COPY $vgpr17 + ; GCN-NEXT: [[PRED_COPY27:%[0-9]+]]:_(s32) = PRED_COPY $vgpr18 + ; GCN-NEXT: [[PRED_COPY28:%[0-9]+]]:_(s32) = PRED_COPY $vgpr19 + ; GCN-NEXT: [[PRED_COPY29:%[0-9]+]]:_(s32) = PRED_COPY $vgpr20 + ; GCN-NEXT: [[PRED_COPY30:%[0-9]+]]:_(s32) = PRED_COPY $vgpr21 + ; GCN-NEXT: [[PRED_COPY31:%[0-9]+]]:_(s32) = PRED_COPY $vgpr22 + ; GCN-NEXT: [[PRED_COPY32:%[0-9]+]]:_(s32) = PRED_COPY $vgpr23 + ; GCN-NEXT: [[PRED_COPY33:%[0-9]+]]:_(s32) = PRED_COPY $vgpr24 + ; GCN-NEXT: [[PRED_COPY34:%[0-9]+]]:_(s32) = PRED_COPY $vgpr25 + ; GCN-NEXT: [[PRED_COPY35:%[0-9]+]]:_(s32) = PRED_COPY $vgpr26 + ; GCN-NEXT: [[PRED_COPY36:%[0-9]+]]:_(s32) = PRED_COPY $vgpr27 + ; GCN-NEXT: [[PRED_COPY37:%[0-9]+]]:_(s32) = PRED_COPY $vgpr28 + ; GCN-NEXT: [[PRED_COPY38:%[0-9]+]]:_(s32) = PRED_COPY $vgpr29 + ; GCN-NEXT: [[PRED_COPY39:%[0-9]+]]:_(s32) = PRED_COPY $vgpr30 ; GCN-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.35 ; GCN-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.35, align 16, addrspace 5) ; GCN-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.34 @@ -932,33 +932,33 @@ ; GCN-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX35]], [[C3]](s32) ; GCN-NEXT: G_STORE [[C1]](s64), [[PTR_ADD2]](p5) :: (store (s64) into %ir.alloca1 + 8, addrspace 5) ; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @void_fastcc_multi_byval - ; GCN-NEXT: [[COPY40:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GCN-NEXT: [[COPY41:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY42:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY43:%[0-9]+]]:_(s64) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY44:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY45:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[COPY46:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; GCN-NEXT: [[COPY47:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; GCN-NEXT: [[COPY48:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[PRED_COPY40:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]] + ; GCN-NEXT: [[PRED_COPY41:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; GCN-NEXT: [[PRED_COPY42:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY6]] + ; GCN-NEXT: [[PRED_COPY43:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY5]] + ; GCN-NEXT: [[PRED_COPY44:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; GCN-NEXT: [[PRED_COPY45:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; GCN-NEXT: [[PRED_COPY46:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]] + ; GCN-NEXT: [[PRED_COPY47:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]] + ; GCN-NEXT: 
[[PRED_COPY48:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; GCN-NEXT: [[FRAME_INDEX36:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 ; GCN-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; GCN-NEXT: G_MEMCPY [[FRAME_INDEX36]](p5), [[FRAME_INDEX34]](p5), [[C4]](s32), 0 :: (dereferenceable store (s96) into %fixed-stack.1, align 16, addrspace 5), (dereferenceable load (s96) from %ir.alloca0, align 16, addrspace 5) ; GCN-NEXT: [[FRAME_INDEX37:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 ; GCN-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GCN-NEXT: G_MEMCPY [[FRAME_INDEX37]](p5), [[FRAME_INDEX35]](p5), [[C5]](s32), 0 :: (dereferenceable store (s128) into %fixed-stack.0, addrspace 5), (dereferenceable load (s128) from %ir.alloca1, align 8, addrspace 5) - ; GCN-NEXT: $vgpr0 = COPY [[COPY9]](s32) - ; GCN-NEXT: [[COPY49:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY49]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY40]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY41]](p4) - ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[COPY42]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY43]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY44]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY45]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY46]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[COPY47]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[COPY48]](s32) + ; GCN-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY9]](s32) + ; GCN-NEXT: [[PRED_COPY49:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY49]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY40]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY41]](p4) + ; GCN-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PRED_COPY42]](p4) + ; GCN-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY43]](s64) + ; GCN-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY44]](s32) + ; GCN-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY45]](s32) + ; GCN-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY46]](s32) + ; GCN-NEXT: $sgpr15 = PRED_COPY [[PRED_COPY47]](s32) + ; GCN-NEXT: $vgpr31 = PRED_COPY [[PRED_COPY48]](s32) ; GCN-NEXT: SI_TCRETURN [[GV]](p0), @void_fastcc_multi_byval, 0, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 entry: %alloca0 = alloca [3 x i32], align 16, addrspace(5) @@ -977,46 +977,46 @@ ; GCN: bb.1.entry: ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr13 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(s32) = 
COPY $vgpr1 - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; GCN-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; GCN-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; GCN-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; GCN-NEXT: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; GCN-NEXT: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr18 - ; GCN-NEXT: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr19 - ; GCN-NEXT: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr20 - ; GCN-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr21 - ; GCN-NEXT: [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr22 - ; GCN-NEXT: [[COPY32:%[0-9]+]]:_(s32) = COPY $vgpr23 - ; GCN-NEXT: [[COPY33:%[0-9]+]]:_(s32) = COPY $vgpr24 - ; GCN-NEXT: [[COPY34:%[0-9]+]]:_(s32) = COPY $vgpr25 - ; GCN-NEXT: [[COPY35:%[0-9]+]]:_(s32) = COPY $vgpr26 - ; GCN-NEXT: [[COPY36:%[0-9]+]]:_(s32) = COPY $vgpr27 - ; GCN-NEXT: [[COPY37:%[0-9]+]]:_(s32) = COPY $vgpr28 - ; GCN-NEXT: [[COPY38:%[0-9]+]]:_(s32) = COPY $vgpr29 - ; GCN-NEXT: [[COPY39:%[0-9]+]]:_(s32) = COPY $vgpr30 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr31 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr15 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr6_sgpr7 + ; GCN-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GCN-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GCN-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; GCN-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; GCN-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr6 + ; GCN-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY $vgpr7 + ; GCN-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY $vgpr8 + ; GCN-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY $vgpr9 + ; GCN-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY $vgpr10 + ; GCN-NEXT: [[PRED_COPY20:%[0-9]+]]:_(s32) = PRED_COPY $vgpr11 + ; GCN-NEXT: [[PRED_COPY21:%[0-9]+]]:_(s32) = PRED_COPY $vgpr12 + ; GCN-NEXT: [[PRED_COPY22:%[0-9]+]]:_(s32) = PRED_COPY $vgpr13 + ; GCN-NEXT: [[PRED_COPY23:%[0-9]+]]:_(s32) = PRED_COPY $vgpr14 + ; GCN-NEXT: [[PRED_COPY24:%[0-9]+]]:_(s32) = PRED_COPY $vgpr15 + ; GCN-NEXT: [[PRED_COPY25:%[0-9]+]]:_(s32) = PRED_COPY $vgpr16 + ; GCN-NEXT: [[PRED_COPY26:%[0-9]+]]:_(s32) = PRED_COPY $vgpr17 + ; GCN-NEXT: [[PRED_COPY27:%[0-9]+]]:_(s32) = PRED_COPY $vgpr18 + ; GCN-NEXT: [[PRED_COPY28:%[0-9]+]]:_(s32) = PRED_COPY $vgpr19 + ; GCN-NEXT: [[PRED_COPY29:%[0-9]+]]:_(s32) = 
PRED_COPY $vgpr20 + ; GCN-NEXT: [[PRED_COPY30:%[0-9]+]]:_(s32) = PRED_COPY $vgpr21 + ; GCN-NEXT: [[PRED_COPY31:%[0-9]+]]:_(s32) = PRED_COPY $vgpr22 + ; GCN-NEXT: [[PRED_COPY32:%[0-9]+]]:_(s32) = PRED_COPY $vgpr23 + ; GCN-NEXT: [[PRED_COPY33:%[0-9]+]]:_(s32) = PRED_COPY $vgpr24 + ; GCN-NEXT: [[PRED_COPY34:%[0-9]+]]:_(s32) = PRED_COPY $vgpr25 + ; GCN-NEXT: [[PRED_COPY35:%[0-9]+]]:_(s32) = PRED_COPY $vgpr26 + ; GCN-NEXT: [[PRED_COPY36:%[0-9]+]]:_(s32) = PRED_COPY $vgpr27 + ; GCN-NEXT: [[PRED_COPY37:%[0-9]+]]:_(s32) = PRED_COPY $vgpr28 + ; GCN-NEXT: [[PRED_COPY38:%[0-9]+]]:_(s32) = PRED_COPY $vgpr29 + ; GCN-NEXT: [[PRED_COPY39:%[0-9]+]]:_(s32) = PRED_COPY $vgpr30 ; GCN-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.36 ; GCN-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.36, align 16, addrspace 5) ; GCN-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.35 @@ -1096,64 +1096,64 @@ ; GCN-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX34]], [[C3]](s32) ; GCN-NEXT: G_STORE [[C]](s32), [[PTR_ADD1]](p5) :: (store (s32) into %ir.alloca + 8, addrspace 5) ; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @void_fastcc_byval_and_stack_passed - ; GCN-NEXT: [[COPY40:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GCN-NEXT: [[COPY41:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY42:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY43:%[0-9]+]]:_(s64) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY44:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY45:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[COPY46:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; GCN-NEXT: [[COPY47:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; GCN-NEXT: [[COPY48:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[PRED_COPY40:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]] + ; GCN-NEXT: [[PRED_COPY41:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; GCN-NEXT: [[PRED_COPY42:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY6]] + ; GCN-NEXT: [[PRED_COPY43:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY5]] + ; GCN-NEXT: [[PRED_COPY44:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; GCN-NEXT: [[PRED_COPY45:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; GCN-NEXT: [[PRED_COPY46:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]] + ; GCN-NEXT: [[PRED_COPY47:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]] + ; GCN-NEXT: [[PRED_COPY48:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; GCN-NEXT: [[FRAME_INDEX35:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2 ; GCN-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; GCN-NEXT: G_MEMCPY [[FRAME_INDEX35]](p5), [[FRAME_INDEX34]](p5), [[C4]](s32), 0 :: (dereferenceable store (s96) into %fixed-stack.2, align 16, addrspace 5), (dereferenceable load (s96) from %ir.alloca, align 16, addrspace 5) ; GCN-NEXT: [[FRAME_INDEX36:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 ; GCN-NEXT: G_STORE [[C1]](s32), [[FRAME_INDEX36]](p5) :: (store (s32) into %fixed-stack.1, addrspace 5) ; GCN-NEXT: [[FRAME_INDEX37:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 - ; GCN-NEXT: G_STORE [[COPY9]](s32), [[FRAME_INDEX37]](p5) :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) - ; GCN-NEXT: $vgpr0 = COPY [[C1]](s32) - ; GCN-NEXT: $vgpr1 = COPY [[C1]](s32) - ; GCN-NEXT: $vgpr2 = COPY [[C1]](s32) - ; GCN-NEXT: $vgpr3 = COPY [[C1]](s32) - ; GCN-NEXT: $vgpr4 = COPY [[C1]](s32) - ; GCN-NEXT: $vgpr5 = COPY [[C1]](s32) - ; GCN-NEXT: $vgpr6 = COPY [[C1]](s32) - ; GCN-NEXT: $vgpr7 = COPY [[C1]](s32) - ; GCN-NEXT: $vgpr8 = COPY [[C1]](s32) - ; GCN-NEXT: $vgpr9 = COPY [[C1]](s32) - ; 
GCN-NEXT: $vgpr10 = COPY [[C1]](s32) - ; GCN-NEXT: $vgpr11 = COPY [[C1]](s32) - ; GCN-NEXT: $vgpr12 = COPY [[C1]](s32) - ; GCN-NEXT: $vgpr13 = COPY [[C1]](s32) - ; GCN-NEXT: $vgpr14 = COPY [[C1]](s32) - ; GCN-NEXT: $vgpr15 = COPY [[C1]](s32) - ; GCN-NEXT: $vgpr16 = COPY [[C1]](s32) - ; GCN-NEXT: $vgpr17 = COPY [[C1]](s32) - ; GCN-NEXT: $vgpr18 = COPY [[C1]](s32) - ; GCN-NEXT: $vgpr19 = COPY [[C1]](s32) - ; GCN-NEXT: $vgpr20 = COPY [[C1]](s32) - ; GCN-NEXT: $vgpr21 = COPY [[C1]](s32) - ; GCN-NEXT: $vgpr22 = COPY [[C1]](s32) - ; GCN-NEXT: $vgpr23 = COPY [[C1]](s32) - ; GCN-NEXT: $vgpr24 = COPY [[C1]](s32) - ; GCN-NEXT: $vgpr25 = COPY [[C1]](s32) - ; GCN-NEXT: $vgpr26 = COPY [[C1]](s32) - ; GCN-NEXT: $vgpr27 = COPY [[C1]](s32) - ; GCN-NEXT: $vgpr28 = COPY [[C1]](s32) - ; GCN-NEXT: $vgpr29 = COPY [[C1]](s32) - ; GCN-NEXT: $vgpr30 = COPY [[C1]](s32) - ; GCN-NEXT: [[COPY49:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY49]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY40]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY41]](p4) - ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[COPY42]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY43]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY44]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY45]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY46]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[COPY47]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[COPY48]](s32) + ; GCN-NEXT: G_STORE [[PRED_COPY9]](s32), [[FRAME_INDEX37]](p5) :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) + ; GCN-NEXT: $vgpr0 = PRED_COPY [[C1]](s32) + ; GCN-NEXT: $vgpr1 = PRED_COPY [[C1]](s32) + ; GCN-NEXT: $vgpr2 = PRED_COPY [[C1]](s32) + ; GCN-NEXT: $vgpr3 = PRED_COPY [[C1]](s32) + ; GCN-NEXT: $vgpr4 = PRED_COPY [[C1]](s32) + ; GCN-NEXT: $vgpr5 = PRED_COPY [[C1]](s32) + ; GCN-NEXT: $vgpr6 = PRED_COPY [[C1]](s32) + ; GCN-NEXT: $vgpr7 = PRED_COPY [[C1]](s32) + ; GCN-NEXT: $vgpr8 = PRED_COPY [[C1]](s32) + ; GCN-NEXT: $vgpr9 = PRED_COPY [[C1]](s32) + ; GCN-NEXT: $vgpr10 = PRED_COPY [[C1]](s32) + ; GCN-NEXT: $vgpr11 = PRED_COPY [[C1]](s32) + ; GCN-NEXT: $vgpr12 = PRED_COPY [[C1]](s32) + ; GCN-NEXT: $vgpr13 = PRED_COPY [[C1]](s32) + ; GCN-NEXT: $vgpr14 = PRED_COPY [[C1]](s32) + ; GCN-NEXT: $vgpr15 = PRED_COPY [[C1]](s32) + ; GCN-NEXT: $vgpr16 = PRED_COPY [[C1]](s32) + ; GCN-NEXT: $vgpr17 = PRED_COPY [[C1]](s32) + ; GCN-NEXT: $vgpr18 = PRED_COPY [[C1]](s32) + ; GCN-NEXT: $vgpr19 = PRED_COPY [[C1]](s32) + ; GCN-NEXT: $vgpr20 = PRED_COPY [[C1]](s32) + ; GCN-NEXT: $vgpr21 = PRED_COPY [[C1]](s32) + ; GCN-NEXT: $vgpr22 = PRED_COPY [[C1]](s32) + ; GCN-NEXT: $vgpr23 = PRED_COPY [[C1]](s32) + ; GCN-NEXT: $vgpr24 = PRED_COPY [[C1]](s32) + ; GCN-NEXT: $vgpr25 = PRED_COPY [[C1]](s32) + ; GCN-NEXT: $vgpr26 = PRED_COPY [[C1]](s32) + ; GCN-NEXT: $vgpr27 = PRED_COPY [[C1]](s32) + ; GCN-NEXT: $vgpr28 = PRED_COPY [[C1]](s32) + ; GCN-NEXT: $vgpr29 = PRED_COPY [[C1]](s32) + ; GCN-NEXT: $vgpr30 = PRED_COPY [[C1]](s32) + ; GCN-NEXT: [[PRED_COPY49:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY49]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY40]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY41]](p4) + ; GCN-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PRED_COPY42]](p4) + ; GCN-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY43]](s64) + ; GCN-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY44]](s32) + ; GCN-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY45]](s32) + ; GCN-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY46]](s32) + ; GCN-NEXT: 
$sgpr15 = PRED_COPY [[PRED_COPY47]](s32) + ; GCN-NEXT: $vgpr31 = PRED_COPY [[PRED_COPY48]](s32) ; GCN-NEXT: SI_TCRETURN [[GV]](p0), @void_fastcc_byval_and_stack_passed, 0, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 entry: %alloca = alloca [3 x i32], align 16, addrspace(5) @@ -1169,42 +1169,42 @@ ; GCN: bb.1.entry: ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr0, $vgpr1, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr13 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY9]](s32), [[COPY10]](s32) + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr31 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr15 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr6_sgpr7 + ; GCN-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY9]](s32), [[PRED_COPY10]](s32) ; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @i64_fastcc_i64 - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s64) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[PRED_COPY11:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]] + ; GCN-NEXT: [[PRED_COPY12:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; GCN-NEXT: [[PRED_COPY13:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY6]] 
+ ; GCN-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY5]] + ; GCN-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; GCN-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; GCN-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]] + ; GCN-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]] + ; GCN-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[MV]](s64) - ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GCN-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY11]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY12]](p4) - ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[COPY13]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY14]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY17]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[COPY18]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[COPY19]](s32) + ; GCN-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GCN-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GCN-NEXT: [[PRED_COPY20:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY20]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY11]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY12]](p4) + ; GCN-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PRED_COPY13]](p4) + ; GCN-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY14]](s64) + ; GCN-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY15]](s32) + ; GCN-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY16]](s32) + ; GCN-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY17]](s32) + ; GCN-NEXT: $sgpr15 = PRED_COPY [[PRED_COPY18]](s32) + ; GCN-NEXT: $vgpr31 = PRED_COPY [[PRED_COPY19]](s32) ; GCN-NEXT: SI_TCRETURN [[GV]](p0), @i64_fastcc_i64, 0, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 entry: %ret = tail call fastcc i64 @i64_fastcc_i64(i64 %a) @@ -1218,42 +1218,42 @@ ; GCN: bb.1.entry: ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr0, $vgpr1, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr13 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY9]](s32), [[COPY10]](s32) + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr31 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr15 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = 
PRED_COPY $sgpr12 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr6_sgpr7 + ; GCN-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[PRED_COPY9]](s32), [[PRED_COPY10]](s32) ; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @p1i8_fastcc_p1i8 - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s64) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[PRED_COPY11:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]] + ; GCN-NEXT: [[PRED_COPY12:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; GCN-NEXT: [[PRED_COPY13:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY6]] + ; GCN-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY5]] + ; GCN-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; GCN-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; GCN-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]] + ; GCN-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]] + ; GCN-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[MV]](p1) - ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GCN-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY11]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY12]](p4) - ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[COPY13]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY14]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY17]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[COPY18]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[COPY19]](s32) + ; GCN-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GCN-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GCN-NEXT: [[PRED_COPY20:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY20]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY11]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY12]](p4) + ; GCN-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PRED_COPY13]](p4) + ; GCN-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY14]](s64) + ; GCN-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY15]](s32) + ; GCN-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY16]](s32) + ; GCN-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY17]](s32) + ; GCN-NEXT: $sgpr15 = PRED_COPY [[PRED_COPY18]](s32) + ; GCN-NEXT: $vgpr31 = PRED_COPY [[PRED_COPY19]](s32) ; GCN-NEXT: SI_TCRETURN [[GV]](p0), @p1i8_fastcc_p1i8, 0, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, 
implicit $sgpr15, implicit $vgpr31 entry: %ret = tail call fastcc ptr addrspace(1) @p1i8_fastcc_p1i8(ptr addrspace(1) %a) @@ -1267,40 +1267,40 @@ ; GCN: bb.1.entry: ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr0, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr13 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr31 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr15 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr6_sgpr7 + ; GCN-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY9]](s32) ; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @i16_fastcc_i16 - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]] + ; GCN-NEXT: [[PRED_COPY11:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; GCN-NEXT: [[PRED_COPY12:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY6]] + ; GCN-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY5]] + ; GCN-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; GCN-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; GCN-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]] + ; GCN-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]] + ; GCN-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; GCN-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s16) - ; GCN-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY19]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[COPY12]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY 
[[COPY17]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[COPY18]](s32) + ; GCN-NEXT: $vgpr0 = PRED_COPY [[ANYEXT]](s32) + ; GCN-NEXT: [[PRED_COPY19:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY19]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY10]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY11]](p4) + ; GCN-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PRED_COPY12]](p4) + ; GCN-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY13]](s64) + ; GCN-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY14]](s32) + ; GCN-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY15]](s32) + ; GCN-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY16]](s32) + ; GCN-NEXT: $sgpr15 = PRED_COPY [[PRED_COPY17]](s32) + ; GCN-NEXT: $vgpr31 = PRED_COPY [[PRED_COPY18]](s32) ; GCN-NEXT: SI_TCRETURN [[GV]](p0), @i16_fastcc_i16, 0, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 entry: %ret = tail call fastcc i16 @i16_fastcc_i16(i16 %a) @@ -1314,40 +1314,40 @@ ; GCN: bb.1.entry: ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr0, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr13 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr31 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr15 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr6_sgpr7 + ; GCN-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY9]](s32) ; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @f16_fastcc_f16 - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]] + ; GCN-NEXT: [[PRED_COPY11:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; GCN-NEXT: [[PRED_COPY12:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY6]] + ; GCN-NEXT: 
[[PRED_COPY13:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY5]] + ; GCN-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; GCN-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; GCN-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]] + ; GCN-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]] + ; GCN-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; GCN-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s16) - ; GCN-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY19]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[COPY12]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[COPY17]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[COPY18]](s32) + ; GCN-NEXT: $vgpr0 = PRED_COPY [[ANYEXT]](s32) + ; GCN-NEXT: [[PRED_COPY19:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY19]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY10]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY11]](p4) + ; GCN-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PRED_COPY12]](p4) + ; GCN-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY13]](s64) + ; GCN-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY14]](s32) + ; GCN-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY15]](s32) + ; GCN-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY16]](s32) + ; GCN-NEXT: $sgpr15 = PRED_COPY [[PRED_COPY17]](s32) + ; GCN-NEXT: $vgpr31 = PRED_COPY [[PRED_COPY18]](s32) ; GCN-NEXT: SI_TCRETURN [[GV]](p0), @f16_fastcc_f16, 0, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 entry: %ret = tail call fastcc half @f16_fastcc_f16(half %a) @@ -1361,47 +1361,47 @@ ; GCN: bb.1.entry: ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr0, $vgpr1, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr13 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY9]](<2 x s16>), [[COPY10]](<2 x s16>) + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr31 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr15 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + 
; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr6_sgpr7 + ; GCN-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[PRED_COPY9:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY10:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[PRED_COPY9]](<2 x s16>), [[PRED_COPY10]](<2 x s16>) ; GCN-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<4 x s16>) ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16) ; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @v3i16_fastcc_v3i16 - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s64) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[PRED_COPY11:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]] + ; GCN-NEXT: [[PRED_COPY12:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; GCN-NEXT: [[PRED_COPY13:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY6]] + ; GCN-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY5]] + ; GCN-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; GCN-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; GCN-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]] + ; GCN-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]] + ; GCN-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; GCN-NEXT: [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<3 x s16>) ; GCN-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[UV4]](s16), [[UV5]](s16), [[UV6]](s16), [[DEF]](s16) ; GCN-NEXT: [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s16>) - ; GCN-NEXT: $vgpr0 = COPY [[UV7]](<2 x s16>) - ; GCN-NEXT: $vgpr1 = COPY [[UV8]](<2 x s16>) - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY11]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY12]](p4) - ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[COPY13]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY14]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY17]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[COPY18]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[COPY19]](s32) + ; GCN-NEXT: $vgpr0 = PRED_COPY [[UV7]](<2 x s16>) + ; GCN-NEXT: $vgpr1 = PRED_COPY [[UV8]](<2 x s16>) + ; GCN-NEXT: [[PRED_COPY20:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY20]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY11]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY12]](p4) + ; GCN-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PRED_COPY13]](p4) + ; GCN-NEXT: $sgpr10_sgpr11 = PRED_COPY 
[[PRED_COPY14]](s64) + ; GCN-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY15]](s32) + ; GCN-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY16]](s32) + ; GCN-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY17]](s32) + ; GCN-NEXT: $sgpr15 = PRED_COPY [[PRED_COPY18]](s32) + ; GCN-NEXT: $vgpr31 = PRED_COPY [[PRED_COPY19]](s32) ; GCN-NEXT: SI_TCRETURN [[GV]](p0), @v3i16_fastcc_v3i16, 0, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 entry: %ret = tail call fastcc <3 x i16> @v3i16_fastcc_v3i16(<3 x i16> %a) @@ -1415,42 +1415,42 @@ ; GCN: bb.1.entry: ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr0, $vgpr1, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr13 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY9]](<2 x s16>), [[COPY10]](<2 x s16>) + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr31 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr15 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr6_sgpr7 + ; GCN-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[PRED_COPY9:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY10:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[PRED_COPY9]](<2 x s16>), [[PRED_COPY10]](<2 x s16>) ; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @v4i16_fastcc_v4i16 - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s64) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[PRED_COPY11:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]] + ; GCN-NEXT: [[PRED_COPY12:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; GCN-NEXT: [[PRED_COPY13:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY6]] + ; GCN-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY5]] + ; GCN-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; GCN-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; 
GCN-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]] + ; GCN-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]] + ; GCN-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; GCN-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<4 x s16>) - ; GCN-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) - ; GCN-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY11]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY12]](p4) - ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[COPY13]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY14]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY17]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[COPY18]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[COPY19]](s32) + ; GCN-NEXT: $vgpr0 = PRED_COPY [[UV]](<2 x s16>) + ; GCN-NEXT: $vgpr1 = PRED_COPY [[UV1]](<2 x s16>) + ; GCN-NEXT: [[PRED_COPY20:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY20]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY11]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY12]](p4) + ; GCN-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PRED_COPY13]](p4) + ; GCN-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY14]](s64) + ; GCN-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY15]](s32) + ; GCN-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY16]](s32) + ; GCN-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY17]](s32) + ; GCN-NEXT: $sgpr15 = PRED_COPY [[PRED_COPY18]](s32) + ; GCN-NEXT: $vgpr31 = PRED_COPY [[PRED_COPY19]](s32) ; GCN-NEXT: SI_TCRETURN [[GV]](p0), @v4i16_fastcc_v4i16, 0, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 entry: %ret = tail call fastcc <4 x i16> @v4i16_fastcc_v4i16(<4 x i16> %a) @@ -1464,48 +1464,48 @@ ; GCN: bb.1.entry: ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr13 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GCN-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY9]](s32), [[COPY10]](s32) - ; GCN-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY11]](s32), [[COPY12]](s32) + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr31 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr15 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = 
PRED_COPY $sgpr13 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr6_sgpr7 + ; GCN-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GCN-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GCN-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY9]](s32), [[PRED_COPY10]](s32) + ; GCN-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY11]](s32), [[PRED_COPY12]](s32) ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) ; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @v2i64_fastcc_v2i64 - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s64) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[PRED_COPY13:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]] + ; GCN-NEXT: [[PRED_COPY14:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; GCN-NEXT: [[PRED_COPY15:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY6]] + ; GCN-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY5]] + ; GCN-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; GCN-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; GCN-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]] + ; GCN-NEXT: [[PRED_COPY20:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]] + ; GCN-NEXT: [[PRED_COPY21:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<2 x s64>) - ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GCN-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GCN-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GCN-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY22]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY13]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY14]](p4) - ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[COPY15]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY16]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY17]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY18]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY19]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[COPY20]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[COPY21]](s32) + ; GCN-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GCN-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GCN-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GCN-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) + ; GCN-NEXT: [[PRED_COPY22:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY22]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY13]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY14]](p4) + ; GCN-NEXT: $sgpr8_sgpr9 = PRED_COPY 
[[PRED_COPY15]](p4) + ; GCN-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY16]](s64) + ; GCN-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY17]](s32) + ; GCN-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY18]](s32) + ; GCN-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY19]](s32) + ; GCN-NEXT: $sgpr15 = PRED_COPY [[PRED_COPY20]](s32) + ; GCN-NEXT: $vgpr31 = PRED_COPY [[PRED_COPY21]](s32) ; GCN-NEXT: SI_TCRETURN [[GV]](p0), @v2i64_fastcc_v2i64, 0, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 entry: %ret = tail call fastcc <2 x i64> @v2i64_fastcc_v2i64(<2 x i64> %a) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-struct-return-intrinsics.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-struct-return-intrinsics.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-struct-return-intrinsics.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-struct-return-intrinsics.ll @@ -8,10 +8,10 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s32), [[COPY1]](s32), -1 + ; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[PRED_COPY]](s32), [[PRED_COPY1]](s32), -1 ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[INT1]](s1) ; CHECK-NEXT: G_STORE [[INT]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: G_STORE [[SEXT]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-tail-call.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-tail-call.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-tail-call.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-tail-call.ll @@ -8,36 +8,36 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr13 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr31 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr15 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; CHECK-NEXT: 
[[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr6_sgpr7 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 ; CHECK-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_void - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY12]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY13]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[COPY17]](s32) + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]] + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY5]] + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]] + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]] + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY18]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY9]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY10]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PRED_COPY11]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY12]](s64) + ; CHECK-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY13]](s32) + ; CHECK-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY14]](s32) + ; CHECK-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY15]](s32) + ; CHECK-NEXT: $sgpr15 = PRED_COPY [[PRED_COPY16]](s32) + ; CHECK-NEXT: $vgpr31 = PRED_COPY [[PRED_COPY17]](s32) ; CHECK-NEXT: SI_TCRETURN [[GV]](p0), @external_void_func_void, 0, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 tail call void @external_void_func_void() ret void diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-zext-vec-index.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-zext-vec-index.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-zext-vec-index.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-zext-vec-index.ll @@ -8,7 +8,7 @@ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT 
[[DEF]](<256 x s8>), [[C]](s32) ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EVEC]](s8) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[ANYEXT]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %E1 = extractelement <256 x i8> undef, i1 true ret i8 %E1 @@ -21,7 +21,7 @@ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[DEF]](<256 x s8>), [[C]](s32) ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EVEC]](s8) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[ANYEXT]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %E1 = extractelement <256 x i8> undef, i8 255 ret i8 %E1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/lds-zero-initializer.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/lds-zero-initializer.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/lds-zero-initializer.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/lds-zero-initializer.ll @@ -12,24 +12,24 @@ ; GCN-LABEL: name: load_zeroinit_lds_global ; GCN: bb.1 (%ir-block.0): ; GCN: liveins: $sgpr0_sgpr1 - ; GCN: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GCN: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 40 ; GCN: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @lds ; GFX8: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[S_MOV_B32_1]], [[S_MOV_B32_]], implicit-def $scc - ; GFX8: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 9, 0 - ; GFX9: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 36, 0 - ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_U32_]] + ; GFX8: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[PRED_COPY]], 9, 0 + ; GFX9: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[PRED_COPY]], 36, 0 + ; GFX8: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_ADD_U32_]] ; GCN: $m0 = S_MOV_B32 -1 - ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]] - ; GFX8: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY1]], 0, 0, implicit $m0, implicit $exec - ; GFX9: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY1]], 40, 0, implicit $m0, implicit $exec + ; GFX9: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_1]] + ; GFX8: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[PRED_COPY1]], 0, 0, implicit $m0, implicit $exec + ; GFX9: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[PRED_COPY1]], 40, 0, implicit $m0, implicit $exec ; GFX8: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 4294967295 ; GFX8: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_2]], %subreg.sub0, [[S_MOV_B32_3]], %subreg.sub1 ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_LOAD_DWORDX2_IMM]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 ; GFX8: BUFFER_STORE_DWORD_OFFSET [[DS_READ_B32_]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec - ; GFX9: [[COPY2:%[0-9]+]]:vreg_64 = COPY [[S_LOAD_DWORDX2_IMM]] - ; GFX9: FLAT_STORE_DWORD [[COPY2]], [[DS_READ_B32_]], 0, 0, implicit $exec, implicit $flat_scr + ; GFX9: [[PRED_COPY2:%[0-9]+]]:vreg_64 = PRED_COPY [[S_LOAD_DWORDX2_IMM]] + ; GFX9: FLAT_STORE_DWORD [[PRED_COPY2]], [[DS_READ_B32_]], 0, 0, implicit $exec, implicit $flat_scr ; GCN: S_ENDPGM 0 %gep = getelementptr [256 x i32], ptr addrspace(3) @lds, i32 0, i32 10 %ld = load i32, ptr addrspace(3) %gep diff --git 
a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-addrspacecast.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-addrspacecast.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-addrspacecast.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-addrspacecast.mir @@ -18,12 +18,12 @@ ; GCN-LABEL: name: test_addrspacecast_p0_to_p1 ; GCN: liveins: $vgpr0_vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GCN-NEXT: [[BITCAST:%[0-9]+]]:_(p1) = G_BITCAST [[COPY]](p0) - ; GCN-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](p1) - %0:_(p0) = COPY $vgpr0_vgpr1 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:_(p0) = PRED_COPY $vgpr0_vgpr1 + ; GCN-NEXT: [[BITCAST:%[0-9]+]]:_(p1) = G_BITCAST [[PRED_COPY]](p0) + ; GCN-NEXT: $vgpr0_vgpr1 = PRED_COPY [[BITCAST]](p1) + %0:_(p0) = PRED_COPY $vgpr0_vgpr1 %1:_(p1) = G_ADDRSPACE_CAST %0 - $vgpr0_vgpr1 = COPY %1 + $vgpr0_vgpr1 = PRED_COPY %1 ... --- @@ -39,12 +39,12 @@ ; GCN-LABEL: name: test_addrspacecast_p1_to_p0 ; GCN: liveins: $vgpr0_vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GCN-NEXT: [[BITCAST:%[0-9]+]]:_(p0) = G_BITCAST [[COPY]](p1) - ; GCN-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](p0) - %0:_(p1) = COPY $vgpr0_vgpr1 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:_(p1) = PRED_COPY $vgpr0_vgpr1 + ; GCN-NEXT: [[BITCAST:%[0-9]+]]:_(p0) = G_BITCAST [[PRED_COPY]](p1) + ; GCN-NEXT: $vgpr0_vgpr1 = PRED_COPY [[BITCAST]](p0) + %0:_(p1) = PRED_COPY $vgpr0_vgpr1 %1:_(p0) = G_ADDRSPACE_CAST %0 - $vgpr0_vgpr1 = COPY %1 + $vgpr0_vgpr1 = PRED_COPY %1 ... --- @@ -59,12 +59,12 @@ ; GCN-LABEL: name: test_addrspacecast_p0_to_p4 ; GCN: liveins: $vgpr0_vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GCN-NEXT: [[BITCAST:%[0-9]+]]:_(p4) = G_BITCAST [[COPY]](p0) - ; GCN-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](p4) - %0:_(p0) = COPY $vgpr0_vgpr1 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:_(p0) = PRED_COPY $vgpr0_vgpr1 + ; GCN-NEXT: [[BITCAST:%[0-9]+]]:_(p4) = G_BITCAST [[PRED_COPY]](p0) + ; GCN-NEXT: $vgpr0_vgpr1 = PRED_COPY [[BITCAST]](p4) + %0:_(p0) = PRED_COPY $vgpr0_vgpr1 %1:_(p4) = G_ADDRSPACE_CAST %0 - $vgpr0_vgpr1 = COPY %1 + $vgpr0_vgpr1 = PRED_COPY %1 ... --- @@ -79,12 +79,12 @@ ; GCN-LABEL: name: test_addrspacecast_p4_to_p0 ; GCN: liveins: $vgpr0_vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GCN-NEXT: [[BITCAST:%[0-9]+]]:_(p0) = G_BITCAST [[COPY]](p4) - ; GCN-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](p0) - %0:_(p4) = COPY $vgpr0_vgpr1 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $vgpr0_vgpr1 + ; GCN-NEXT: [[BITCAST:%[0-9]+]]:_(p0) = G_BITCAST [[PRED_COPY]](p4) + ; GCN-NEXT: $vgpr0_vgpr1 = PRED_COPY [[BITCAST]](p0) + %0:_(p4) = PRED_COPY $vgpr0_vgpr1 %1:_(p0) = G_ADDRSPACE_CAST %0 - $vgpr0_vgpr1 = COPY %1 + $vgpr0_vgpr1 = PRED_COPY %1 ... --- @@ -99,12 +99,12 @@ ; GCN-LABEL: name: test_addrspacecast_p0_to_p999 ; GCN: liveins: $vgpr0_vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GCN-NEXT: [[BITCAST:%[0-9]+]]:_(p999) = G_BITCAST [[COPY]](p0) - ; GCN-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](p999) - %0:_(p0) = COPY $vgpr0_vgpr1 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:_(p0) = PRED_COPY $vgpr0_vgpr1 + ; GCN-NEXT: [[BITCAST:%[0-9]+]]:_(p999) = G_BITCAST [[PRED_COPY]](p0) + ; GCN-NEXT: $vgpr0_vgpr1 = PRED_COPY [[BITCAST]](p999) + %0:_(p0) = PRED_COPY $vgpr0_vgpr1 %1:_(p999) = G_ADDRSPACE_CAST %0 - $vgpr0_vgpr1 = COPY %1 + $vgpr0_vgpr1 = PRED_COPY %1 ... 
--- @@ -119,12 +119,12 @@ ; GCN-LABEL: name: test_addrspacecast_p999_to_p0 ; GCN: liveins: $vgpr0_vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(p999) = COPY $vgpr0_vgpr1 - ; GCN-NEXT: [[BITCAST:%[0-9]+]]:_(p0) = G_BITCAST [[COPY]](p999) - ; GCN-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](p0) - %0:_(p999) = COPY $vgpr0_vgpr1 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:_(p999) = PRED_COPY $vgpr0_vgpr1 + ; GCN-NEXT: [[BITCAST:%[0-9]+]]:_(p0) = G_BITCAST [[PRED_COPY]](p999) + ; GCN-NEXT: $vgpr0_vgpr1 = PRED_COPY [[BITCAST]](p0) + %0:_(p999) = PRED_COPY $vgpr0_vgpr1 %1:_(p0) = G_ADDRSPACE_CAST %0 - $vgpr0_vgpr1 = COPY %1 + $vgpr0_vgpr1 = PRED_COPY %1 ... --- @@ -139,35 +139,35 @@ ; SIVI-LABEL: name: test_addrspacecast_p5_to_p0 ; SIVI: liveins: $vgpr0, $sgpr4_sgpr5 ; SIVI-NEXT: {{ $}} - ; SIVI-NEXT: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr4_sgpr5 - ; SIVI-NEXT: [[COPY1:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SIVI-NEXT: [[COPY2:%[0-9]+]]:_(p4) = COPY [[COPY]](p4) + ; SIVI-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_64(p4) = PRED_COPY $sgpr4_sgpr5 + ; SIVI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(p5) = PRED_COPY $vgpr0 + ; SIVI-NEXT: [[PRED_COPY2:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY]](p4) ; SIVI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 68 - ; SIVI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY2]], [[C]](s64) + ; SIVI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY2]], [[C]](s64) ; SIVI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s32), addrspace 4) - ; SIVI-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY1]](p5) + ; SIVI-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[PRED_COPY1]](p5) ; SIVI-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[PTRTOINT]](s32), [[LOAD]](s32) ; SIVI-NEXT: [[C1:%[0-9]+]]:_(p5) = G_CONSTANT i32 -1 ; SIVI-NEXT: [[C2:%[0-9]+]]:_(p0) = G_CONSTANT i64 0 - ; SIVI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY1]](p5), [[C1]] + ; SIVI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[PRED_COPY1]](p5), [[C1]] ; SIVI-NEXT: [[SELECT:%[0-9]+]]:_(p0) = G_SELECT [[ICMP]](s1), [[MV]], [[C2]] - ; SIVI-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](p0) + ; SIVI-NEXT: $vgpr0_vgpr1 = PRED_COPY [[SELECT]](p0) ; GFX9-LABEL: name: test_addrspacecast_p5_to_p0 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(p5) = PRED_COPY $vgpr0 ; GFX9-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64(s64) = S_MOV_B64 $src_private_base ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[S_MOV_B64_]](s64) - ; GFX9-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY]](p5) + ; GFX9-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[PRED_COPY]](p5) ; GFX9-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[PTRTOINT]](s32), [[UV1]](s32) ; GFX9-NEXT: [[C:%[0-9]+]]:_(p5) = G_CONSTANT i32 -1 ; GFX9-NEXT: [[C1:%[0-9]+]]:_(p0) = G_CONSTANT i64 0 - ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](p5), [[C]] + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[PRED_COPY]](p5), [[C]] ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(p0) = G_SELECT [[ICMP]](s1), [[MV]], [[C1]] - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](p0) - %0:_(p5) = COPY $vgpr0 + ; GFX9-NEXT: $vgpr0_vgpr1 = PRED_COPY [[SELECT]](p0) + %0:_(p5) = PRED_COPY $vgpr0 %1:_(p0) = G_ADDRSPACE_CAST %0 - $vgpr0_vgpr1 = COPY %1 + $vgpr0_vgpr1 = PRED_COPY %1 ... 
--- @@ -182,16 +182,16 @@ ; GCN-LABEL: name: test_addrspacecast_p0_to_p5 ; GCN: liveins: $vgpr0_vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:_(p0) = PRED_COPY $vgpr0_vgpr1 ; GCN-NEXT: [[C:%[0-9]+]]:_(p5) = G_CONSTANT i32 -1 ; GCN-NEXT: [[C1:%[0-9]+]]:_(p0) = G_CONSTANT i64 0 - ; GCN-NEXT: [[EXTRACT:%[0-9]+]]:_(p5) = G_EXTRACT [[COPY]](p0), 0 - ; GCN-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](p0), [[C1]] + ; GCN-NEXT: [[EXTRACT:%[0-9]+]]:_(p5) = G_EXTRACT [[PRED_COPY]](p0), 0 + ; GCN-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[PRED_COPY]](p0), [[C1]] ; GCN-NEXT: [[SELECT:%[0-9]+]]:_(p5) = G_SELECT [[ICMP]](s1), [[EXTRACT]], [[C]] - ; GCN-NEXT: $vgpr0 = COPY [[SELECT]](p5) - %0:_(p0) = COPY $vgpr0_vgpr1 + ; GCN-NEXT: $vgpr0 = PRED_COPY [[SELECT]](p5) + %0:_(p0) = PRED_COPY $vgpr0_vgpr1 %1:_(p5) = G_ADDRSPACE_CAST %0 - $vgpr0 = COPY %1 + $vgpr0 = PRED_COPY %1 ... --- @@ -207,35 +207,35 @@ ; SIVI-LABEL: name: test_addrspacecast_p3_to_p0 ; SIVI: liveins: $vgpr0, $sgpr4_sgpr5 ; SIVI-NEXT: {{ $}} - ; SIVI-NEXT: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr4_sgpr5 - ; SIVI-NEXT: [[COPY1:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SIVI-NEXT: [[COPY2:%[0-9]+]]:_(p4) = COPY [[COPY]](p4) + ; SIVI-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_64(p4) = PRED_COPY $sgpr4_sgpr5 + ; SIVI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(p3) = PRED_COPY $vgpr0 + ; SIVI-NEXT: [[PRED_COPY2:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY]](p4) ; SIVI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 64 - ; SIVI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY2]], [[C]](s64) + ; SIVI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY2]], [[C]](s64) ; SIVI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s32), align 64, addrspace 4) - ; SIVI-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY1]](p3) + ; SIVI-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[PRED_COPY1]](p3) ; SIVI-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[PTRTOINT]](s32), [[LOAD]](s32) ; SIVI-NEXT: [[C1:%[0-9]+]]:_(p3) = G_CONSTANT i32 -1 ; SIVI-NEXT: [[C2:%[0-9]+]]:_(p0) = G_CONSTANT i64 0 - ; SIVI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY1]](p3), [[C1]] + ; SIVI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[PRED_COPY1]](p3), [[C1]] ; SIVI-NEXT: [[SELECT:%[0-9]+]]:_(p0) = G_SELECT [[ICMP]](s1), [[MV]], [[C2]] - ; SIVI-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](p0) + ; SIVI-NEXT: $vgpr0_vgpr1 = PRED_COPY [[SELECT]](p0) ; GFX9-LABEL: name: test_addrspacecast_p3_to_p0 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(p3) = PRED_COPY $vgpr0 ; GFX9-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64(s64) = S_MOV_B64 $src_shared_base ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[S_MOV_B64_]](s64) - ; GFX9-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY]](p3) + ; GFX9-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[PRED_COPY]](p3) ; GFX9-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[PTRTOINT]](s32), [[UV1]](s32) ; GFX9-NEXT: [[C:%[0-9]+]]:_(p3) = G_CONSTANT i32 -1 ; GFX9-NEXT: [[C1:%[0-9]+]]:_(p0) = G_CONSTANT i64 0 - ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](p3), [[C]] + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[PRED_COPY]](p3), [[C]] ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(p0) = G_SELECT [[ICMP]](s1), [[MV]], [[C1]] - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](p0) - %0:_(p3) = COPY $vgpr0 + ; 
GFX9-NEXT: $vgpr0_vgpr1 = PRED_COPY [[SELECT]](p0) + %0:_(p3) = PRED_COPY $vgpr0 %1:_(p0) = G_ADDRSPACE_CAST %0 - $vgpr0_vgpr1 = COPY %1 + $vgpr0_vgpr1 = PRED_COPY %1 ... --- @@ -250,16 +250,16 @@ ; GCN-LABEL: name: test_addrspacecast_p0_to_p3 ; GCN: liveins: $vgpr0_vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:_(p0) = PRED_COPY $vgpr0_vgpr1 ; GCN-NEXT: [[C:%[0-9]+]]:_(p3) = G_CONSTANT i32 -1 ; GCN-NEXT: [[C1:%[0-9]+]]:_(p0) = G_CONSTANT i64 0 - ; GCN-NEXT: [[EXTRACT:%[0-9]+]]:_(p3) = G_EXTRACT [[COPY]](p0), 0 - ; GCN-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](p0), [[C1]] + ; GCN-NEXT: [[EXTRACT:%[0-9]+]]:_(p3) = G_EXTRACT [[PRED_COPY]](p0), 0 + ; GCN-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[PRED_COPY]](p0), [[C1]] ; GCN-NEXT: [[SELECT:%[0-9]+]]:_(p3) = G_SELECT [[ICMP]](s1), [[EXTRACT]], [[C]] - ; GCN-NEXT: $vgpr0 = COPY [[SELECT]](p3) - %0:_(p0) = COPY $vgpr0_vgpr1 + ; GCN-NEXT: $vgpr0 = PRED_COPY [[SELECT]](p3) + %0:_(p0) = PRED_COPY $vgpr0_vgpr1 %1:_(p3) = G_ADDRSPACE_CAST %0 - $vgpr0 = COPY %1 + $vgpr0 = PRED_COPY %1 ... --- @@ -274,15 +274,15 @@ ; GCN-LABEL: name: test_addrspacecast_v2p0_to_v2p1 ; GCN: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(<2 x p0>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GCN-NEXT: [[UV:%[0-9]+]]:_(p0), [[UV1:%[0-9]+]]:_(p0) = G_UNMERGE_VALUES [[COPY]](<2 x p0>) + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:_(<2 x p0>) = PRED_COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GCN-NEXT: [[UV:%[0-9]+]]:_(p0), [[UV1:%[0-9]+]]:_(p0) = G_UNMERGE_VALUES [[PRED_COPY]](<2 x p0>) ; GCN-NEXT: [[BITCAST:%[0-9]+]]:_(p1) = G_BITCAST [[UV]](p0) ; GCN-NEXT: [[BITCAST1:%[0-9]+]]:_(p1) = G_BITCAST [[UV1]](p0) ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p1>) = G_BUILD_VECTOR [[BITCAST]](p1), [[BITCAST1]](p1) - ; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x p1>) - %0:_(<2 x p0>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[BUILD_VECTOR]](<2 x p1>) + %0:_(<2 x p0>) = PRED_COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(<2 x p1>) = G_ADDRSPACE_CAST %0 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY %1 ... --- @@ -297,15 +297,15 @@ ; GCN-LABEL: name: test_addrspacecast_v2p1_to_v2p0 ; GCN: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(<2 x p1>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GCN-NEXT: [[UV:%[0-9]+]]:_(p1), [[UV1:%[0-9]+]]:_(p1) = G_UNMERGE_VALUES [[COPY]](<2 x p1>) + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:_(<2 x p1>) = PRED_COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GCN-NEXT: [[UV:%[0-9]+]]:_(p1), [[UV1:%[0-9]+]]:_(p1) = G_UNMERGE_VALUES [[PRED_COPY]](<2 x p1>) ; GCN-NEXT: [[BITCAST:%[0-9]+]]:_(p0) = G_BITCAST [[UV]](p1) ; GCN-NEXT: [[BITCAST1:%[0-9]+]]:_(p0) = G_BITCAST [[UV1]](p1) ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p0>) = G_BUILD_VECTOR [[BITCAST]](p0), [[BITCAST1]](p0) - ; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x p0>) - %0:_(<2 x p1>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[BUILD_VECTOR]](<2 x p0>) + %0:_(<2 x p1>) = PRED_COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(<2 x p0>) = G_ADDRSPACE_CAST %0 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY %1 ... 
--- @@ -320,8 +320,8 @@ ; GCN-LABEL: name: test_addrspacecast_v2p0_to_v2p3 ; GCN: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(<2 x p0>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GCN-NEXT: [[UV:%[0-9]+]]:_(p0), [[UV1:%[0-9]+]]:_(p0) = G_UNMERGE_VALUES [[COPY]](<2 x p0>) + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:_(<2 x p0>) = PRED_COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GCN-NEXT: [[UV:%[0-9]+]]:_(p0), [[UV1:%[0-9]+]]:_(p0) = G_UNMERGE_VALUES [[PRED_COPY]](<2 x p0>) ; GCN-NEXT: [[C:%[0-9]+]]:_(p3) = G_CONSTANT i32 -1 ; GCN-NEXT: [[C1:%[0-9]+]]:_(p0) = G_CONSTANT i64 0 ; GCN-NEXT: [[EXTRACT:%[0-9]+]]:_(p3) = G_EXTRACT [[UV]](p0), 0 @@ -331,10 +331,10 @@ ; GCN-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](p0), [[C1]] ; GCN-NEXT: [[SELECT1:%[0-9]+]]:_(p3) = G_SELECT [[ICMP1]](s1), [[EXTRACT1]], [[C]] ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[SELECT]](p3), [[SELECT1]](p3) - ; GCN-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>) - %0:_(<2 x p0>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GCN-NEXT: $vgpr0_vgpr1 = PRED_COPY [[BUILD_VECTOR]](<2 x p3>) + %0:_(<2 x p0>) = PRED_COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(<2 x p3>) = G_ADDRSPACE_CAST %0 - $vgpr0_vgpr1 = COPY %1 + $vgpr0_vgpr1 = PRED_COPY %1 ... --- @@ -349,12 +349,12 @@ ; SIVI-LABEL: name: test_addrspacecast_v2p3_to_v2p0 ; SIVI: liveins: $vgpr0_vgpr1, $sgpr4_sgpr5 ; SIVI-NEXT: {{ $}} - ; SIVI-NEXT: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr4_sgpr5 - ; SIVI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr0_vgpr1 - ; SIVI-NEXT: [[UV:%[0-9]+]]:_(p3), [[UV1:%[0-9]+]]:_(p3) = G_UNMERGE_VALUES [[COPY1]](<2 x p3>) - ; SIVI-NEXT: [[COPY2:%[0-9]+]]:_(p4) = COPY [[COPY]](p4) + ; SIVI-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_64(p4) = PRED_COPY $sgpr4_sgpr5 + ; SIVI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(<2 x p3>) = PRED_COPY $vgpr0_vgpr1 + ; SIVI-NEXT: [[UV:%[0-9]+]]:_(p3), [[UV1:%[0-9]+]]:_(p3) = G_UNMERGE_VALUES [[PRED_COPY1]](<2 x p3>) + ; SIVI-NEXT: [[PRED_COPY2:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY]](p4) ; SIVI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 64 - ; SIVI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY2]], [[C]](s64) + ; SIVI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY2]], [[C]](s64) ; SIVI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s32), align 64, addrspace 4) ; SIVI-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[UV]](p3) ; SIVI-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[PTRTOINT]](s32), [[LOAD]](s32) @@ -362,20 +362,20 @@ ; SIVI-NEXT: [[C2:%[0-9]+]]:_(p0) = G_CONSTANT i64 0 ; SIVI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](p3), [[C1]] ; SIVI-NEXT: [[SELECT:%[0-9]+]]:_(p0) = G_SELECT [[ICMP]](s1), [[MV]], [[C2]] - ; SIVI-NEXT: [[COPY3:%[0-9]+]]:_(p4) = COPY [[COPY]](p4) - ; SIVI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY3]], [[C]](s64) + ; SIVI-NEXT: [[PRED_COPY3:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY]](p4) + ; SIVI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY3]], [[C]](s64) ; SIVI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s32), align 64, addrspace 4) ; SIVI-NEXT: [[PTRTOINT1:%[0-9]+]]:_(s32) = G_PTRTOINT [[UV1]](p3) ; SIVI-NEXT: [[MV1:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[PTRTOINT1]](s32), [[LOAD1]](s32) ; SIVI-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](p3), [[C1]] ; SIVI-NEXT: [[SELECT1:%[0-9]+]]:_(p0) = G_SELECT [[ICMP1]](s1), [[MV1]], [[C2]] ; SIVI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p0>) = G_BUILD_VECTOR [[SELECT]](p0), 
[[SELECT1]](p0) - ; SIVI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x p0>) + ; SIVI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[BUILD_VECTOR]](<2 x p0>) ; GFX9-LABEL: name: test_addrspacecast_v2p3_to_v2p0 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(p3), [[UV1:%[0-9]+]]:_(p3) = G_UNMERGE_VALUES [[COPY]](<2 x p3>) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(<2 x p3>) = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(p3), [[UV1:%[0-9]+]]:_(p3) = G_UNMERGE_VALUES [[PRED_COPY]](<2 x p3>) ; GFX9-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64(s64) = S_MOV_B64 $src_shared_base ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[S_MOV_B64_]](s64) ; GFX9-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[UV]](p3) @@ -391,10 +391,10 @@ ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](p3), [[C]] ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(p0) = G_SELECT [[ICMP1]](s1), [[MV1]], [[C1]] ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p0>) = G_BUILD_VECTOR [[SELECT]](p0), [[SELECT1]](p0) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x p0>) - %0:_(<2 x p3>) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[BUILD_VECTOR]](<2 x p0>) + %0:_(<2 x p3>) = PRED_COPY $vgpr0_vgpr1 %1:_(<2 x p0>) = G_ADDRSPACE_CAST %0 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY %1 ... --- @@ -406,12 +406,12 @@ ; GCN-LABEL: name: test_addrspacecast_p4_to_p6 ; GCN: liveins: $vgpr0_vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GCN-NEXT: [[EXTRACT:%[0-9]+]]:_(p6) = G_EXTRACT [[COPY]](p4), 0 - ; GCN-NEXT: $vgpr0 = COPY [[EXTRACT]](p6) - %0:_(p4) = COPY $vgpr0_vgpr1 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $vgpr0_vgpr1 + ; GCN-NEXT: [[EXTRACT:%[0-9]+]]:_(p6) = G_EXTRACT [[PRED_COPY]](p4), 0 + ; GCN-NEXT: $vgpr0 = PRED_COPY [[EXTRACT]](p6) + %0:_(p4) = PRED_COPY $vgpr0_vgpr1 %1:_(p6) = G_ADDRSPACE_CAST %0 - $vgpr0 = COPY %1 + $vgpr0 = PRED_COPY %1 ... --- @@ -423,14 +423,14 @@ ; GCN-LABEL: name: test_addrspacecast_p6_to_p4_0 ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(p6) = COPY $vgpr0 - ; GCN-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY]](p6) + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:_(p6) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[PRED_COPY]](p6) ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GCN-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[PTRTOINT]](s32), [[C]](s32) - ; GCN-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p4) - %0:_(p6) = COPY $vgpr0 + ; GCN-NEXT: $vgpr0_vgpr1 = PRED_COPY [[MV]](p4) + %0:_(p6) = PRED_COPY $vgpr0 %1:_(p4) = G_ADDRSPACE_CAST %0 - $vgpr0_vgpr1 = COPY %1 + $vgpr0_vgpr1 = PRED_COPY %1 ... 
--- @@ -444,14 +444,14 @@ ; GCN-LABEL: name: test_addrspacecast_p6_to_p4_0xdeadbeef ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(p6) = COPY $vgpr0 - ; GCN-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY]](p6) + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:_(p6) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[PRED_COPY]](p6) ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -559038737 ; GCN-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[PTRTOINT]](s32), [[C]](s32) - ; GCN-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p4) - %0:_(p6) = COPY $vgpr0 + ; GCN-NEXT: $vgpr0_vgpr1 = PRED_COPY [[MV]](p4) + %0:_(p6) = PRED_COPY $vgpr0 %1:_(p4) = G_ADDRSPACE_CAST %0 - $vgpr0_vgpr1 = COPY %1 + $vgpr0_vgpr1 = PRED_COPY %1 ... --- @@ -463,12 +463,12 @@ ; GCN-LABEL: name: test_addrspacecast_p0_to_p6 ; GCN: liveins: $vgpr0_vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GCN-NEXT: [[EXTRACT:%[0-9]+]]:_(p6) = G_EXTRACT [[COPY]](p0), 0 - ; GCN-NEXT: $vgpr0 = COPY [[EXTRACT]](p6) - %0:_(p0) = COPY $vgpr0_vgpr1 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:_(p0) = PRED_COPY $vgpr0_vgpr1 + ; GCN-NEXT: [[EXTRACT:%[0-9]+]]:_(p6) = G_EXTRACT [[PRED_COPY]](p0), 0 + ; GCN-NEXT: $vgpr0 = PRED_COPY [[EXTRACT]](p6) + %0:_(p0) = PRED_COPY $vgpr0_vgpr1 %1:_(p6) = G_ADDRSPACE_CAST %0 - $vgpr0 = COPY %1 + $vgpr0 = PRED_COPY %1 ... --- @@ -480,14 +480,14 @@ ; GCN-LABEL: name: test_addrspacecast_p6_to_p0 ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(p6) = COPY $vgpr0 - ; GCN-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY]](p6) + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:_(p6) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[PRED_COPY]](p6) ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GCN-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[PTRTOINT]](s32), [[C]](s32) - ; GCN-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p0) - %0:_(p6) = COPY $vgpr0 + ; GCN-NEXT: $vgpr0_vgpr1 = PRED_COPY [[MV]](p0) + %0:_(p6) = PRED_COPY $vgpr0 %1:_(p0) = G_ADDRSPACE_CAST %0 - $vgpr0_vgpr1 = COPY %1 + $vgpr0_vgpr1 = PRED_COPY %1 ... 
--- name: test_addrspacecast_p5_fi_to_p0 @@ -501,25 +501,25 @@ ; SIVI-LABEL: name: test_addrspacecast_p5_fi_to_p0 ; SIVI: liveins: $sgpr4_sgpr5 ; SIVI-NEXT: {{ $}} - ; SIVI-NEXT: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr4_sgpr5 + ; SIVI-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_64(p4) = PRED_COPY $sgpr4_sgpr5 ; SIVI-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0 - ; SIVI-NEXT: [[COPY1:%[0-9]+]]:_(p4) = COPY [[COPY]](p4) + ; SIVI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY]](p4) ; SIVI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 68 - ; SIVI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY1]], [[C]](s64) + ; SIVI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY1]], [[C]](s64) ; SIVI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s32), addrspace 4) ; SIVI-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[FRAME_INDEX]](p5) ; SIVI-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[PTRTOINT]](s32), [[LOAD]](s32) - ; SIVI-NEXT: [[COPY2:%[0-9]+]]:_(p0) = COPY [[MV]](p0) - ; SIVI-NEXT: $vgpr0_vgpr1 = COPY [[COPY2]](p0) + ; SIVI-NEXT: [[PRED_COPY2:%[0-9]+]]:_(p0) = PRED_COPY [[MV]](p0) + ; SIVI-NEXT: $vgpr0_vgpr1 = PRED_COPY [[PRED_COPY2]](p0) ; GFX9-LABEL: name: test_addrspacecast_p5_fi_to_p0 ; GFX9: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0 ; GFX9-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64(s64) = S_MOV_B64 $src_private_base ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[S_MOV_B64_]](s64) ; GFX9-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[FRAME_INDEX]](p5) ; GFX9-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[PTRTOINT]](s32), [[UV1]](s32) - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY [[MV]](p0) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[COPY]](p0) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(p0) = PRED_COPY [[MV]](p0) + ; GFX9-NEXT: $vgpr0_vgpr1 = PRED_COPY [[PRED_COPY]](p0) %0:_(p5) = G_FRAME_INDEX %stack.0 %1:_(p0) = G_ADDRSPACE_CAST %0 - $vgpr0_vgpr1 = COPY %1 + $vgpr0_vgpr1 = PRED_COPY %1 ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-amdgcn.workitem.id.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-amdgcn.workitem.id.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-amdgcn.workitem.id.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-amdgcn.workitem.id.mir @@ -45,9 +45,9 @@ ; GCN-LABEL: name: test_workitem_id_x_unpacked ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; GCN-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[COPY1]], 8 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) + ; GCN-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[PRED_COPY1]], 8 ; GCN-NEXT: S_ENDPGM 0, implicit [[ASSERT_ZEXT]](s32) %0:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.workitem.id.x) S_ENDPGM 0, implicit %0 @@ -66,9 +66,9 @@ ; GCN-LABEL: name: test_workitem_id_y_unpacked ; GCN: liveins: $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; GCN-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[COPY1]], 3 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) + ; GCN-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[PRED_COPY1]], 3 ; GCN-NEXT: S_ENDPGM 0, implicit [[ASSERT_ZEXT]](s32) %0:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.workitem.id.y) S_ENDPGM 0, implicit %0 @@ -87,9 +87,9 @@ ; GCN-LABEL: name: test_workitem_id_z_unpacked ; GCN: liveins: $vgpr2 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; GCN-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[COPY1]], 2 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) + ; GCN-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[PRED_COPY1]], 2 ; GCN-NEXT: S_ENDPGM 0, implicit [[ASSERT_ZEXT]](s32) %0:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.workitem.id.z) S_ENDPGM 0, implicit %0 @@ -107,9 +107,9 @@ ; GCN-LABEL: name: test_workitem_id_x_packed ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1023 - ; GCN-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] + ; GCN-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY]], [[C]] ; GCN-NEXT: S_ENDPGM 0, implicit [[AND]](s32) %0:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.workitem.id.x) S_ENDPGM 0, implicit %0 @@ -127,9 +127,9 @@ ; GCN-LABEL: name: test_workitem_id_y_packed ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) + ; GCN-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1023 ; GCN-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] ; GCN-NEXT: S_ENDPGM 0, implicit [[AND]](s32) @@ -149,9 +149,9 @@ ; GCN-LABEL: name: test_workitem_id_z_packed ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: 
[[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) + ; GCN-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1023 ; GCN-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] ; GCN-NEXT: S_ENDPGM 0, implicit [[AND]](s32) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-and.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-and.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-and.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-and.mir @@ -456,8 +456,8 @@ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY2]], [[C]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY]], [[C]](s32) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[BITCAST1]](<2 x s16>) @@ -538,8 +538,8 @@ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY]], [[C]](s32) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[BITCAST4]](<2 x s16>) @@ -558,10 +558,10 @@ ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C]](s32) ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL2]] ; CHECK-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[COPY2]], [[C]](s32) - ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[COPY1]], [[SHL3]] + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY2]], [[C]](s32) + ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY1]], [[SHL3]] ; CHECK-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) ; CHECK-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST8]](<2 x s16>), [[BITCAST9]](<2 x s16>) ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] @@ -569,10 +569,10 @@ ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C]](s32) ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[SHL4]] ; CHECK-NEXT: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) 
= G_SHL [[COPY4]], [[C]](s32) - ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[COPY3]], [[SHL5]] + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY4]], [[C]](s32) + ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY3]], [[SHL5]] ; CHECK-NEXT: [[BITCAST11:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) ; CHECK-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST10]](<2 x s16>), [[BITCAST11]](<2 x s16>) ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(<4 x s16>) = G_AND [[CONCAT_VECTORS2]], [[CONCAT_VECTORS3]] diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-anyext.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-anyext.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-anyext.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-anyext.mir @@ -638,13 +638,13 @@ ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C3]] ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND4]], [[C4]](s16) ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]] - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s16) = COPY [[OR2]](s16) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s16) = PRED_COPY [[OR2]](s16) ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32) ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL3]] ; CHECK-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) - ; CHECK-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[COPY1]](s16) + ; CHECK-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[PRED_COPY]](s16) ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C1]](s32) ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL4]] ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR3]](s32), [[OR4]](s32) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-bitcast.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-bitcast.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-bitcast.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-bitcast.mir @@ -565,17 +565,17 @@ ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32) ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s16) = COPY [[TRUNC]](s16) - ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[COPY1]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s16) = PRED_COPY [[TRUNC]](s16) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[PRED_COPY]] ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s16) = COPY [[TRUNC1]](s16) - ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[TRUNC1]], [[COPY2]] + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s16) = PRED_COPY [[TRUNC1]](s16) + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[TRUNC1]], [[PRED_COPY1]] ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s16) = COPY [[TRUNC2]](s16) - ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[TRUNC2]], [[COPY3]] + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s16) = PRED_COPY [[TRUNC2]](s16) + ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[TRUNC2]], [[PRED_COPY2]] ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s16) = COPY [[TRUNC3]](s16) - ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(s16) = G_ADD 
[[TRUNC3]], [[COPY4]] + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s16) = PRED_COPY [[TRUNC3]](s16) + ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[TRUNC3]], [[PRED_COPY3]] ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD]](s16) ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD1]](s16) ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD2]](s16) @@ -601,17 +601,17 @@ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s16) = COPY [[TRUNC]](s16) - ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[COPY1]], [[TRUNC]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s16) = PRED_COPY [[TRUNC]](s16) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[PRED_COPY]], [[TRUNC]] ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s16) = COPY [[TRUNC1]](s16) - ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[COPY2]], [[TRUNC1]] + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s16) = PRED_COPY [[TRUNC1]](s16) + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[PRED_COPY1]], [[TRUNC1]] ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s16) = COPY [[TRUNC2]](s16) - ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[COPY3]], [[TRUNC2]] + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s16) = PRED_COPY [[TRUNC2]](s16) + ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[PRED_COPY2]], [[TRUNC2]] ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[UV3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s16) = COPY [[TRUNC3]](s16) - ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[COPY4]], [[TRUNC3]] + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s16) = PRED_COPY [[TRUNC3]](s16) + ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[PRED_COPY3]], [[TRUNC3]] ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[ADD]], [[C]] ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[ADD1]], [[C]] @@ -662,29 +662,29 @@ ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 28 ; CHECK-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C6]](s32) ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s16) = COPY [[TRUNC]](s16) - ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[COPY1]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s16) = PRED_COPY [[TRUNC]](s16) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[PRED_COPY]] ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s16) = COPY [[TRUNC1]](s16) - ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[TRUNC1]], [[COPY2]] + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s16) = PRED_COPY [[TRUNC1]](s16) + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[TRUNC1]], [[PRED_COPY1]] ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s16) = COPY [[TRUNC2]](s16) - ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[TRUNC2]], [[COPY3]] + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s16) = PRED_COPY [[TRUNC2]](s16) + ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[TRUNC2]], [[PRED_COPY2]] ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s16) = COPY [[TRUNC3]](s16) - ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[TRUNC3]], [[COPY4]] + ; 
CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s16) = PRED_COPY [[TRUNC3]](s16) + ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[TRUNC3]], [[PRED_COPY3]] ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s16) = COPY [[TRUNC4]](s16) - ; CHECK-NEXT: [[ADD4:%[0-9]+]]:_(s16) = G_ADD [[TRUNC4]], [[COPY5]] + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s16) = PRED_COPY [[TRUNC4]](s16) + ; CHECK-NEXT: [[ADD4:%[0-9]+]]:_(s16) = G_ADD [[TRUNC4]], [[PRED_COPY4]] ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s16) = COPY [[TRUNC5]](s16) - ; CHECK-NEXT: [[ADD5:%[0-9]+]]:_(s16) = G_ADD [[TRUNC5]], [[COPY6]] + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s16) = PRED_COPY [[TRUNC5]](s16) + ; CHECK-NEXT: [[ADD5:%[0-9]+]]:_(s16) = G_ADD [[TRUNC5]], [[PRED_COPY5]] ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32) - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s16) = COPY [[TRUNC6]](s16) - ; CHECK-NEXT: [[ADD6:%[0-9]+]]:_(s16) = G_ADD [[TRUNC6]], [[COPY7]] + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s16) = PRED_COPY [[TRUNC6]](s16) + ; CHECK-NEXT: [[ADD6:%[0-9]+]]:_(s16) = G_ADD [[TRUNC6]], [[PRED_COPY6]] ; CHECK-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR6]](s32) - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s16) = COPY [[TRUNC7]](s16) - ; CHECK-NEXT: [[ADD7:%[0-9]+]]:_(s16) = G_ADD [[TRUNC7]], [[COPY8]] + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s16) = PRED_COPY [[TRUNC7]](s16) + ; CHECK-NEXT: [[ADD7:%[0-9]+]]:_(s16) = G_ADD [[TRUNC7]], [[PRED_COPY7]] ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD]](s16) ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD1]](s16) ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD2]](s16) @@ -714,29 +714,29 @@ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<8 x s32>) ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s16) = COPY [[TRUNC]](s16) - ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[COPY1]], [[TRUNC]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s16) = PRED_COPY [[TRUNC]](s16) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[PRED_COPY]], [[TRUNC]] ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s16) = COPY [[TRUNC1]](s16) - ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[COPY2]], [[TRUNC1]] + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s16) = PRED_COPY [[TRUNC1]](s16) + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[PRED_COPY1]], [[TRUNC1]] ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s16) = COPY [[TRUNC2]](s16) - ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[COPY3]], [[TRUNC2]] + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s16) = PRED_COPY [[TRUNC2]](s16) + ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[PRED_COPY2]], [[TRUNC2]] ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[UV3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s16) = COPY [[TRUNC3]](s16) - ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[COPY4]], [[TRUNC3]] + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s16) = PRED_COPY [[TRUNC3]](s16) + ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[PRED_COPY3]], [[TRUNC3]] ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[UV4]](s32) - ; CHECK-NEXT: 
[[COPY5:%[0-9]+]]:_(s16) = COPY [[TRUNC4]](s16) - ; CHECK-NEXT: [[ADD4:%[0-9]+]]:_(s16) = G_ADD [[COPY5]], [[TRUNC4]] + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s16) = PRED_COPY [[TRUNC4]](s16) + ; CHECK-NEXT: [[ADD4:%[0-9]+]]:_(s16) = G_ADD [[PRED_COPY4]], [[TRUNC4]] ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[UV5]](s32) - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s16) = COPY [[TRUNC5]](s16) - ; CHECK-NEXT: [[ADD5:%[0-9]+]]:_(s16) = G_ADD [[COPY6]], [[TRUNC5]] + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s16) = PRED_COPY [[TRUNC5]](s16) + ; CHECK-NEXT: [[ADD5:%[0-9]+]]:_(s16) = G_ADD [[PRED_COPY5]], [[TRUNC5]] ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[UV6]](s32) - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s16) = COPY [[TRUNC6]](s16) - ; CHECK-NEXT: [[ADD6:%[0-9]+]]:_(s16) = G_ADD [[COPY7]], [[TRUNC6]] + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s16) = PRED_COPY [[TRUNC6]](s16) + ; CHECK-NEXT: [[ADD6:%[0-9]+]]:_(s16) = G_ADD [[PRED_COPY6]], [[TRUNC6]] ; CHECK-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[UV7]](s32) - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s16) = COPY [[TRUNC7]](s16) - ; CHECK-NEXT: [[ADD7:%[0-9]+]]:_(s16) = G_ADD [[COPY8]], [[TRUNC7]] + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s16) = PRED_COPY [[TRUNC7]](s16) + ; CHECK-NEXT: [[ADD7:%[0-9]+]]:_(s16) = G_ADD [[PRED_COPY7]], [[TRUNC7]] ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 15 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[ADD]], [[C]] ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[ADD1]], [[C]] @@ -865,29 +865,29 @@ ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32) ; CHECK-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32) ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s16) = COPY [[TRUNC]](s16) - ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[COPY1]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s16) = PRED_COPY [[TRUNC]](s16) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[PRED_COPY]] ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s16) = COPY [[TRUNC1]](s16) - ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[TRUNC1]], [[COPY2]] + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s16) = PRED_COPY [[TRUNC1]](s16) + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[TRUNC1]], [[PRED_COPY1]] ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s16) = COPY [[TRUNC2]](s16) - ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[TRUNC2]], [[COPY3]] + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s16) = PRED_COPY [[TRUNC2]](s16) + ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[TRUNC2]], [[PRED_COPY2]] ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s16) = COPY [[TRUNC3]](s16) - ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[TRUNC3]], [[COPY4]] + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s16) = PRED_COPY [[TRUNC3]](s16) + ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[TRUNC3]], [[PRED_COPY3]] ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s16) = COPY [[TRUNC4]](s16) - ; CHECK-NEXT: [[ADD4:%[0-9]+]]:_(s16) = G_ADD [[TRUNC4]], [[COPY5]] + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s16) = PRED_COPY [[TRUNC4]](s16) + ; CHECK-NEXT: [[ADD4:%[0-9]+]]:_(s16) = G_ADD [[TRUNC4]], [[PRED_COPY4]] ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s16) = COPY [[TRUNC5]](s16) - ; CHECK-NEXT: [[ADD5:%[0-9]+]]:_(s16) = G_ADD 
[[TRUNC5]], [[COPY6]] + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s16) = PRED_COPY [[TRUNC5]](s16) + ; CHECK-NEXT: [[ADD5:%[0-9]+]]:_(s16) = G_ADD [[TRUNC5]], [[PRED_COPY5]] ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s16) = COPY [[TRUNC6]](s16) - ; CHECK-NEXT: [[ADD6:%[0-9]+]]:_(s16) = G_ADD [[TRUNC6]], [[COPY7]] + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s16) = PRED_COPY [[TRUNC6]](s16) + ; CHECK-NEXT: [[ADD6:%[0-9]+]]:_(s16) = G_ADD [[TRUNC6]], [[PRED_COPY6]] ; CHECK-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32) - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s16) = COPY [[TRUNC7]](s16) - ; CHECK-NEXT: [[ADD7:%[0-9]+]]:_(s16) = G_ADD [[TRUNC7]], [[COPY8]] + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s16) = PRED_COPY [[TRUNC7]](s16) + ; CHECK-NEXT: [[ADD7:%[0-9]+]]:_(s16) = G_ADD [[TRUNC7]], [[PRED_COPY7]] ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD]](s16) ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD1]](s16) ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD2]](s16) @@ -935,10 +935,10 @@ ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C3]](s32) ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s8) = COPY [[TRUNC]](s8) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s8) = COPY [[TRUNC1]](s8) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s8) = COPY [[TRUNC2]](s8) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s8) = COPY [[TRUNC3]](s8) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC]](s8) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC1]](s8) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC2]](s8) + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC3]](s8) ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV4]], [[C]] ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[UV5]], [[C]] ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C1]](s32) @@ -1044,53 +1044,53 @@ ; CHECK-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C5]](s32) ; CHECK-NEXT: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C6]](s32) ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s16) = COPY [[TRUNC]](s16) - ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[COPY1]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s16) = PRED_COPY [[TRUNC]](s16) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[PRED_COPY]] ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s16) = COPY [[TRUNC1]](s16) - ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[TRUNC1]], [[COPY2]] + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s16) = PRED_COPY [[TRUNC1]](s16) + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[TRUNC1]], [[PRED_COPY1]] ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s16) = COPY [[TRUNC2]](s16) - ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[TRUNC2]], [[COPY3]] + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s16) = PRED_COPY [[TRUNC2]](s16) + ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[TRUNC2]], [[PRED_COPY2]] ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s16) = COPY [[TRUNC3]](s16) - ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[TRUNC3]], [[COPY4]] + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s16) = PRED_COPY [[TRUNC3]](s16) + ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[TRUNC3]], [[PRED_COPY3]] ; CHECK-NEXT: 
[[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s16) = COPY [[TRUNC4]](s16) - ; CHECK-NEXT: [[ADD4:%[0-9]+]]:_(s16) = G_ADD [[TRUNC4]], [[COPY5]] + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s16) = PRED_COPY [[TRUNC4]](s16) + ; CHECK-NEXT: [[ADD4:%[0-9]+]]:_(s16) = G_ADD [[TRUNC4]], [[PRED_COPY4]] ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s16) = COPY [[TRUNC5]](s16) - ; CHECK-NEXT: [[ADD5:%[0-9]+]]:_(s16) = G_ADD [[TRUNC5]], [[COPY6]] + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s16) = PRED_COPY [[TRUNC5]](s16) + ; CHECK-NEXT: [[ADD5:%[0-9]+]]:_(s16) = G_ADD [[TRUNC5]], [[PRED_COPY5]] ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32) - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s16) = COPY [[TRUNC6]](s16) - ; CHECK-NEXT: [[ADD6:%[0-9]+]]:_(s16) = G_ADD [[TRUNC6]], [[COPY7]] + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s16) = PRED_COPY [[TRUNC6]](s16) + ; CHECK-NEXT: [[ADD6:%[0-9]+]]:_(s16) = G_ADD [[TRUNC6]], [[PRED_COPY6]] ; CHECK-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR6]](s32) - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s16) = COPY [[TRUNC7]](s16) - ; CHECK-NEXT: [[ADD7:%[0-9]+]]:_(s16) = G_ADD [[TRUNC7]], [[COPY8]] + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s16) = PRED_COPY [[TRUNC7]](s16) + ; CHECK-NEXT: [[ADD7:%[0-9]+]]:_(s16) = G_ADD [[TRUNC7]], [[PRED_COPY7]] ; CHECK-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s16) = COPY [[TRUNC8]](s16) - ; CHECK-NEXT: [[ADD8:%[0-9]+]]:_(s16) = G_ADD [[TRUNC8]], [[COPY9]] + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s16) = PRED_COPY [[TRUNC8]](s16) + ; CHECK-NEXT: [[ADD8:%[0-9]+]]:_(s16) = G_ADD [[TRUNC8]], [[PRED_COPY8]] ; CHECK-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR7]](s32) - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s16) = COPY [[TRUNC9]](s16) - ; CHECK-NEXT: [[ADD9:%[0-9]+]]:_(s16) = G_ADD [[TRUNC9]], [[COPY10]] + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s16) = PRED_COPY [[TRUNC9]](s16) + ; CHECK-NEXT: [[ADD9:%[0-9]+]]:_(s16) = G_ADD [[TRUNC9]], [[PRED_COPY9]] ; CHECK-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR8]](s32) - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s16) = COPY [[TRUNC10]](s16) - ; CHECK-NEXT: [[ADD10:%[0-9]+]]:_(s16) = G_ADD [[TRUNC10]], [[COPY11]] + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s16) = PRED_COPY [[TRUNC10]](s16) + ; CHECK-NEXT: [[ADD10:%[0-9]+]]:_(s16) = G_ADD [[TRUNC10]], [[PRED_COPY10]] ; CHECK-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR9]](s32) - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s16) = COPY [[TRUNC11]](s16) - ; CHECK-NEXT: [[ADD11:%[0-9]+]]:_(s16) = G_ADD [[TRUNC11]], [[COPY12]] + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s16) = PRED_COPY [[TRUNC11]](s16) + ; CHECK-NEXT: [[ADD11:%[0-9]+]]:_(s16) = G_ADD [[TRUNC11]], [[PRED_COPY11]] ; CHECK-NEXT: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR10]](s32) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s16) = COPY [[TRUNC12]](s16) - ; CHECK-NEXT: [[ADD12:%[0-9]+]]:_(s16) = G_ADD [[TRUNC12]], [[COPY13]] + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s16) = PRED_COPY [[TRUNC12]](s16) + ; CHECK-NEXT: [[ADD12:%[0-9]+]]:_(s16) = G_ADD [[TRUNC12]], [[PRED_COPY12]] ; CHECK-NEXT: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR11]](s32) - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s16) = COPY [[TRUNC13]](s16) - ; CHECK-NEXT: [[ADD13:%[0-9]+]]:_(s16) = G_ADD [[TRUNC13]], [[COPY14]] + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s16) = PRED_COPY [[TRUNC13]](s16) + ; CHECK-NEXT: [[ADD13:%[0-9]+]]:_(s16) = G_ADD [[TRUNC13]], [[PRED_COPY13]] ; CHECK-NEXT: [[TRUNC14:%[0-9]+]]:_(s16) = 
G_TRUNC [[LSHR12]](s32) - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s16) = COPY [[TRUNC14]](s16) - ; CHECK-NEXT: [[ADD14:%[0-9]+]]:_(s16) = G_ADD [[TRUNC14]], [[COPY15]] + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s16) = PRED_COPY [[TRUNC14]](s16) + ; CHECK-NEXT: [[ADD14:%[0-9]+]]:_(s16) = G_ADD [[TRUNC14]], [[PRED_COPY14]] ; CHECK-NEXT: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR13]](s32) - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s16) = COPY [[TRUNC15]](s16) - ; CHECK-NEXT: [[ADD15:%[0-9]+]]:_(s16) = G_ADD [[TRUNC15]], [[COPY16]] + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s16) = PRED_COPY [[TRUNC15]](s16) + ; CHECK-NEXT: [[ADD15:%[0-9]+]]:_(s16) = G_ADD [[TRUNC15]], [[PRED_COPY15]] ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD]](s16) ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD1]](s16) ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD2]](s16) @@ -1223,14 +1223,14 @@ ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 28 ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C7]](s32) ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[OR5]], [[SHL6]] - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s4) = COPY [[TRUNC]](s4) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s4) = COPY [[TRUNC1]](s4) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s4) = COPY [[TRUNC2]](s4) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s4) = COPY [[TRUNC3]](s4) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s4) = COPY [[TRUNC4]](s4) - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s4) = COPY [[TRUNC5]](s4) - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s4) = COPY [[TRUNC6]](s4) - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s4) = COPY [[TRUNC7]](s4) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s4) = PRED_COPY [[TRUNC]](s4) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s4) = PRED_COPY [[TRUNC1]](s4) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s4) = PRED_COPY [[TRUNC2]](s4) + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s4) = PRED_COPY [[TRUNC3]](s4) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s4) = PRED_COPY [[TRUNC4]](s4) + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s4) = PRED_COPY [[TRUNC5]](s4) + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s4) = PRED_COPY [[TRUNC6]](s4) + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s4) = PRED_COPY [[TRUNC7]](s4) ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C1]] ; CHECK-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C2]](s32) @@ -1334,41 +1334,41 @@ ; CHECK-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32) ; CHECK-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32) ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s16) = COPY [[TRUNC]](s16) - ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[COPY1]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s16) = PRED_COPY [[TRUNC]](s16) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[PRED_COPY]] ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s16) = COPY [[TRUNC1]](s16) - ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[TRUNC1]], [[COPY2]] + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s16) = PRED_COPY [[TRUNC1]](s16) + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[TRUNC1]], [[PRED_COPY1]] ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s16) = COPY [[TRUNC2]](s16) - ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[TRUNC2]], [[COPY3]] + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s16) = PRED_COPY [[TRUNC2]](s16) + ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[TRUNC2]], [[PRED_COPY2]] ; 
CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s16) = COPY [[TRUNC3]](s16) - ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[TRUNC3]], [[COPY4]] + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s16) = PRED_COPY [[TRUNC3]](s16) + ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[TRUNC3]], [[PRED_COPY3]] ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s16) = COPY [[TRUNC4]](s16) - ; CHECK-NEXT: [[ADD4:%[0-9]+]]:_(s16) = G_ADD [[TRUNC4]], [[COPY5]] + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s16) = PRED_COPY [[TRUNC4]](s16) + ; CHECK-NEXT: [[ADD4:%[0-9]+]]:_(s16) = G_ADD [[TRUNC4]], [[PRED_COPY4]] ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s16) = COPY [[TRUNC5]](s16) - ; CHECK-NEXT: [[ADD5:%[0-9]+]]:_(s16) = G_ADD [[TRUNC5]], [[COPY6]] + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s16) = PRED_COPY [[TRUNC5]](s16) + ; CHECK-NEXT: [[ADD5:%[0-9]+]]:_(s16) = G_ADD [[TRUNC5]], [[PRED_COPY5]] ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s16) = COPY [[TRUNC6]](s16) - ; CHECK-NEXT: [[ADD6:%[0-9]+]]:_(s16) = G_ADD [[TRUNC6]], [[COPY7]] + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s16) = PRED_COPY [[TRUNC6]](s16) + ; CHECK-NEXT: [[ADD6:%[0-9]+]]:_(s16) = G_ADD [[TRUNC6]], [[PRED_COPY6]] ; CHECK-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32) - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s16) = COPY [[TRUNC7]](s16) - ; CHECK-NEXT: [[ADD7:%[0-9]+]]:_(s16) = G_ADD [[TRUNC7]], [[COPY8]] + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s16) = PRED_COPY [[TRUNC7]](s16) + ; CHECK-NEXT: [[ADD7:%[0-9]+]]:_(s16) = G_ADD [[TRUNC7]], [[PRED_COPY7]] ; CHECK-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32) - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s16) = COPY [[TRUNC8]](s16) - ; CHECK-NEXT: [[ADD8:%[0-9]+]]:_(s16) = G_ADD [[TRUNC8]], [[COPY9]] + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s16) = PRED_COPY [[TRUNC8]](s16) + ; CHECK-NEXT: [[ADD8:%[0-9]+]]:_(s16) = G_ADD [[TRUNC8]], [[PRED_COPY8]] ; CHECK-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR6]](s32) - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s16) = COPY [[TRUNC9]](s16) - ; CHECK-NEXT: [[ADD9:%[0-9]+]]:_(s16) = G_ADD [[TRUNC9]], [[COPY10]] + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s16) = PRED_COPY [[TRUNC9]](s16) + ; CHECK-NEXT: [[ADD9:%[0-9]+]]:_(s16) = G_ADD [[TRUNC9]], [[PRED_COPY9]] ; CHECK-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR7]](s32) - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s16) = COPY [[TRUNC10]](s16) - ; CHECK-NEXT: [[ADD10:%[0-9]+]]:_(s16) = G_ADD [[TRUNC10]], [[COPY11]] + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s16) = PRED_COPY [[TRUNC10]](s16) + ; CHECK-NEXT: [[ADD10:%[0-9]+]]:_(s16) = G_ADD [[TRUNC10]], [[PRED_COPY10]] ; CHECK-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR8]](s32) - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s16) = COPY [[TRUNC11]](s16) - ; CHECK-NEXT: [[ADD11:%[0-9]+]]:_(s16) = G_ADD [[TRUNC11]], [[COPY12]] + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s16) = PRED_COPY [[TRUNC11]](s16) + ; CHECK-NEXT: [[ADD11:%[0-9]+]]:_(s16) = G_ADD [[TRUNC11]], [[PRED_COPY11]] ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD]](s16) ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD1]](s16) ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD2]](s16) @@ -1463,23 +1463,23 @@ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<6 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), 
[[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<6 x s32>) ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s16) = COPY [[TRUNC]](s16) - ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[COPY1]], [[TRUNC]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s16) = PRED_COPY [[TRUNC]](s16) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[PRED_COPY]], [[TRUNC]] ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s16) = COPY [[TRUNC1]](s16) - ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[COPY2]], [[TRUNC1]] + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s16) = PRED_COPY [[TRUNC1]](s16) + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[PRED_COPY1]], [[TRUNC1]] ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s16) = COPY [[TRUNC2]](s16) - ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[COPY3]], [[TRUNC2]] + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s16) = PRED_COPY [[TRUNC2]](s16) + ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[PRED_COPY2]], [[TRUNC2]] ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[UV3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s16) = COPY [[TRUNC3]](s16) - ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[COPY4]], [[TRUNC3]] + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s16) = PRED_COPY [[TRUNC3]](s16) + ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[PRED_COPY3]], [[TRUNC3]] ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[UV4]](s32) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s16) = COPY [[TRUNC4]](s16) - ; CHECK-NEXT: [[ADD4:%[0-9]+]]:_(s16) = G_ADD [[COPY5]], [[TRUNC4]] + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s16) = PRED_COPY [[TRUNC4]](s16) + ; CHECK-NEXT: [[ADD4:%[0-9]+]]:_(s16) = G_ADD [[PRED_COPY4]], [[TRUNC4]] ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[UV5]](s32) - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s16) = COPY [[TRUNC5]](s16) - ; CHECK-NEXT: [[ADD5:%[0-9]+]]:_(s16) = G_ADD [[COPY6]], [[TRUNC5]] + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s16) = PRED_COPY [[TRUNC5]](s16) + ; CHECK-NEXT: [[ADD5:%[0-9]+]]:_(s16) = G_ADD [[PRED_COPY5]], [[TRUNC5]] ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[ADD]], [[C]] ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[ADD1]], [[C]] @@ -1529,14 +1529,14 @@ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s16) = COPY [[TRUNC]](s16) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s16) = PRED_COPY [[TRUNC]](s16) ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s16) = COPY [[TRUNC1]](s16) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s16) = PRED_COPY [[TRUNC1]](s16) ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s16) = COPY [[TRUNC2]](s16) - ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[COPY1]], [[TRUNC]] - ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[COPY2]], [[TRUNC1]] - ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[COPY3]], [[TRUNC2]] + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s16) = PRED_COPY [[TRUNC2]](s16) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[PRED_COPY]], [[TRUNC]] + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[PRED_COPY1]], [[TRUNC1]] + ; CHECK-NEXT: 
[[ADD2:%[0-9]+]]:_(s16) = G_ADD [[PRED_COPY2]], [[TRUNC2]] ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[ADD]], [[C]](s16) ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD]](s16) @@ -1705,14 +1705,14 @@ ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C2]](s32) ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s8) = COPY [[TRUNC]](s8) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s8) = COPY [[TRUNC1]](s8) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s8) = COPY [[TRUNC2]](s8) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s8) = COPY [[TRUNC3]](s8) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s8) = COPY [[TRUNC4]](s8) - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s8) = COPY [[TRUNC5]](s8) - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s8) = COPY [[TRUNC6]](s8) - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s8) = COPY [[TRUNC7]](s8) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC]](s8) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC1]](s8) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC2]](s8) + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC3]](s8) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC4]](s8) + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC5]](s8) + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC6]](s8) + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC7]](s8) ; CHECK-NEXT: [[TRUNC16:%[0-9]+]]:_(s16) = G_TRUNC [[UV8]](s32) ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC16]], [[C]] ; CHECK-NEXT: [[TRUNC17:%[0-9]+]]:_(s16) = G_TRUNC [[UV9]](s32) @@ -1780,53 +1780,53 @@ ; CHECK-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C1]](s32) ; CHECK-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C2]](s32) ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s16) = COPY [[TRUNC]](s16) - ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[COPY1]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s16) = PRED_COPY [[TRUNC]](s16) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[PRED_COPY]] ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s16) = COPY [[TRUNC1]](s16) - ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[TRUNC1]], [[COPY2]] + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s16) = PRED_COPY [[TRUNC1]](s16) + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[TRUNC1]], [[PRED_COPY1]] ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s16) = COPY [[TRUNC2]](s16) - ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[TRUNC2]], [[COPY3]] + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s16) = PRED_COPY [[TRUNC2]](s16) + ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[TRUNC2]], [[PRED_COPY2]] ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s16) = COPY [[TRUNC3]](s16) - ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[TRUNC3]], [[COPY4]] + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s16) = PRED_COPY [[TRUNC3]](s16) + ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[TRUNC3]], [[PRED_COPY3]] ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s16) = COPY [[TRUNC4]](s16) - ; CHECK-NEXT: [[ADD4:%[0-9]+]]:_(s16) = G_ADD [[TRUNC4]], [[COPY5]] + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s16) = PRED_COPY 
[[TRUNC4]](s16) + ; CHECK-NEXT: [[ADD4:%[0-9]+]]:_(s16) = G_ADD [[TRUNC4]], [[PRED_COPY4]] ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s16) = COPY [[TRUNC5]](s16) - ; CHECK-NEXT: [[ADD5:%[0-9]+]]:_(s16) = G_ADD [[TRUNC5]], [[COPY6]] + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s16) = PRED_COPY [[TRUNC5]](s16) + ; CHECK-NEXT: [[ADD5:%[0-9]+]]:_(s16) = G_ADD [[TRUNC5]], [[PRED_COPY5]] ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s16) = COPY [[TRUNC6]](s16) - ; CHECK-NEXT: [[ADD6:%[0-9]+]]:_(s16) = G_ADD [[TRUNC6]], [[COPY7]] + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s16) = PRED_COPY [[TRUNC6]](s16) + ; CHECK-NEXT: [[ADD6:%[0-9]+]]:_(s16) = G_ADD [[TRUNC6]], [[PRED_COPY6]] ; CHECK-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32) - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s16) = COPY [[TRUNC7]](s16) - ; CHECK-NEXT: [[ADD7:%[0-9]+]]:_(s16) = G_ADD [[TRUNC7]], [[COPY8]] + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s16) = PRED_COPY [[TRUNC7]](s16) + ; CHECK-NEXT: [[ADD7:%[0-9]+]]:_(s16) = G_ADD [[TRUNC7]], [[PRED_COPY7]] ; CHECK-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32) - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s16) = COPY [[TRUNC8]](s16) - ; CHECK-NEXT: [[ADD8:%[0-9]+]]:_(s16) = G_ADD [[TRUNC8]], [[COPY9]] + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s16) = PRED_COPY [[TRUNC8]](s16) + ; CHECK-NEXT: [[ADD8:%[0-9]+]]:_(s16) = G_ADD [[TRUNC8]], [[PRED_COPY8]] ; CHECK-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR6]](s32) - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s16) = COPY [[TRUNC9]](s16) - ; CHECK-NEXT: [[ADD9:%[0-9]+]]:_(s16) = G_ADD [[TRUNC9]], [[COPY10]] + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s16) = PRED_COPY [[TRUNC9]](s16) + ; CHECK-NEXT: [[ADD9:%[0-9]+]]:_(s16) = G_ADD [[TRUNC9]], [[PRED_COPY9]] ; CHECK-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR7]](s32) - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s16) = COPY [[TRUNC10]](s16) - ; CHECK-NEXT: [[ADD10:%[0-9]+]]:_(s16) = G_ADD [[TRUNC10]], [[COPY11]] + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s16) = PRED_COPY [[TRUNC10]](s16) + ; CHECK-NEXT: [[ADD10:%[0-9]+]]:_(s16) = G_ADD [[TRUNC10]], [[PRED_COPY10]] ; CHECK-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR8]](s32) - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s16) = COPY [[TRUNC11]](s16) - ; CHECK-NEXT: [[ADD11:%[0-9]+]]:_(s16) = G_ADD [[TRUNC11]], [[COPY12]] + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s16) = PRED_COPY [[TRUNC11]](s16) + ; CHECK-NEXT: [[ADD11:%[0-9]+]]:_(s16) = G_ADD [[TRUNC11]], [[PRED_COPY11]] ; CHECK-NEXT: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[UV3]](s32) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s16) = COPY [[TRUNC12]](s16) - ; CHECK-NEXT: [[ADD12:%[0-9]+]]:_(s16) = G_ADD [[TRUNC12]], [[COPY13]] + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s16) = PRED_COPY [[TRUNC12]](s16) + ; CHECK-NEXT: [[ADD12:%[0-9]+]]:_(s16) = G_ADD [[TRUNC12]], [[PRED_COPY12]] ; CHECK-NEXT: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR9]](s32) - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s16) = COPY [[TRUNC13]](s16) - ; CHECK-NEXT: [[ADD13:%[0-9]+]]:_(s16) = G_ADD [[TRUNC13]], [[COPY14]] + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s16) = PRED_COPY [[TRUNC13]](s16) + ; CHECK-NEXT: [[ADD13:%[0-9]+]]:_(s16) = G_ADD [[TRUNC13]], [[PRED_COPY13]] ; CHECK-NEXT: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR10]](s32) - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s16) = COPY [[TRUNC14]](s16) - ; CHECK-NEXT: [[ADD14:%[0-9]+]]:_(s16) = G_ADD [[TRUNC14]], [[COPY15]] + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s16) = PRED_COPY [[TRUNC14]](s16) + ; 
CHECK-NEXT: [[ADD14:%[0-9]+]]:_(s16) = G_ADD [[TRUNC14]], [[PRED_COPY14]] ; CHECK-NEXT: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR11]](s32) - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s16) = COPY [[TRUNC15]](s16) - ; CHECK-NEXT: [[ADD15:%[0-9]+]]:_(s16) = G_ADD [[TRUNC15]], [[COPY16]] + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s16) = PRED_COPY [[TRUNC15]](s16) + ; CHECK-NEXT: [[ADD15:%[0-9]+]]:_(s16) = G_ADD [[TRUNC15]], [[PRED_COPY15]] ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD]](s16) ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD1]](s16) ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD2]](s16) @@ -1881,10 +1881,10 @@ ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C3]](s32) ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s8) = COPY [[TRUNC]](s8) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s8) = COPY [[TRUNC1]](s8) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s8) = COPY [[TRUNC2]](s8) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s8) = COPY [[TRUNC3]](s8) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC]](s8) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC1]](s8) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC2]](s8) + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC3]](s8) ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s8) = G_TRUNC [[UV4]](s32) ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s8) = G_TRUNC [[UV5]](s32) ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(s8) = G_TRUNC [[UV6]](s32) @@ -1899,14 +1899,14 @@ ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[UV7]], [[C]] ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C3]](s32) ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s8) = COPY [[TRUNC]](s8) - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s8) = COPY [[TRUNC1]](s8) - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s8) = COPY [[TRUNC2]](s8) - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s8) = COPY [[TRUNC3]](s8) - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s8) = COPY [[TRUNC4]](s8) - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s8) = COPY [[TRUNC5]](s8) - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s8) = COPY [[TRUNC6]](s8) - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s8) = COPY [[TRUNC7]](s8) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC]](s8) + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC1]](s8) + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC2]](s8) + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC3]](s8) + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC4]](s8) + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC5]](s8) + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC6]](s8) + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC7]](s8) ; CHECK-NEXT: [[TRUNC8:%[0-9]+]]:_(s8) = G_TRUNC [[UV8]](s32) ; CHECK-NEXT: [[TRUNC9:%[0-9]+]]:_(s8) = G_TRUNC [[UV9]](s32) ; CHECK-NEXT: [[TRUNC10:%[0-9]+]]:_(s8) = G_TRUNC [[UV10]](s32) @@ -1921,18 +1921,18 @@ ; CHECK-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[UV11]], [[C]] ; CHECK-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C3]](s32) ; CHECK-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]] - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s8) = COPY [[TRUNC]](s8) - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s8) = COPY [[TRUNC1]](s8) - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s8) = COPY [[TRUNC2]](s8) - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s8) = COPY [[TRUNC3]](s8) - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s8) = COPY 
[[TRUNC4]](s8) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s8) = COPY [[TRUNC5]](s8) - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s8) = COPY [[TRUNC6]](s8) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(s8) = COPY [[TRUNC7]](s8) - ; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(s8) = COPY [[TRUNC8]](s8) - ; CHECK-NEXT: [[COPY22:%[0-9]+]]:_(s8) = COPY [[TRUNC9]](s8) - ; CHECK-NEXT: [[COPY23:%[0-9]+]]:_(s8) = COPY [[TRUNC10]](s8) - ; CHECK-NEXT: [[COPY24:%[0-9]+]]:_(s8) = COPY [[TRUNC11]](s8) + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC]](s8) + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC1]](s8) + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC2]](s8) + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC3]](s8) + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC4]](s8) + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC5]](s8) + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC6]](s8) + ; CHECK-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC7]](s8) + ; CHECK-NEXT: [[PRED_COPY20:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC8]](s8) + ; CHECK-NEXT: [[PRED_COPY21:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC9]](s8) + ; CHECK-NEXT: [[PRED_COPY22:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC10]](s8) + ; CHECK-NEXT: [[PRED_COPY23:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC11]](s8) ; CHECK-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[UV12]], [[C]] ; CHECK-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[UV13]], [[C]] ; CHECK-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C1]](s32) @@ -2050,8 +2050,8 @@ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C1]](s16) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s8) = COPY [[TRUNC]](s8) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s8) = COPY [[TRUNC1]](s8) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC]](s8) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC1]](s8) ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s8) = G_TRUNC [[UV2]](s32) ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s8) = G_TRUNC [[UV3]](s32) ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32) @@ -2060,10 +2060,10 @@ ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C]] ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C1]](s16) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s8) = COPY [[TRUNC]](s8) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s8) = COPY [[TRUNC1]](s8) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s8) = COPY [[TRUNC4]](s8) - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s8) = COPY [[TRUNC5]](s8) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC]](s8) + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC1]](s8) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC4]](s8) + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC5]](s8) ; CHECK-NEXT: [[TRUNC8:%[0-9]+]]:_(s8) = G_TRUNC [[UV4]](s32) ; CHECK-NEXT: [[TRUNC9:%[0-9]+]]:_(s8) = G_TRUNC [[UV5]](s32) ; CHECK-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[UV4]](s32) @@ -2072,12 +2072,12 @@ ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C]] ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C1]](s16) ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]] - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s8) = COPY [[TRUNC]](s8) - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s8) = COPY [[TRUNC1]](s8) - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s8) = COPY [[TRUNC4]](s8) - ; CHECK-NEXT: 
[[COPY10:%[0-9]+]]:_(s8) = COPY [[TRUNC5]](s8) - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s8) = COPY [[TRUNC8]](s8) - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s8) = COPY [[TRUNC9]](s8) + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC]](s8) + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC1]](s8) + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC4]](s8) + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC5]](s8) + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC8]](s8) + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC9]](s8) ; CHECK-NEXT: [[TRUNC12:%[0-9]+]]:_(s8) = G_TRUNC [[UV6]](s32) ; CHECK-NEXT: [[TRUNC13:%[0-9]+]]:_(s8) = G_TRUNC [[UV7]](s32) ; CHECK-NEXT: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[UV6]](s32) @@ -2086,14 +2086,14 @@ ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C]] ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C1]](s16) ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s8) = COPY [[TRUNC]](s8) - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s8) = COPY [[TRUNC1]](s8) - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s8) = COPY [[TRUNC4]](s8) - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s8) = COPY [[TRUNC5]](s8) - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s8) = COPY [[TRUNC8]](s8) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s8) = COPY [[TRUNC9]](s8) - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s8) = COPY [[TRUNC12]](s8) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(s8) = COPY [[TRUNC13]](s8) + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC]](s8) + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC1]](s8) + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC4]](s8) + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC5]](s8) + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC8]](s8) + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC9]](s8) + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC12]](s8) + ; CHECK-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC13]](s8) ; CHECK-NEXT: [[TRUNC16:%[0-9]+]]:_(s8) = G_TRUNC [[UV8]](s32) ; CHECK-NEXT: [[TRUNC17:%[0-9]+]]:_(s8) = G_TRUNC [[UV9]](s32) ; CHECK-NEXT: [[TRUNC18:%[0-9]+]]:_(s16) = G_TRUNC [[UV8]](s32) @@ -2102,16 +2102,16 @@ ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC19]], [[C]] ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C1]](s16) ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL4]] - ; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(s8) = COPY [[TRUNC]](s8) - ; CHECK-NEXT: [[COPY22:%[0-9]+]]:_(s8) = COPY [[TRUNC1]](s8) - ; CHECK-NEXT: [[COPY23:%[0-9]+]]:_(s8) = COPY [[TRUNC4]](s8) - ; CHECK-NEXT: [[COPY24:%[0-9]+]]:_(s8) = COPY [[TRUNC5]](s8) - ; CHECK-NEXT: [[COPY25:%[0-9]+]]:_(s8) = COPY [[TRUNC8]](s8) - ; CHECK-NEXT: [[COPY26:%[0-9]+]]:_(s8) = COPY [[TRUNC9]](s8) - ; CHECK-NEXT: [[COPY27:%[0-9]+]]:_(s8) = COPY [[TRUNC12]](s8) - ; CHECK-NEXT: [[COPY28:%[0-9]+]]:_(s8) = COPY [[TRUNC13]](s8) - ; CHECK-NEXT: [[COPY29:%[0-9]+]]:_(s8) = COPY [[TRUNC16]](s8) - ; CHECK-NEXT: [[COPY30:%[0-9]+]]:_(s8) = COPY [[TRUNC17]](s8) + ; CHECK-NEXT: [[PRED_COPY20:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC]](s8) + ; CHECK-NEXT: [[PRED_COPY21:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC1]](s8) + ; CHECK-NEXT: [[PRED_COPY22:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC4]](s8) + ; CHECK-NEXT: [[PRED_COPY23:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC5]](s8) + ; CHECK-NEXT: [[PRED_COPY24:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC8]](s8) + ; CHECK-NEXT: 
[[PRED_COPY25:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC9]](s8) + ; CHECK-NEXT: [[PRED_COPY26:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC12]](s8) + ; CHECK-NEXT: [[PRED_COPY27:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC13]](s8) + ; CHECK-NEXT: [[PRED_COPY28:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC16]](s8) + ; CHECK-NEXT: [[PRED_COPY29:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC17]](s8) ; CHECK-NEXT: [[TRUNC20:%[0-9]+]]:_(s8) = G_TRUNC [[UV10]](s32) ; CHECK-NEXT: [[TRUNC21:%[0-9]+]]:_(s8) = G_TRUNC [[UV11]](s32) ; CHECK-NEXT: [[TRUNC22:%[0-9]+]]:_(s16) = G_TRUNC [[UV10]](s32) @@ -2120,18 +2120,18 @@ ; CHECK-NEXT: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC23]], [[C]] ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C1]](s16) ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL5]] - ; CHECK-NEXT: [[COPY31:%[0-9]+]]:_(s8) = COPY [[TRUNC]](s8) - ; CHECK-NEXT: [[COPY32:%[0-9]+]]:_(s8) = COPY [[TRUNC1]](s8) - ; CHECK-NEXT: [[COPY33:%[0-9]+]]:_(s8) = COPY [[TRUNC4]](s8) - ; CHECK-NEXT: [[COPY34:%[0-9]+]]:_(s8) = COPY [[TRUNC5]](s8) - ; CHECK-NEXT: [[COPY35:%[0-9]+]]:_(s8) = COPY [[TRUNC8]](s8) - ; CHECK-NEXT: [[COPY36:%[0-9]+]]:_(s8) = COPY [[TRUNC9]](s8) - ; CHECK-NEXT: [[COPY37:%[0-9]+]]:_(s8) = COPY [[TRUNC12]](s8) - ; CHECK-NEXT: [[COPY38:%[0-9]+]]:_(s8) = COPY [[TRUNC13]](s8) - ; CHECK-NEXT: [[COPY39:%[0-9]+]]:_(s8) = COPY [[TRUNC16]](s8) - ; CHECK-NEXT: [[COPY40:%[0-9]+]]:_(s8) = COPY [[TRUNC17]](s8) - ; CHECK-NEXT: [[COPY41:%[0-9]+]]:_(s8) = COPY [[TRUNC20]](s8) - ; CHECK-NEXT: [[COPY42:%[0-9]+]]:_(s8) = COPY [[TRUNC21]](s8) + ; CHECK-NEXT: [[PRED_COPY30:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC]](s8) + ; CHECK-NEXT: [[PRED_COPY31:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC1]](s8) + ; CHECK-NEXT: [[PRED_COPY32:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC4]](s8) + ; CHECK-NEXT: [[PRED_COPY33:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC5]](s8) + ; CHECK-NEXT: [[PRED_COPY34:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC8]](s8) + ; CHECK-NEXT: [[PRED_COPY35:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC9]](s8) + ; CHECK-NEXT: [[PRED_COPY36:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC12]](s8) + ; CHECK-NEXT: [[PRED_COPY37:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC13]](s8) + ; CHECK-NEXT: [[PRED_COPY38:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC16]](s8) + ; CHECK-NEXT: [[PRED_COPY39:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC17]](s8) + ; CHECK-NEXT: [[PRED_COPY40:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC20]](s8) + ; CHECK-NEXT: [[PRED_COPY41:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC21]](s8) ; CHECK-NEXT: [[TRUNC24:%[0-9]+]]:_(s8) = G_TRUNC [[UV12]](s32) ; CHECK-NEXT: [[TRUNC25:%[0-9]+]]:_(s8) = G_TRUNC [[UV13]](s32) ; CHECK-NEXT: [[TRUNC26:%[0-9]+]]:_(s16) = G_TRUNC [[UV12]](s32) @@ -2140,20 +2140,20 @@ ; CHECK-NEXT: [[AND13:%[0-9]+]]:_(s16) = G_AND [[TRUNC27]], [[C]] ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C1]](s16) ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL6]] - ; CHECK-NEXT: [[COPY43:%[0-9]+]]:_(s8) = COPY [[TRUNC]](s8) - ; CHECK-NEXT: [[COPY44:%[0-9]+]]:_(s8) = COPY [[TRUNC1]](s8) - ; CHECK-NEXT: [[COPY45:%[0-9]+]]:_(s8) = COPY [[TRUNC4]](s8) - ; CHECK-NEXT: [[COPY46:%[0-9]+]]:_(s8) = COPY [[TRUNC5]](s8) - ; CHECK-NEXT: [[COPY47:%[0-9]+]]:_(s8) = COPY [[TRUNC8]](s8) - ; CHECK-NEXT: [[COPY48:%[0-9]+]]:_(s8) = COPY [[TRUNC9]](s8) - ; CHECK-NEXT: [[COPY49:%[0-9]+]]:_(s8) = COPY [[TRUNC12]](s8) - ; CHECK-NEXT: [[COPY50:%[0-9]+]]:_(s8) = COPY [[TRUNC13]](s8) - ; CHECK-NEXT: [[COPY51:%[0-9]+]]:_(s8) = COPY [[TRUNC16]](s8) - ; CHECK-NEXT: [[COPY52:%[0-9]+]]:_(s8) = COPY [[TRUNC17]](s8) - ; CHECK-NEXT: [[COPY53:%[0-9]+]]:_(s8) = COPY [[TRUNC20]](s8) - ; CHECK-NEXT: 
[[COPY54:%[0-9]+]]:_(s8) = COPY [[TRUNC21]](s8) - ; CHECK-NEXT: [[COPY55:%[0-9]+]]:_(s8) = COPY [[TRUNC24]](s8) - ; CHECK-NEXT: [[COPY56:%[0-9]+]]:_(s8) = COPY [[TRUNC25]](s8) + ; CHECK-NEXT: [[PRED_COPY42:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC]](s8) + ; CHECK-NEXT: [[PRED_COPY43:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC1]](s8) + ; CHECK-NEXT: [[PRED_COPY44:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC4]](s8) + ; CHECK-NEXT: [[PRED_COPY45:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC5]](s8) + ; CHECK-NEXT: [[PRED_COPY46:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC8]](s8) + ; CHECK-NEXT: [[PRED_COPY47:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC9]](s8) + ; CHECK-NEXT: [[PRED_COPY48:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC12]](s8) + ; CHECK-NEXT: [[PRED_COPY49:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC13]](s8) + ; CHECK-NEXT: [[PRED_COPY50:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC16]](s8) + ; CHECK-NEXT: [[PRED_COPY51:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC17]](s8) + ; CHECK-NEXT: [[PRED_COPY52:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC20]](s8) + ; CHECK-NEXT: [[PRED_COPY53:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC21]](s8) + ; CHECK-NEXT: [[PRED_COPY54:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC24]](s8) + ; CHECK-NEXT: [[PRED_COPY55:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC25]](s8) ; CHECK-NEXT: [[TRUNC28:%[0-9]+]]:_(s16) = G_TRUNC [[UV14]](s32) ; CHECK-NEXT: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC28]], [[C]] ; CHECK-NEXT: [[TRUNC29:%[0-9]+]]:_(s16) = G_TRUNC [[UV15]](s32) @@ -2640,30 +2640,30 @@ ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s8) = COPY [[TRUNC8]](s8) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s8) = COPY [[TRUNC9]](s8) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC8]](s8) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC9]](s8) ; CHECK-NEXT: [[TRUNC10:%[0-9]+]]:_(s8) = G_TRUNC [[BITCAST1]](s32) ; CHECK-NEXT: [[TRUNC11:%[0-9]+]]:_(s8) = G_TRUNC [[LSHR1]](s32) ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]] ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]] ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C2]](s16) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s8) = COPY [[TRUNC8]](s8) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s8) = COPY [[TRUNC9]](s8) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s8) = COPY [[TRUNC10]](s8) - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s8) = COPY [[TRUNC11]](s8) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC8]](s8) + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC9]](s8) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC10]](s8) + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC11]](s8) ; CHECK-NEXT: [[TRUNC12:%[0-9]+]]:_(s8) = G_TRUNC [[BITCAST2]](s32) ; CHECK-NEXT: [[TRUNC13:%[0-9]+]]:_(s8) = G_TRUNC [[LSHR2]](s32) ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]] ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C1]] ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16) ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]] - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s8) = COPY [[TRUNC8]](s8) - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s8) = COPY [[TRUNC9]](s8) - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s8) = COPY [[TRUNC10]](s8) - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s8) = COPY [[TRUNC11]](s8) - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s8) = COPY [[TRUNC12]](s8) - ; CHECK-NEXT: 
[[COPY12:%[0-9]+]]:_(s8) = COPY [[TRUNC13]](s8) + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC8]](s8) + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC9]](s8) + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC10]](s8) + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC11]](s8) + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC12]](s8) + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC13]](s8) ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C1]] ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C1]] ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C2]](s16) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-bswap.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-bswap.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-bswap.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-bswap.mir @@ -14,11 +14,11 @@ ; GFX7-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX7-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX7-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; GFX7-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[COPY1]](s32) - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; GFX7-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[PRED_COPY]](s32) + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) ; GFX7-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C1]] - ; GFX7-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[COPY2]](s32) + ; GFX7-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[PRED_COPY1]](s32) ; GFX7-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL]] ; GFX7-NEXT: $vgpr0 = COPY [[OR]](s32) ; GFX8-LABEL: name: bswap_s8 @@ -51,10 +51,10 @@ ; GFX7-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; GFX7-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C]](s32) ; GFX7-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) ; GFX7-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; GFX7-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C1]] - ; GFX7-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[COPY1]](s32) + ; GFX7-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[PRED_COPY]](s32) ; GFX7-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX7-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[TRUNC1]], [[TRUNC]] ; GFX7-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) @@ -86,11 +86,11 @@ ; GFX7-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX7-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX7-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16777215 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; GFX7-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[COPY1]](s32) - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; GFX7-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[PRED_COPY]](s32) + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) ; GFX7-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C1]] - ; GFX7-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[COPY2]](s32) + ; GFX7-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[PRED_COPY1]](s32) ; GFX7-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL]] ; GFX7-NEXT: $vgpr0 = COPY [[OR]](s32) ; GFX8-LABEL: name: bswap_s24 @@ 
-147,18 +147,18 @@ ; GFX7-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; GFX7-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[BITCAST]], [[C1]](s32) ; GFX7-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C1]](s32) ; GFX7-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; GFX7-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C2]] - ; GFX7-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[COPY1]](s32) + ; GFX7-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[PRED_COPY]](s32) ; GFX7-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) ; GFX7-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[TRUNC1]], [[TRUNC]] - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; GFX7-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[COPY2]](s32) + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C1]](s32) + ; GFX7-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[PRED_COPY1]](s32) ; GFX7-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[C1]](s32) ; GFX7-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] - ; GFX7-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY3]](s32) + ; GFX7-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[PRED_COPY2]](s32) ; GFX7-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) ; GFX7-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[TRUNC3]], [[TRUNC2]] ; GFX7-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) @@ -193,26 +193,26 @@ ; GFX7-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; GFX7-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C]](s32) ; GFX7-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) ; GFX7-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; GFX7-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C1]] - ; GFX7-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[COPY3]](s32) + ; GFX7-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[PRED_COPY]](s32) ; GFX7-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX7-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[TRUNC1]], [[TRUNC]] - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; GFX7-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[COPY4]](s32) + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; GFX7-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[PRED_COPY1]](s32) ; GFX7-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) ; GFX7-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] - ; GFX7-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY5]](s32) + ; GFX7-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[PRED_COPY2]](s32) ; GFX7-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) ; GFX7-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[TRUNC3]], [[TRUNC2]] - ; GFX7-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; GFX7-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[COPY2]], [[COPY6]](s32) + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; GFX7-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[COPY2]], [[PRED_COPY3]](s32) ; GFX7-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) - ; GFX7-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; GFX7-NEXT: 
[[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) ; GFX7-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] - ; GFX7-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY7]](s32) + ; GFX7-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[PRED_COPY4]](s32) ; GFX7-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) ; GFX7-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[TRUNC5]], [[TRUNC4]] ; GFX7-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) @@ -284,34 +284,34 @@ ; GFX7-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; GFX7-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[BITCAST]], [[C1]](s32) ; GFX7-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C1]](s32) ; GFX7-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; GFX7-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C2]] - ; GFX7-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[COPY1]](s32) + ; GFX7-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[PRED_COPY]](s32) ; GFX7-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) ; GFX7-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[TRUNC1]], [[TRUNC]] - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; GFX7-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[COPY2]](s32) + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C1]](s32) + ; GFX7-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[PRED_COPY1]](s32) ; GFX7-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[C1]](s32) ; GFX7-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] - ; GFX7-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY3]](s32) + ; GFX7-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[PRED_COPY2]](s32) ; GFX7-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) ; GFX7-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[TRUNC3]], [[TRUNC2]] - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; GFX7-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[BITCAST1]], [[COPY4]](s32) + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY [[C1]](s32) + ; GFX7-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[BITCAST1]], [[PRED_COPY3]](s32) ; GFX7-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY [[C1]](s32) ; GFX7-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C2]] - ; GFX7-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY5]](s32) + ; GFX7-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[PRED_COPY4]](s32) ; GFX7-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) ; GFX7-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[TRUNC5]], [[TRUNC4]] - ; GFX7-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; GFX7-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[COPY6]](s32) + ; GFX7-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY [[C1]](s32) + ; GFX7-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[PRED_COPY5]](s32) ; GFX7-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) - ; GFX7-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; GFX7-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY [[C1]](s32) ; GFX7-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C2]] - ; GFX7-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[COPY7]](s32) + ; GFX7-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[PRED_COPY6]](s32) ; GFX7-NEXT: 
[[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32) ; GFX7-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[TRUNC7]], [[TRUNC6]] ; GFX7-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ctlz-zero-undef.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ctlz-zero-undef.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ctlz-zero-undef.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ctlz-zero-undef.mir @@ -88,8 +88,8 @@ ; CHECK-NEXT: [[CTLZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTLZ_ZERO_UNDEF [[AND]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[CTLZ_ZERO_UNDEF]], [[C1]] - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[SUB]](s32) - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[SUB]](s32) + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY]], [[C]] ; CHECK-NEXT: $vgpr0 = COPY [[AND1]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s16) = G_TRUNC %0 @@ -155,13 +155,13 @@ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] ; CHECK-NEXT: [[CTLZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTLZ_ZERO_UNDEF [[AND]](s32) ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[CTLZ_ZERO_UNDEF]], [[C]] - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[SUB]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[SUB]](s32) ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] ; CHECK-NEXT: [[CTLZ_ZERO_UNDEF1:%[0-9]+]]:_(s32) = G_CTLZ_ZERO_UNDEF [[AND1]](s32) ; CHECK-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[CTLZ_ZERO_UNDEF1]], [[C]] - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[SUB1]](s32) - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[SUB1]](s32) + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY]], [[C1]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY1]], [[C1]] ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL]] ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) @@ -187,8 +187,8 @@ ; CHECK-NEXT: [[CTLZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTLZ_ZERO_UNDEF [[AND]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 25 ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[CTLZ_ZERO_UNDEF]], [[C1]] - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[SUB]](s32) - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[SUB]](s32) + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY]], [[C]] ; CHECK-NEXT: $vgpr0 = COPY [[AND1]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s7) = G_TRUNC %0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ctlz.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ctlz.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ctlz.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ctlz.mir @@ -98,8 +98,8 @@ ; CHECK-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[AMDGPU_FFBH_U32_]], [[C1]] ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UMIN]], [[C2]] - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[SUB]](s32) - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[SUB]](s32) + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY]], [[C]] ; 
CHECK-NEXT: $vgpr0 = COPY [[AND1]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s16) = G_TRUNC %0 @@ -173,14 +173,14 @@ ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 ; CHECK-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[AMDGPU_FFBH_U32_]], [[C2]] ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UMIN]], [[C]] - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[SUB]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[SUB]](s32) ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] ; CHECK-NEXT: [[AMDGPU_FFBH_U32_1:%[0-9]+]]:_(s32) = G_AMDGPU_FFBH_U32 [[AND1]](s32) ; CHECK-NEXT: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[AMDGPU_FFBH_U32_1]], [[C2]] ; CHECK-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UMIN1]], [[C]] - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[SUB1]](s32) - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[SUB1]](s32) + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY]], [[C1]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY1]], [[C1]] ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL]] ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) @@ -208,8 +208,8 @@ ; CHECK-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[AMDGPU_FFBH_U32_]], [[C1]] ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 25 ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UMIN]], [[C2]] - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[SUB]](s32) - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[SUB]](s32) + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY]], [[C]] ; CHECK-NEXT: $vgpr0 = COPY [[AND1]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s7) = G_TRUNC %0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ctpop.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ctpop.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ctpop.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ctpop.mir @@ -13,8 +13,8 @@ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] ; CHECK-NEXT: [[CTPOP:%[0-9]+]]:_(s32) = G_CTPOP [[AND]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[CTPOP]](s32) - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[CTPOP]](s32) + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY]], [[C]] ; CHECK-NEXT: $vgpr0 = COPY [[AND1]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s8) = G_TRUNC %0 @@ -35,8 +35,8 @@ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 511 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] ; CHECK-NEXT: [[CTPOP:%[0-9]+]]:_(s32) = G_CTPOP [[AND]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[CTPOP]](s32) - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[CTPOP]](s32) + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY]], [[C]] ; CHECK-NEXT: $vgpr0 = COPY [[AND1]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s9) = G_TRUNC %0 @@ -130,8 +130,8 @@ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] ; CHECK-NEXT: [[CTPOP:%[0-9]+]]:_(s32) = G_CTPOP [[AND]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[CTPOP]](s32) - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) 
= G_AND [[COPY1]], [[C]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[CTPOP]](s32) + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY]], [[C]] ; CHECK-NEXT: $vgpr0 = COPY [[AND1]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s16) = G_TRUNC %0 @@ -196,12 +196,12 @@ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] ; CHECK-NEXT: [[CTPOP:%[0-9]+]]:_(s32) = G_CTPOP [[AND]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[CTPOP]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[CTPOP]](s32) ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] ; CHECK-NEXT: [[CTPOP1:%[0-9]+]]:_(s32) = G_CTPOP [[AND1]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[CTPOP1]](s32) - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[CTPOP1]](s32) + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY]], [[C1]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY1]], [[C1]] ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL]] ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) @@ -225,8 +225,8 @@ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 127 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] ; CHECK-NEXT: [[CTPOP:%[0-9]+]]:_(s32) = G_CTPOP [[AND]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[CTPOP]](s32) - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[CTPOP]](s32) + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY]], [[C]] ; CHECK-NEXT: $vgpr0 = COPY [[AND1]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s7) = G_TRUNC %0 @@ -285,67 +285,67 @@ ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4294967295 ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C3]](s64) ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s16) = COPY [[C4]](s16) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s16) = COPY [[C4]](s16) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s16) = PRED_COPY [[C4]](s16) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s16) = PRED_COPY [[C4]](s16) ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 1 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[COPY2]], [[C5]](s16) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[COPY1]], [[SHL]] - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s16) = COPY [[C4]](s16) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[PRED_COPY1]], [[C5]](s16) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[PRED_COPY]], [[SHL]] + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s16) = PRED_COPY [[C4]](s16) ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s16) = G_CONSTANT i16 2 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[COPY3]], [[C6]](s16) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[PRED_COPY2]], [[C6]](s16) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[OR]], [[SHL1]] - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s16) = COPY [[C4]](s16) + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s16) = PRED_COPY [[C4]](s16) ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 3 - ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[COPY4]], [[C7]](s16) + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[PRED_COPY3]], [[C7]](s16) ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[OR1]], [[SHL2]] - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s16) = COPY 
[[C4]](s16) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s16) = PRED_COPY [[C4]](s16) ; CHECK-NEXT: [[C8:%[0-9]+]]:_(s16) = G_CONSTANT i16 4 - ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[COPY5]], [[C8]](s16) + ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[PRED_COPY4]], [[C8]](s16) ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[OR2]], [[SHL3]] - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s16) = COPY [[C4]](s16) + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s16) = PRED_COPY [[C4]](s16) ; CHECK-NEXT: [[C9:%[0-9]+]]:_(s16) = G_CONSTANT i16 5 - ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[COPY6]], [[C9]](s16) + ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[PRED_COPY5]], [[C9]](s16) ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s16) = G_OR [[OR3]], [[SHL4]] - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s16) = COPY [[C4]](s16) + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s16) = PRED_COPY [[C4]](s16) ; CHECK-NEXT: [[C10:%[0-9]+]]:_(s16) = G_CONSTANT i16 6 - ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s16) = G_SHL [[COPY7]], [[C10]](s16) + ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s16) = G_SHL [[PRED_COPY6]], [[C10]](s16) ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s16) = G_OR [[OR4]], [[SHL5]] - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s16) = COPY [[C4]](s16) + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s16) = PRED_COPY [[C4]](s16) ; CHECK-NEXT: [[C11:%[0-9]+]]:_(s16) = G_CONSTANT i16 7 - ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[COPY8]], [[C11]](s16) + ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[PRED_COPY7]], [[C11]](s16) ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s16) = G_OR [[OR5]], [[SHL6]] - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s16) = COPY [[C4]](s16) + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s16) = PRED_COPY [[C4]](s16) ; CHECK-NEXT: [[C12:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; CHECK-NEXT: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[COPY9]], [[C12]](s16) + ; CHECK-NEXT: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[PRED_COPY8]], [[C12]](s16) ; CHECK-NEXT: [[OR7:%[0-9]+]]:_(s16) = G_OR [[OR6]], [[SHL7]] - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s16) = COPY [[C4]](s16) + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s16) = PRED_COPY [[C4]](s16) ; CHECK-NEXT: [[C13:%[0-9]+]]:_(s16) = G_CONSTANT i16 9 - ; CHECK-NEXT: [[SHL8:%[0-9]+]]:_(s16) = G_SHL [[COPY10]], [[C13]](s16) + ; CHECK-NEXT: [[SHL8:%[0-9]+]]:_(s16) = G_SHL [[PRED_COPY9]], [[C13]](s16) ; CHECK-NEXT: [[OR8:%[0-9]+]]:_(s16) = G_OR [[OR7]], [[SHL8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s16) = COPY [[C4]](s16) + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s16) = PRED_COPY [[C4]](s16) ; CHECK-NEXT: [[C14:%[0-9]+]]:_(s16) = G_CONSTANT i16 10 - ; CHECK-NEXT: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[COPY11]], [[C14]](s16) + ; CHECK-NEXT: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[PRED_COPY10]], [[C14]](s16) ; CHECK-NEXT: [[OR9:%[0-9]+]]:_(s16) = G_OR [[OR8]], [[SHL9]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s16) = COPY [[C4]](s16) + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s16) = PRED_COPY [[C4]](s16) ; CHECK-NEXT: [[C15:%[0-9]+]]:_(s16) = G_CONSTANT i16 11 - ; CHECK-NEXT: [[SHL10:%[0-9]+]]:_(s16) = G_SHL [[COPY12]], [[C15]](s16) + ; CHECK-NEXT: [[SHL10:%[0-9]+]]:_(s16) = G_SHL [[PRED_COPY11]], [[C15]](s16) ; CHECK-NEXT: [[OR10:%[0-9]+]]:_(s16) = G_OR [[OR9]], [[SHL10]] - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s16) = COPY [[C4]](s16) + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s16) = PRED_COPY [[C4]](s16) ; CHECK-NEXT: [[C16:%[0-9]+]]:_(s16) = G_CONSTANT i16 12 - ; CHECK-NEXT: [[SHL11:%[0-9]+]]:_(s16) = G_SHL [[COPY13]], [[C16]](s16) + ; CHECK-NEXT: [[SHL11:%[0-9]+]]:_(s16) = G_SHL [[PRED_COPY12]], [[C16]](s16) ; CHECK-NEXT: [[OR11:%[0-9]+]]:_(s16) = G_OR [[OR10]], [[SHL11]] - ; 
CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s16) = COPY [[C4]](s16) + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s16) = PRED_COPY [[C4]](s16) ; CHECK-NEXT: [[C17:%[0-9]+]]:_(s16) = G_CONSTANT i16 13 - ; CHECK-NEXT: [[SHL12:%[0-9]+]]:_(s16) = G_SHL [[COPY14]], [[C17]](s16) + ; CHECK-NEXT: [[SHL12:%[0-9]+]]:_(s16) = G_SHL [[PRED_COPY13]], [[C17]](s16) ; CHECK-NEXT: [[OR12:%[0-9]+]]:_(s16) = G_OR [[OR11]], [[SHL12]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s16) = COPY [[C4]](s16) + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s16) = PRED_COPY [[C4]](s16) ; CHECK-NEXT: [[C18:%[0-9]+]]:_(s16) = G_CONSTANT i16 14 - ; CHECK-NEXT: [[SHL13:%[0-9]+]]:_(s16) = G_SHL [[COPY15]], [[C18]](s16) + ; CHECK-NEXT: [[SHL13:%[0-9]+]]:_(s16) = G_SHL [[PRED_COPY14]], [[C18]](s16) ; CHECK-NEXT: [[OR13:%[0-9]+]]:_(s16) = G_OR [[OR12]], [[SHL13]] ; CHECK-NEXT: [[C19:%[0-9]+]]:_(s16) = G_CONSTANT i16 15 ; CHECK-NEXT: [[SHL14:%[0-9]+]]:_(s16) = G_SHL [[C4]], [[C19]](s16) ; CHECK-NEXT: [[OR14:%[0-9]+]]:_(s16) = G_OR [[OR13]], [[SHL14]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s64) = COPY [[MV2]](s64) + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s64) = PRED_COPY [[MV2]](s64) ; CHECK-NEXT: [[C20:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C20]](s32) ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C20]](s32) @@ -360,17 +360,17 @@ ; CHECK-NEXT: [[OR16:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL16]] ; CHECK-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR15]](s32), [[OR16]](s32) ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR14]](s16) - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; CHECK-NEXT: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C20]](s32) + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; CHECK-NEXT: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY16]], [[C20]](s32) ; CHECK-NEXT: [[OR17:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL17]] - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; CHECK-NEXT: [[SHL18:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C20]](s32) - ; CHECK-NEXT: [[OR18:%[0-9]+]]:_(s32) = G_OR [[COPY18]], [[SHL18]] + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; CHECK-NEXT: [[SHL18:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY18]], [[C20]](s32) + ; CHECK-NEXT: [[OR18:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY17]], [[SHL18]] ; CHECK-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR17]](s32), [[OR18]](s32) ; CHECK-NEXT: [[MV5:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV3]](s64), [[MV4]](s64) ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s64) = G_EXTRACT [[MV5]](s128), 0 - ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s64) = G_AND [[COPY16]], [[EXTRACT]] + ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s64) = G_AND [[PRED_COPY15]], [[EXTRACT]] ; CHECK-NEXT: [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND6]](s64) ; CHECK-NEXT: $vgpr0 = COPY [[UV5]](s32) %0:_(s96) = COPY $vgpr0_vgpr1_vgpr2 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-cttz-zero-undef.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-cttz-zero-undef.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-cttz-zero-undef.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-cttz-zero-undef.mir @@ -84,9 +84,9 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[CTTZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTTZ_ZERO_UNDEF [[COPY]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY 
[[CTTZ_ZERO_UNDEF]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[CTTZ_ZERO_UNDEF]](s32) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY]], [[C]] ; CHECK-NEXT: $vgpr0 = COPY [[AND]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s16) = G_TRUNC %0 @@ -149,12 +149,12 @@ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; CHECK-NEXT: [[CTTZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTTZ_ZERO_UNDEF [[BITCAST]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[CTTZ_ZERO_UNDEF]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[CTTZ_ZERO_UNDEF]](s32) ; CHECK-NEXT: [[CTTZ_ZERO_UNDEF1:%[0-9]+]]:_(s32) = G_CTTZ_ZERO_UNDEF [[LSHR]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[CTTZ_ZERO_UNDEF1]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[CTTZ_ZERO_UNDEF1]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY]], [[C1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY1]], [[C1]] ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) @@ -176,9 +176,9 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[CTTZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTTZ_ZERO_UNDEF [[COPY]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[CTTZ_ZERO_UNDEF]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[CTTZ_ZERO_UNDEF]](s32) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 127 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY]], [[C]] ; CHECK-NEXT: $vgpr0 = COPY [[AND]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s7) = G_TRUNC %0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-cttz.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-cttz.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-cttz.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-cttz.mir @@ -94,9 +94,9 @@ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65536 ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY]], [[C]] ; CHECK-NEXT: [[CTTZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTTZ_ZERO_UNDEF [[OR]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[CTTZ_ZERO_UNDEF]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[CTTZ_ZERO_UNDEF]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY]], [[C1]] ; CHECK-NEXT: $vgpr0 = COPY [[AND]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s16) = G_TRUNC %0 @@ -167,13 +167,13 @@ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65536 ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[BITCAST]], [[C1]] ; CHECK-NEXT: [[CTTZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTTZ_ZERO_UNDEF [[OR]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[CTTZ_ZERO_UNDEF]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[CTTZ_ZERO_UNDEF]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[C1]] ; CHECK-NEXT: [[CTTZ_ZERO_UNDEF1:%[0-9]+]]:_(s32) = 
G_CTTZ_ZERO_UNDEF [[OR1]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[CTTZ_ZERO_UNDEF1]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[CTTZ_ZERO_UNDEF1]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY]], [[C2]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY1]], [[C2]] ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) @@ -197,9 +197,9 @@ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 128 ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY]], [[C]] ; CHECK-NEXT: [[CTTZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTTZ_ZERO_UNDEF [[OR]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[CTTZ_ZERO_UNDEF]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[CTTZ_ZERO_UNDEF]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 127 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY]], [[C1]] ; CHECK-NEXT: $vgpr0 = COPY [[AND]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s7) = G_TRUNC %0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-extract-vector-elt.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-extract-vector-elt.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-extract-vector-elt.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-extract-vector-elt.mir @@ -12,8 +12,8 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[PRED_COPY]](s32) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(s32) = G_CONSTANT i32 0 %2:_(s32) = G_EXTRACT_VECTOR_ELT %0, %1 @@ -30,8 +30,8 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[PRED_COPY]](s32) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(s32) = G_CONSTANT i32 1 %2:_(s32) = G_EXTRACT_VECTOR_ELT %0, %1 @@ -48,8 +48,8 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[PRED_COPY]](s32) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(s32) = G_CONSTANT i32 1 %2:_(s32) = G_EXTRACT_VECTOR_ELT %0, %1 @@ -66,8 +66,8 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV]](s32) - 
; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[PRED_COPY]](s32) %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(s32) = G_CONSTANT i32 0 %2:_(s32) = G_EXTRACT_VECTOR_ELT %0, %1 @@ -84,8 +84,8 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[PRED_COPY]](s32) %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(s32) = G_CONSTANT i32 0 %2:_(s32) = G_EXTRACT_VECTOR_ELT %0, %1 @@ -102,8 +102,8 @@ ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[PRED_COPY]](s32) %0:_(s32) = COPY $vgpr0 %1:_(<5 x s32>) = G_BUILD_VECTOR %0, %0, %0, %0, %0 %2:_(s32) = G_CONSTANT i32 0 @@ -121,8 +121,8 @@ ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[PRED_COPY]](s32) %0:_(s32) = COPY $vgpr0 %1:_(<6 x s32>) = G_BUILD_VECTOR %0, %0, %0, %0, %0, %0 %2:_(s32) = G_CONSTANT i32 0 @@ -140,8 +140,8 @@ ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[PRED_COPY]](s32) %0:_(s32) = COPY $vgpr0 %1:_(<7 x s32>) = G_BUILD_VECTOR %0, %0, %0, %0, %0, %0, %0 %2:_(s32) = G_CONSTANT i32 0 @@ -159,8 +159,8 @@ ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[PRED_COPY]](s32) %0:_(s32) = COPY $vgpr0 %1:_(<8 x s32>) = G_BUILD_VECTOR %0, %0, %0, %0, %0, %0, %0, %0 %2:_(s32) = G_CONSTANT i32 0 @@ -178,8 +178,8 @@ ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[PRED_COPY]](s32) %0:_(s32) = COPY $vgpr0 %1:_(<16 x s32>) = G_BUILD_VECTOR %0, %0, %0, %0, %0, %0, %0, %0, %0, %0, %0, %0, %0, %0, %0, %0 %2:_(s32) = G_CONSTANT i32 0 @@ -235,8 +235,8 @@ ; CHECK-LABEL: name: extract_vector_elt_0_v2i8_i32 ; CHECK: [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<2 x s32>) - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[COPY]](s32) + ; CHECK-NEXT: 
[[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[PRED_COPY]](s32) %0:_(<2 x s8>) = G_IMPLICIT_DEF %1:_(s32) = G_CONSTANT i32 0 %2:_(s8) = G_EXTRACT_VECTOR_ELT %0, %1 @@ -272,8 +272,8 @@ ; CHECK-LABEL: name: extract_vector_elt_0_v2i1_i32 ; CHECK: [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<2 x s32>) - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[PRED_COPY]](s32) %0:_(<2 x s1>) = G_IMPLICIT_DEF %1:_(s32) = G_CONSTANT i32 0 %2:_(s1) = G_EXTRACT_VECTOR_ELT %0, %1 @@ -290,8 +290,8 @@ ; CHECK-LABEL: name: extract_vector_elt_0_v2i1_i1 ; CHECK: [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<2 x s32>) - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[PRED_COPY]](s32) %0:_(<2 x s1>) = G_IMPLICIT_DEF %1:_(s1) = G_CONSTANT i1 false %2:_(s1) = G_EXTRACT_VECTOR_ELT %0, %1 @@ -337,8 +337,8 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[PRED_COPY]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s16) = G_TRUNC %0 @@ -363,8 +363,8 @@ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[LSHR]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[PRED_COPY]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s16) = G_TRUNC %0 @@ -586,8 +586,8 @@ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV]](s32) - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; CHECK-NEXT: $vgpr0 = COPY [[LSHR]](s32) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s32) = G_CONSTANT i32 0 @@ -609,9 +609,9 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV]](s32) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; CHECK-NEXT: $vgpr0 = COPY [[LSHR]](s32) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s32) = G_CONSTANT i32 1 @@ -633,9 +633,9 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 ; 
CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV]](s32) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; CHECK-NEXT: $vgpr0 = COPY [[LSHR]](s32) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s32) = G_CONSTANT i32 3 @@ -657,9 +657,9 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV1]](s32) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; CHECK-NEXT: $vgpr0 = COPY [[LSHR]](s32) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s32) = G_CONSTANT i32 4 @@ -681,9 +681,9 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV1]](s32) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; CHECK-NEXT: $vgpr0 = COPY [[LSHR]](s32) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s32) = G_CONSTANT i32 5 @@ -705,9 +705,9 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV1]](s32) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; CHECK-NEXT: $vgpr0 = COPY [[LSHR]](s32) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s32) = G_CONSTANT i32 7 @@ -821,8 +821,8 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[PRED_COPY]](s32) %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(s32) = G_CONSTANT i32 0 %2:_(<3 x s16>) = G_TRUNC %0 @@ -843,8 +843,8 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[PRED_COPY]](s32) %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(s32) = G_CONSTANT i32 1 %2:_(<3 x s16>) = G_TRUNC %0 @@ -865,8 +865,8 @@ ; CHECK-NEXT: {{ $}} ; 
CHECK-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[PRED_COPY]](s32) %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(s32) = G_CONSTANT i32 2 %2:_(<3 x s16>) = G_TRUNC %0 @@ -965,8 +965,8 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY [[UV]](s64) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[COPY1]](s64) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s64) = PRED_COPY [[UV]](s64) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[PRED_COPY]](s64) %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(s32) = G_CONSTANT i32 0 %2:_(s64) = G_EXTRACT_VECTOR_ELT %0, %1 @@ -985,8 +985,8 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<8 x s64>) = G_IMPLICIT_DEF ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64), [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<8 x s64>) - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY [[UV]](s64) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[COPY]](s64) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s64) = PRED_COPY [[UV]](s64) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[PRED_COPY]](s64) %0:_(<8 x s64>) = G_IMPLICIT_DEF %1:_(s32) = G_CONSTANT i32 0 %2:_(s64) = G_EXTRACT_VECTOR_ELT %0, %1 @@ -1005,8 +1005,8 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<16 x s64>) = G_IMPLICIT_DEF ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64), [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64), [[UV8:%[0-9]+]]:_(s64), [[UV9:%[0-9]+]]:_(s64), [[UV10:%[0-9]+]]:_(s64), [[UV11:%[0-9]+]]:_(s64), [[UV12:%[0-9]+]]:_(s64), [[UV13:%[0-9]+]]:_(s64), [[UV14:%[0-9]+]]:_(s64), [[UV15:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<16 x s64>) - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY [[UV]](s64) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[COPY]](s64) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s64) = PRED_COPY [[UV]](s64) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[PRED_COPY]](s64) %0:_(<16 x s64>) = G_IMPLICIT_DEF %1:_(s32) = G_CONSTANT i32 0 %2:_(s64) = G_EXTRACT_VECTOR_ELT %0, %1 @@ -1025,8 +1025,8 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[PRED_COPY]](s32) %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(s64) = G_CONSTANT i64 0 %2:_(s32) = G_TRUNC %1 @@ -1047,8 +1047,8 @@ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $sgpr0_sgpr1 ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p1) :: (load (<16 x s32>), align 4, addrspace 4) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), 
[[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<16 x s32>) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV7]](s32) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV7]](s32) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[PRED_COPY]](s32) %0:_(p1) = COPY $sgpr0_sgpr1 %1:_(s32) = G_CONSTANT i32 7 %2:_(<64 x s32>) = G_LOAD %0 :: (load (<64 x s32>), align 4, addrspace 4) @@ -1071,8 +1071,8 @@ ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<16 x s32>) from unknown-address + 128, align 4, addrspace 4) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<16 x s32>) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[PRED_COPY]](s32) %0:_(p1) = COPY $sgpr0_sgpr1 %1:_(s32) = G_CONSTANT i32 33 %2:_(<64 x s32>) = G_LOAD %0 :: (load (<64 x s32>), align 4, addrspace 4) @@ -1092,8 +1092,8 @@ ; CHECK: liveins: $sgpr0_sgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[DEF]](s32) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[COPY]](s32), implicit [[DEF]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[DEF]](s32) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[PRED_COPY]](s32), implicit [[DEF]](s32) %0:_(p1) = COPY $sgpr0_sgpr1 %1:_(s32) = G_CONSTANT i32 64 %2:_(<64 x s32>) = G_LOAD %0 :: (load (<64 x s32>), align 4, addrspace 4) @@ -1119,8 +1119,8 @@ ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<16 x s32>) from unknown-address + 128, align 4, addrspace 4) ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<16 x p3>) = G_BITCAST [[LOAD]](<16 x s32>) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(p3), [[UV1:%[0-9]+]]:_(p3), [[UV2:%[0-9]+]]:_(p3), [[UV3:%[0-9]+]]:_(p3), [[UV4:%[0-9]+]]:_(p3), [[UV5:%[0-9]+]]:_(p3), [[UV6:%[0-9]+]]:_(p3), [[UV7:%[0-9]+]]:_(p3), [[UV8:%[0-9]+]]:_(p3), [[UV9:%[0-9]+]]:_(p3), [[UV10:%[0-9]+]]:_(p3), [[UV11:%[0-9]+]]:_(p3), [[UV12:%[0-9]+]]:_(p3), [[UV13:%[0-9]+]]:_(p3), [[UV14:%[0-9]+]]:_(p3), [[UV15:%[0-9]+]]:_(p3) = G_UNMERGE_VALUES [[BITCAST]](<16 x p3>) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p3) = COPY [[UV1]](p3) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[COPY1]](p3) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(p3) = PRED_COPY [[UV1]](p3) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[PRED_COPY]](p3) %0:_(p1) = COPY $sgpr0_sgpr1 %1:_(s32) = G_CONSTANT i32 33 %2:_(<64 x p3>) = G_LOAD %0 :: (load (<64 x p3>), align 4, addrspace 4) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-extract.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-extract.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-extract.mir +++ 
b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-extract.mir @@ -214,8 +214,8 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[PRED_COPY]](s32) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(s32) = G_EXTRACT %0, 0 $vgpr0 = COPY %1 @@ -231,8 +231,8 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[PRED_COPY]](s32) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(s32) = G_EXTRACT %0, 32 $vgpr0 = COPY %1 @@ -248,8 +248,8 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[PRED_COPY]](s32) %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(s32) = G_EXTRACT %0, 0 $vgpr0 = COPY %1 @@ -265,8 +265,8 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[PRED_COPY]](s32) %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(s32) = G_EXTRACT %0, 32 $vgpr0 = COPY %1 @@ -282,8 +282,8 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[PRED_COPY]](s32) %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(s32) = G_EXTRACT %0, 64 $vgpr0 = COPY %1 @@ -299,8 +299,8 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[PRED_COPY]](s32) %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(s32) = G_EXTRACT %0, 0 $vgpr0 = COPY %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcopysign.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcopysign.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcopysign.mir +++ 
b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcopysign.mir @@ -698,8 +698,8 @@ ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[UV2]](s32) ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s64) = G_ZEXT [[UV3]](s32) ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[ZEXT]], [[COPY2]](s32) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; SI-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[ZEXT]], [[PRED_COPY]](s32) ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[ZEXT1]], [[C2]](s32) ; SI-NEXT: [[AND2:%[0-9]+]]:_(s64) = G_AND [[SHL]], [[C]] ; SI-NEXT: [[AND3:%[0-9]+]]:_(s64) = G_AND [[SHL1]], [[C]] @@ -721,8 +721,8 @@ ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[UV2]](s32) ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s64) = G_ZEXT [[UV3]](s32) ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[ZEXT]], [[COPY2]](s32) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; VI-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[ZEXT]], [[PRED_COPY]](s32) ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[ZEXT1]], [[C2]](s32) ; VI-NEXT: [[AND2:%[0-9]+]]:_(s64) = G_AND [[SHL]], [[C]] ; VI-NEXT: [[AND3:%[0-9]+]]:_(s64) = G_AND [[SHL1]], [[C]] @@ -744,8 +744,8 @@ ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[UV2]](s32) ; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(s64) = G_ZEXT [[UV3]](s32) ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[ZEXT]], [[COPY2]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[ZEXT]], [[PRED_COPY]](s32) ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[ZEXT1]], [[C2]](s32) ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s64) = G_AND [[SHL]], [[C]] ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s64) = G_AND [[SHL1]], [[C]] @@ -778,8 +778,8 @@ ; SI-NEXT: [[AND:%[0-9]+]]:_(<2 x s32>) = G_AND [[COPY]], [[BUILD_VECTOR1]] ; SI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV]], [[COPY2]](s32) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV]], [[PRED_COPY]](s32) ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[UV1]], [[C2]](s32) ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64) ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR1]](s64) @@ -799,8 +799,8 @@ ; VI-NEXT: [[AND:%[0-9]+]]:_(<2 x s32>) = G_AND [[COPY]], [[BUILD_VECTOR1]] ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV]], [[COPY2]](s32) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV]], [[PRED_COPY]](s32) ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[UV1]], [[C2]](s32) ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64) ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR1]](s64) @@ -820,8 +820,8 @@ ; GFX9-NEXT: [[AND:%[0-9]+]]:_(<2 x s32>) = G_AND [[COPY]], [[BUILD_VECTOR1]] ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = 
G_UNMERGE_VALUES [[COPY1]](<2 x s64>) ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV]], [[COPY2]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV]], [[PRED_COPY]](s32) ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[UV1]], [[C2]](s32) ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64) ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR1]](s64) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fshl.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fshl.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fshl.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fshl.mir @@ -260,9 +260,9 @@ ; SI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[AND4]](s16) ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[ZEXT2]](s32) ; SI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) - ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C3]](s32) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C3]](s32) ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C4]] - ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND6]], [[COPY3]](s32) + ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND6]], [[PRED_COPY]](s32) ; SI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[AND5]](s16) ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C4]] ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[ZEXT3]](s32) @@ -434,9 +434,9 @@ ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[AND]], [[C3]] ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[AND2]](s32) - ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[COPY3]](s32) + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[PRED_COPY]](s32) ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[AND1]], [[C3]] ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]] ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND5]], [[AND4]](s32) @@ -459,11 +459,11 @@ ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[AND2]](s32) ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[TRUNC1]](s16) - ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) ; VI-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) ; VI-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C4]] - ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32) + ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY]](s32) ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[AND3]], [[TRUNC3]](s16) ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[AND1]], [[C3]] ; VI-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[LSHR]], [[C4]] @@ -490,11 +490,11 @@ ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[AND2]](s32) ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[TRUNC1]](s16) - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) ; 
GFX9-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C4]] - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32) + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY]](s32) ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[AND3]], [[TRUNC3]](s16) ; GFX9-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[AND1]], [[C3]] ; GFX9-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[LSHR]], [[C4]] @@ -531,33 +531,33 @@ ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16777215 ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]] - ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; SI-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[COPY3]](s32) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C1]](s32) + ; SI-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[PRED_COPY]](s32) ; SI-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 ; SI-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C3]] ; SI-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; SI-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C4]], [[COPY3]] + ; SI-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C4]], [[PRED_COPY]] ; SI-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] ; SI-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] ; SI-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] ; SI-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]] - ; SI-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[COPY3]] + ; SI-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[PRED_COPY]] ; SI-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; SI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[COPY3]] - ; SI-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[COPY3]] + ; SI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[PRED_COPY]] + ; SI-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[PRED_COPY]] ; SI-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] - ; SI-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[COPY3]] - ; SI-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[COPY3]] + ; SI-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[PRED_COPY]] + ; SI-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[PRED_COPY]] ; SI-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] ; SI-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C]], [[SELECT1]] ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SELECT1]], [[C2]] ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[AND1]](s32) - ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C5]](s32) + ; SI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C5]](s32) ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]] - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY4]](s32) + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[PRED_COPY1]](s32) ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[SUB4]], [[C2]] ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[AND3]](s32) @@ -573,33 +573,33 @@ ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16777215 ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]] - ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) 
= COPY [[C1]](s32) - ; VI-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[COPY3]](s32) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C1]](s32) + ; VI-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[PRED_COPY]](s32) ; VI-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 ; VI-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C3]] ; VI-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; VI-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C4]], [[COPY3]] + ; VI-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C4]], [[PRED_COPY]] ; VI-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] ; VI-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] ; VI-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] ; VI-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]] - ; VI-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[COPY3]] + ; VI-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[PRED_COPY]] ; VI-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; VI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[COPY3]] - ; VI-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[COPY3]] + ; VI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[PRED_COPY]] + ; VI-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[PRED_COPY]] ; VI-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] - ; VI-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[COPY3]] - ; VI-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[COPY3]] + ; VI-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[PRED_COPY]] + ; VI-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[PRED_COPY]] ; VI-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] ; VI-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C]], [[SELECT1]] ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SELECT1]], [[C2]] ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[AND1]](s32) - ; VI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C5]](s32) + ; VI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C5]](s32) ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]] - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY4]](s32) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[PRED_COPY1]](s32) ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[SUB4]], [[C2]] ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[AND3]](s32) @@ -615,33 +615,33 @@ ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16777215 ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]] - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; GFX9-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[COPY3]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C1]](s32) + ; GFX9-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[PRED_COPY]](s32) ; GFX9-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C3]] ; GFX9-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(s32) = 
G_SUB [[C4]], [[COPY3]] + ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C4]], [[PRED_COPY]] ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]] - ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[COPY3]] + ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[PRED_COPY]] ; GFX9-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[COPY3]] - ; GFX9-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[COPY3]] + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[PRED_COPY]] + ; GFX9-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[PRED_COPY]] ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] - ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[COPY3]] - ; GFX9-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[COPY3]] + ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[PRED_COPY]] + ; GFX9-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[PRED_COPY]] ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] ; GFX9-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C]], [[SELECT1]] ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SELECT1]], [[C2]] ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[AND1]](s32) - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C5]](s32) + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C5]](s32) ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]] - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY4]](s32) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[PRED_COPY1]](s32) ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[SUB4]], [[C2]] ; GFX9-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[AND3]](s32) @@ -709,9 +709,9 @@ ; SI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[AND4]](s16) ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[ZEXT2]](s32) ; SI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) - ; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C3]](s32) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C3]](s32) ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C4]] - ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND6]], [[COPY6]](s32) + ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND6]], [[PRED_COPY]](s32) ; SI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[AND5]](s16) ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C4]] ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[ZEXT3]](s32) @@ -723,9 +723,9 @@ ; SI-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[AND8]](s16) ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[BITCAST1]], [[ZEXT4]](s32) ; SI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) - ; SI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C3]](s32) + ; SI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C3]](s32) ; SI-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C4]] - ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[AND10]], [[COPY7]](s32) + ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[AND10]], [[PRED_COPY1]](s32) ; SI-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[AND9]](s16) ; SI-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LSHR7]], 
[[C4]] ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[AND11]], [[ZEXT5]](s32) @@ -965,9 +965,9 @@ ; SI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[AND4]](s16) ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[ZEXT2]](s32) ; SI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) - ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C3]](s32) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C3]](s32) ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C4]] - ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[AND6]], [[COPY3]](s32) + ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[AND6]], [[PRED_COPY]](s32) ; SI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[AND5]](s16) ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR8]], [[C4]] ; SI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[ZEXT3]](s32) @@ -979,9 +979,9 @@ ; SI-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[AND8]](s16) ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[BITCAST1]], [[ZEXT4]](s32) ; SI-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) - ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C3]](s32) + ; SI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C3]](s32) ; SI-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C4]] - ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[AND10]], [[COPY4]](s32) + ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[AND10]], [[PRED_COPY1]](s32) ; SI-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[AND9]](s16) ; SI-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LSHR10]], [[C4]] ; SI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[AND11]], [[ZEXT5]](s32) @@ -993,9 +993,9 @@ ; SI-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[AND12]](s16) ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[ZEXT6]](s32) ; SI-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) - ; SI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C3]](s32) + ; SI-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[C3]](s32) ; SI-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C4]] - ; SI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[AND14]], [[COPY5]](s32) + ; SI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[AND14]], [[PRED_COPY2]](s32) ; SI-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[AND13]](s16) ; SI-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LSHR12]], [[C4]] ; SI-NEXT: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[AND15]], [[ZEXT7]](s32) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fshr.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fshr.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fshr.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fshr.mir @@ -219,9 +219,9 @@ ; SI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[AND4]](s16) ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[ZEXT2]](s32) ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) - ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C4]](s32) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C4]](s32) ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C5]] - ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND6]], [[COPY3]](s32) + ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND6]], [[PRED_COPY]](s32) ; SI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[AND5]](s16) ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C5]] ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[ZEXT3]](s32) @@ -229,14 +229,14 @@ ; SI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[TRUNC2]], [[TRUNC3]] ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C1]](s32) - ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY 
[[C4]](s32) - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[BITCAST2]], [[COPY4]](s32) - ; SI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LSHR6]], [[COPY5]](s32) - ; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; SI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[COPY7]], [[C1]](s32) - ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[COPY6]], [[SHL4]] + ; SI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C4]](s32) + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[BITCAST2]], [[PRED_COPY1]](s32) + ; SI-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[C4]](s32) + ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LSHR6]], [[PRED_COPY2]](s32) + ; SI-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY [[C5]](s32) + ; SI-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY [[C5]](s32) + ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY4]], [[C1]](s32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY3]], [[SHL4]] ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; SI-NEXT: [[XOR2:%[0-9]+]]:_(<2 x s16>) = G_XOR [[COPY2]], [[BITCAST3]] ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[XOR2]](<2 x s16>) @@ -250,9 +250,9 @@ ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ANYEXT]], [[ZEXT4]](s32) ; SI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[SHL5]](s32) - ; SI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C4]](s32) + ; SI-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY [[C4]](s32) ; SI-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[SHL2]], [[C5]] - ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[AND10]], [[COPY8]](s32) + ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[AND10]], [[PRED_COPY5]](s32) ; SI-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[AND9]](s16) ; SI-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LSHR8]], [[C5]] ; SI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[AND11]], [[ZEXT5]](s32) @@ -265,9 +265,9 @@ ; SI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16) ; SI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ANYEXT1]], [[ZEXT6]](s32) ; SI-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32) - ; SI-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C4]](s32) + ; SI-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY [[C4]](s32) ; SI-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[SHL3]], [[C5]] - ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[AND14]], [[COPY9]](s32) + ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[AND14]], [[PRED_COPY6]](s32) ; SI-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[AND13]](s16) ; SI-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LSHR10]], [[C5]] ; SI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[AND15]], [[ZEXT7]](s32) @@ -318,9 +318,9 @@ ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[TRUNC4]], [[C]](s16) ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[TRUNC5]], [[C]](s16) ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C4]](s32) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C4]](s32) ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C1]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[COPY3]], [[SHL4]] + ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY]], [[SHL4]] ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; VI-NEXT: [[XOR2:%[0-9]+]]:_(<2 x s16>) = G_XOR [[COPY2]], [[BITCAST3]] ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[XOR2]](<2 x s16>) @@ -463,8 +463,8 @@ ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[XOR]], [[C]] ; SI-NEXT: 
[[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[COPY3]](s32) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[PRED_COPY]](s32) ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[AND1]], [[C3]] ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SHL]], [[AND2]](s32) ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[AND]], [[C3]] @@ -485,9 +485,9 @@ ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[XOR]], [[C]] ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY]](s32) ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[TRUNC1]](s16) ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[AND1]], [[C3]] ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[AND2]](s32) @@ -515,9 +515,9 @@ ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[XOR]], [[C]] ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY]](s32) ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[TRUNC1]](s16) ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[AND1]], [[C3]] ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[AND2]](s32) @@ -559,30 +559,30 @@ ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16777215 ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]] - ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; SI-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[COPY3]](s32) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C1]](s32) + ; SI-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[PRED_COPY]](s32) ; SI-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 ; SI-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C3]] ; SI-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; SI-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C4]], [[COPY3]] + ; SI-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C4]], [[PRED_COPY]] ; SI-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] ; SI-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] ; SI-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] ; SI-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]] - ; SI-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[COPY3]] + ; SI-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[PRED_COPY]] ; SI-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; SI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[COPY3]] - ; SI-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[COPY3]] + ; SI-NEXT: 
[[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[PRED_COPY]] + ; SI-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[PRED_COPY]] ; SI-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] - ; SI-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[COPY3]] - ; SI-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[COPY3]] + ; SI-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[PRED_COPY]] + ; SI-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[PRED_COPY]] ; SI-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] ; SI-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C]], [[SELECT1]] - ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[COPY4]](s32) + ; SI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C5]](s32) + ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[PRED_COPY1]](s32) ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SUB4]], [[C2]] ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SHL]], [[AND1]](s32) ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SELECT1]], [[C2]] @@ -600,30 +600,30 @@ ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16777215 ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]] - ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; VI-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[COPY3]](s32) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C1]](s32) + ; VI-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[PRED_COPY]](s32) ; VI-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 ; VI-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C3]] ; VI-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; VI-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C4]], [[COPY3]] + ; VI-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C4]], [[PRED_COPY]] ; VI-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] ; VI-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] ; VI-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] ; VI-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]] - ; VI-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[COPY3]] + ; VI-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[PRED_COPY]] ; VI-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; VI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[COPY3]] - ; VI-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[COPY3]] + ; VI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[PRED_COPY]] + ; VI-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[PRED_COPY]] ; VI-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] - ; VI-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[COPY3]] - ; VI-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[COPY3]] + ; VI-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[PRED_COPY]] + ; VI-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[PRED_COPY]] ; VI-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] ; VI-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C]], [[SELECT1]] - ; VI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[COPY4]](s32) + ; 
VI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C5]](s32) + ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[PRED_COPY1]](s32) ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SUB4]], [[C2]] ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SHL]], [[AND1]](s32) ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SELECT1]], [[C2]] @@ -641,30 +641,30 @@ ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16777215 ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]] - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; GFX9-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[COPY3]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C1]](s32) + ; GFX9-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[PRED_COPY]](s32) ; GFX9-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C3]] ; GFX9-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C4]], [[COPY3]] + ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C4]], [[PRED_COPY]] ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]] - ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[COPY3]] + ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[PRED_COPY]] ; GFX9-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[COPY3]] - ; GFX9-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[COPY3]] + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[PRED_COPY]] + ; GFX9-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[PRED_COPY]] ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] - ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[COPY3]] - ; GFX9-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[COPY3]] + ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[PRED_COPY]] + ; GFX9-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[PRED_COPY]] ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] ; GFX9-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C]], [[SELECT1]] - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[COPY4]](s32) + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C5]](s32) + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[PRED_COPY1]](s32) ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SUB4]], [[C2]] ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SHL]], [[AND1]](s32) ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SELECT1]], [[C2]] @@ -735,22 +735,22 @@ ; SI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[AND5]](s16) ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[ZEXT2]](s32) ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) - ; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C6]](s32) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C6]](s32) ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] - ; SI-NEXT: 
[[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[COPY6]](s32) + ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[PRED_COPY]](s32) ; SI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[AND6]](s16) ; SI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C1]] ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND8]], [[ZEXT3]](s32) ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32) ; SI-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[TRUNC2]], [[TRUNC3]] - ; SI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C6]](s32) - ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[BITCAST2]], [[COPY7]](s32) - ; SI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C6]](s32) - ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[COPY8]](s32) - ; SI-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; SI-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[COPY10]], [[C]](s32) - ; SI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[COPY9]], [[SHL5]] + ; SI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C6]](s32) + ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[BITCAST2]], [[PRED_COPY1]](s32) + ; SI-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[C6]](s32) + ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[PRED_COPY2]](s32) + ; SI-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY [[C1]](s32) + ; SI-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY [[C1]](s32) + ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY4]], [[C]](s32) + ; SI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY3]], [[SHL5]] ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) ; SI-NEXT: [[XOR2:%[0-9]+]]:_(<2 x s16>) = G_XOR [[COPY4]], [[BITCAST6]] ; SI-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[XOR2]](<2 x s16>) @@ -764,9 +764,9 @@ ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16) ; SI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ANYEXT]], [[ZEXT4]](s32) ; SI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32) - ; SI-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C6]](s32) + ; SI-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY [[C6]](s32) ; SI-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[SHL3]], [[C1]] - ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[AND11]], [[COPY11]](s32) + ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[AND11]], [[PRED_COPY5]](s32) ; SI-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[AND10]](s16) ; SI-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LSHR7]], [[C1]] ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[AND12]], [[ZEXT5]](s32) @@ -779,9 +779,9 @@ ; SI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR2]](s16) ; SI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[ANYEXT1]], [[ZEXT6]](s32) ; SI-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32) - ; SI-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[C6]](s32) + ; SI-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY [[C6]](s32) ; SI-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[SHL4]], [[C1]] - ; SI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[AND15]], [[COPY12]](s32) + ; SI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[AND15]], [[PRED_COPY6]](s32) ; SI-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[AND14]](s16) ; SI-NEXT: [[AND16:%[0-9]+]]:_(s32) = G_AND [[LSHR9]], [[C1]] ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[AND16]], [[ZEXT7]](s32) @@ -793,20 +793,20 @@ ; SI-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[AND17]](s16) ; SI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[BITCAST1]], [[ZEXT8]](s32) ; SI-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[SHL8]](s32) - ; SI-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[C6]](s32) + ; SI-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY 
[[C6]](s32) ; SI-NEXT: [[AND19:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] - ; SI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[AND19]], [[COPY13]](s32) + ; SI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[AND19]], [[PRED_COPY7]](s32) ; SI-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[AND18]](s16) ; SI-NEXT: [[AND20:%[0-9]+]]:_(s32) = G_AND [[LSHR11]], [[C1]] ; SI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[AND20]], [[ZEXT9]](s32) ; SI-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR12]](s32) ; SI-NEXT: [[OR6:%[0-9]+]]:_(s16) = G_OR [[TRUNC10]], [[TRUNC11]] - ; SI-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[C6]](s32) - ; SI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[BITCAST3]], [[COPY14]](s32) - ; SI-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; SI-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; SI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C]](s32) - ; SI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL10]] + ; SI-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY [[C6]](s32) + ; SI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[BITCAST3]], [[PRED_COPY8]](s32) + ; SI-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY [[C1]](s32) + ; SI-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY [[C1]](s32) + ; SI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY10]], [[C]](s32) + ; SI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY9]], [[SHL10]] ; SI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32) ; SI-NEXT: [[XOR6:%[0-9]+]]:_(<2 x s16>) = G_XOR [[BITCAST5]], [[BITCAST8]] ; SI-NEXT: [[BITCAST9:%[0-9]+]]:_(s32) = G_BITCAST [[XOR6]](<2 x s16>) @@ -818,9 +818,9 @@ ; SI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[OR6]](s16) ; SI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ANYEXT2]], [[ZEXT10]](s32) ; SI-NEXT: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[SHL11]](s32) - ; SI-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[C6]](s32) + ; SI-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY [[C6]](s32) ; SI-NEXT: [[AND23:%[0-9]+]]:_(s32) = G_AND [[SHL9]], [[C1]] - ; SI-NEXT: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[AND23]], [[COPY17]](s32) + ; SI-NEXT: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[AND23]], [[PRED_COPY11]](s32) ; SI-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[AND22]](s16) ; SI-NEXT: [[AND24:%[0-9]+]]:_(s32) = G_AND [[LSHR13]], [[C1]] ; SI-NEXT: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[AND24]], [[ZEXT11]](s32) @@ -897,9 +897,9 @@ ; VI-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[SHL2]], [[LSHR5]] ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[TRUNC3]], [[C3]](s16) ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[TRUNC4]], [[C3]](s16) - ; VI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C1]](s32) ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C]](s32) - ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[COPY6]], [[SHL5]] + ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY]], [[SHL5]] ; VI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) ; VI-NEXT: [[XOR2:%[0-9]+]]:_(<2 x s16>) = G_XOR [[COPY4]], [[BITCAST6]] ; VI-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[XOR2]](<2 x s16>) @@ -928,10 +928,10 @@ ; VI-NEXT: [[LSHR12:%[0-9]+]]:_(s16) = G_LSHR [[LSHR11]], [[AND10]](s16) ; VI-NEXT: [[OR6:%[0-9]+]]:_(s16) = G_OR [[SHL8]], [[LSHR12]] ; VI-NEXT: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[TRUNC5]], [[C3]](s16) - ; VI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; VI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; VI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[COPY8]], [[C]](s32) - ; VI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[COPY7]], [[SHL10]] + ; 
VI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C1]](s32) + ; VI-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[C1]](s32) + ; VI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY2]], [[C]](s32) + ; VI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY1]], [[SHL10]] ; VI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32) ; VI-NEXT: [[XOR6:%[0-9]+]]:_(<2 x s16>) = G_XOR [[BITCAST5]], [[BITCAST8]] ; VI-NEXT: [[BITCAST9:%[0-9]+]]:_(s32) = G_BITCAST [[XOR6]](<2 x s16>) @@ -1090,9 +1090,9 @@ ; SI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[AND4]](s16) ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[ZEXT2]](s32) ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) - ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C4]](s32) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C4]](s32) ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C5]] - ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND6]], [[COPY3]](s32) + ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND6]], [[PRED_COPY]](s32) ; SI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[AND5]](s16) ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C5]] ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[ZEXT3]](s32) @@ -1100,14 +1100,14 @@ ; SI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[TRUNC2]], [[TRUNC3]] ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C1]](s32) - ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[BITCAST2]], [[COPY4]](s32) - ; SI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LSHR6]], [[COPY5]](s32) - ; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; SI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[COPY7]], [[C1]](s32) - ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[COPY6]], [[SHL4]] + ; SI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C4]](s32) + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[BITCAST2]], [[PRED_COPY1]](s32) + ; SI-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[C4]](s32) + ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LSHR6]], [[PRED_COPY2]](s32) + ; SI-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY [[C5]](s32) + ; SI-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY [[C5]](s32) + ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY4]], [[C1]](s32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY3]], [[SHL4]] ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; SI-NEXT: [[XOR2:%[0-9]+]]:_(<2 x s16>) = G_XOR [[UV4]], [[BITCAST3]] ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[XOR2]](<2 x s16>) @@ -1121,9 +1121,9 @@ ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ANYEXT]], [[ZEXT4]](s32) ; SI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[SHL5]](s32) - ; SI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C4]](s32) + ; SI-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY [[C4]](s32) ; SI-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[SHL2]], [[C5]] - ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[AND10]], [[COPY8]](s32) + ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[AND10]], [[PRED_COPY5]](s32) ; SI-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[AND9]](s16) ; SI-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LSHR8]], [[C5]] ; SI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[AND11]], [[ZEXT5]](s32) @@ -1136,9 +1136,9 @@ ; SI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16) 
; SI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ANYEXT1]], [[ZEXT6]](s32) ; SI-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32) - ; SI-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C4]](s32) + ; SI-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY [[C4]](s32) ; SI-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[SHL3]], [[C5]] - ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[AND14]], [[COPY9]](s32) + ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[AND14]], [[PRED_COPY6]](s32) ; SI-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[AND13]](s16) ; SI-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LSHR10]], [[C5]] ; SI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[AND15]], [[ZEXT7]](s32) @@ -1159,9 +1159,9 @@ ; SI-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[AND16]](s16) ; SI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[BITCAST6]], [[ZEXT10]](s32) ; SI-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[SHL8]](s32) - ; SI-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY [[C4]](s32) + ; SI-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY [[C4]](s32) ; SI-NEXT: [[AND18:%[0-9]+]]:_(s32) = G_AND [[BITCAST7]], [[C5]] - ; SI-NEXT: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[AND18]], [[COPY10]](s32) + ; SI-NEXT: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[AND18]], [[PRED_COPY7]](s32) ; SI-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[AND17]](s16) ; SI-NEXT: [[AND19:%[0-9]+]]:_(s32) = G_AND [[LSHR14]], [[C5]] ; SI-NEXT: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[AND19]], [[ZEXT11]](s32) @@ -1173,9 +1173,9 @@ ; SI-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[AND20]](s16) ; SI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[LSHR12]], [[ZEXT12]](s32) ; SI-NEXT: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[SHL9]](s32) - ; SI-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C4]](s32) + ; SI-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY [[C4]](s32) ; SI-NEXT: [[AND22:%[0-9]+]]:_(s32) = G_AND [[LSHR13]], [[C5]] - ; SI-NEXT: [[LSHR16:%[0-9]+]]:_(s32) = G_LSHR [[AND22]], [[COPY11]](s32) + ; SI-NEXT: [[LSHR16:%[0-9]+]]:_(s32) = G_LSHR [[AND22]], [[PRED_COPY8]](s32) ; SI-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[AND21]](s16) ; SI-NEXT: [[AND23:%[0-9]+]]:_(s32) = G_AND [[LSHR16]], [[C5]] ; SI-NEXT: [[LSHR17:%[0-9]+]]:_(s32) = G_LSHR [[AND23]], [[ZEXT13]](s32) @@ -1183,14 +1183,14 @@ ; SI-NEXT: [[OR7:%[0-9]+]]:_(s16) = G_OR [[TRUNC12]], [[TRUNC13]] ; SI-NEXT: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) ; SI-NEXT: [[LSHR18:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST8]], [[C1]](s32) - ; SI-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[BITCAST8]], [[COPY12]](s32) - ; SI-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[LSHR18]], [[COPY13]](s32) - ; SI-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; SI-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; SI-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[COPY15]], [[C1]](s32) - ; SI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[COPY14]], [[SHL12]] + ; SI-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY [[C4]](s32) + ; SI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[BITCAST8]], [[PRED_COPY9]](s32) + ; SI-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY [[C4]](s32) + ; SI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[LSHR18]], [[PRED_COPY10]](s32) + ; SI-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY [[C5]](s32) + ; SI-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY [[C5]](s32) + ; SI-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY12]], [[C1]](s32) + ; SI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY11]], [[SHL12]] ; SI-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = 
G_BITCAST [[OR8]](s32) ; SI-NEXT: [[XOR7:%[0-9]+]]:_(<2 x s16>) = G_XOR [[UV5]], [[BITCAST9]] ; SI-NEXT: [[BITCAST10:%[0-9]+]]:_(s32) = G_BITCAST [[XOR7]](<2 x s16>) @@ -1204,9 +1204,9 @@ ; SI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[OR6]](s16) ; SI-NEXT: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[ANYEXT2]], [[ZEXT14]](s32) ; SI-NEXT: [[TRUNC16:%[0-9]+]]:_(s16) = G_TRUNC [[SHL13]](s32) - ; SI-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[C4]](s32) + ; SI-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[C4]](s32) ; SI-NEXT: [[AND26:%[0-9]+]]:_(s32) = G_AND [[SHL10]], [[C5]] - ; SI-NEXT: [[LSHR20:%[0-9]+]]:_(s32) = G_LSHR [[AND26]], [[COPY16]](s32) + ; SI-NEXT: [[LSHR20:%[0-9]+]]:_(s32) = G_LSHR [[AND26]], [[PRED_COPY13]](s32) ; SI-NEXT: [[ZEXT15:%[0-9]+]]:_(s32) = G_ZEXT [[AND25]](s16) ; SI-NEXT: [[AND27:%[0-9]+]]:_(s32) = G_AND [[LSHR20]], [[C5]] ; SI-NEXT: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[AND27]], [[ZEXT15]](s32) @@ -1219,9 +1219,9 @@ ; SI-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[OR7]](s16) ; SI-NEXT: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[ANYEXT3]], [[ZEXT16]](s32) ; SI-NEXT: [[TRUNC18:%[0-9]+]]:_(s16) = G_TRUNC [[SHL14]](s32) - ; SI-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[C4]](s32) + ; SI-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[C4]](s32) ; SI-NEXT: [[AND30:%[0-9]+]]:_(s32) = G_AND [[SHL11]], [[C5]] - ; SI-NEXT: [[LSHR22:%[0-9]+]]:_(s32) = G_LSHR [[AND30]], [[COPY17]](s32) + ; SI-NEXT: [[LSHR22:%[0-9]+]]:_(s32) = G_LSHR [[AND30]], [[PRED_COPY14]](s32) ; SI-NEXT: [[ZEXT17:%[0-9]+]]:_(s32) = G_ZEXT [[AND29]](s16) ; SI-NEXT: [[AND31:%[0-9]+]]:_(s32) = G_AND [[LSHR22]], [[C5]] ; SI-NEXT: [[LSHR23:%[0-9]+]]:_(s32) = G_LSHR [[AND31]], [[ZEXT17]](s32) @@ -1276,9 +1276,9 @@ ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[TRUNC4]], [[C]](s16) ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[TRUNC5]], [[C]](s16) ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C4]](s32) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C4]](s32) ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C1]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[COPY3]], [[SHL4]] + ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY]], [[SHL4]] ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; VI-NEXT: [[XOR2:%[0-9]+]]:_(<2 x s16>) = G_XOR [[UV4]], [[BITCAST3]] ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[XOR2]](<2 x s16>) @@ -1332,10 +1332,10 @@ ; VI-NEXT: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR18]](s32) ; VI-NEXT: [[SHL10:%[0-9]+]]:_(s16) = G_SHL [[TRUNC12]], [[C]](s16) ; VI-NEXT: [[SHL11:%[0-9]+]]:_(s16) = G_SHL [[TRUNC13]], [[C]](s16) - ; VI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; VI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; VI-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[COPY5]], [[C1]](s32) - ; VI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[COPY4]], [[SHL12]] + ; VI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C4]](s32) + ; VI-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[C4]](s32) + ; VI-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY2]], [[C1]](s32) + ; VI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY1]], [[SHL12]] ; VI-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR8]](s32) ; VI-NEXT: [[XOR7:%[0-9]+]]:_(<2 x s16>) = G_XOR [[UV5]], [[BITCAST9]] ; VI-NEXT: [[BITCAST10:%[0-9]+]]:_(s32) = G_BITCAST [[XOR7]](<2 x s16>) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-icmp.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-icmp.mir --- 
a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-icmp.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-icmp.mir @@ -135,9 +135,9 @@ ; GFX7-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX7-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX7-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) ; GFX7-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C1]] - ; GFX7-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY1]](s32), [[AND]] + ; GFX7-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[PRED_COPY]](s32), [[AND]] ; GFX7-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 ; GFX7-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) ; GFX7-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C2]], [[TRUNC]] @@ -149,9 +149,9 @@ ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C1]] - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY1]](s32), [[AND]] + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[PRED_COPY]](s32), [[AND]] ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C2]], [[TRUNC]] @@ -163,9 +163,9 @@ ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C1]] - ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY1]](s32), [[AND]] + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[PRED_COPY]](s32), [[AND]] ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C2]], [[TRUNC]] @@ -191,9 +191,9 @@ ; GFX7-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX7-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX7-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16777215 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) ; GFX7-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C1]] - ; GFX7-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY1]](s32), [[AND]] + ; GFX7-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[PRED_COPY]](s32), [[AND]] ; GFX7-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C]], [[COPY]] ; GFX7-NEXT: $vgpr0 = COPY [[SELECT]](s32) ; GFX8-LABEL: name: test_icmp_s24 @@ -202,9 +202,9 @@ ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16777215 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C1]] - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY1]](s32), [[AND]] + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), 
[[PRED_COPY]](s32), [[AND]] ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C]], [[COPY]] ; GFX8-NEXT: $vgpr0 = COPY [[SELECT]](s32) ; GFX9-LABEL: name: test_icmp_s24 @@ -213,9 +213,9 @@ ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16777215 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C1]] - ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY1]](s32), [[AND]] + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[PRED_COPY]](s32), [[AND]] ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C]], [[COPY]] ; GFX9-NEXT: $vgpr0 = COPY [[SELECT]](s32) %0:_(s24) = G_CONSTANT i24 0 @@ -904,25 +904,25 @@ ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX7-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY [[C]](s64) - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY [[C]](s64) - ; GFX7-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](s64), [[COPY1]] + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:_(s64) = PRED_COPY [[C]](s64) + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s64) = PRED_COPY [[C]](s64) + ; GFX7-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[PRED_COPY]](s64), [[PRED_COPY1]] ; GFX7-NEXT: S_ENDPGM 0, implicit [[ICMP]](s1) ; GFX8-LABEL: name: test_icmp_s33 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY [[C]](s64) - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY [[C]](s64) - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](s64), [[COPY1]] + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:_(s64) = PRED_COPY [[C]](s64) + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s64) = PRED_COPY [[C]](s64) + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[PRED_COPY]](s64), [[PRED_COPY1]] ; GFX8-NEXT: S_ENDPGM 0, implicit [[ICMP]](s1) ; GFX9-LABEL: name: test_icmp_s33 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY [[C]](s64) - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY [[C]](s64) - ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](s64), [[COPY1]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s64) = PRED_COPY [[C]](s64) + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s64) = PRED_COPY [[C]](s64) + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[PRED_COPY]](s64), [[PRED_COPY1]] ; GFX9-NEXT: S_ENDPGM 0, implicit [[ICMP]](s1) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s33) = G_TRUNC %0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert-vector-elt.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert-vector-elt.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert-vector-elt.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert-vector-elt.mir @@ -423,256 +423,256 @@ ; CHECK-NEXT: G_STORE [[UV]](s32), [[FRAME_INDEX]](p5) :: (store (s32) into %stack.0, align 256, addrspace 5) ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C4]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(p5) = COPY [[PTR_ADD3]](p5) - ; CHECK-NEXT: G_STORE [[UV1]](s32), [[COPY2]](p5) :: (store (s32) into %stack.0 + 4, basealign 256, addrspace 5) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(p5) = 
PRED_COPY [[PTR_ADD3]](p5) + ; CHECK-NEXT: G_STORE [[UV1]](s32), [[PRED_COPY]](p5) :: (store (s32) into %stack.0 + 4, basealign 256, addrspace 5) ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; CHECK-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C5]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(p5) = COPY [[PTR_ADD4]](p5) - ; CHECK-NEXT: G_STORE [[UV2]](s32), [[COPY3]](p5) :: (store (s32) into %stack.0 + 8, align 8, basealign 256, addrspace 5) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(p5) = PRED_COPY [[PTR_ADD4]](p5) + ; CHECK-NEXT: G_STORE [[UV2]](s32), [[PRED_COPY1]](p5) :: (store (s32) into %stack.0 + 8, align 8, basealign 256, addrspace 5) ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; CHECK-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C6]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(p5) = COPY [[PTR_ADD5]](p5) - ; CHECK-NEXT: G_STORE [[UV3]](s32), [[COPY4]](p5) :: (store (s32) into %stack.0 + 12, basealign 256, addrspace 5) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(p5) = PRED_COPY [[PTR_ADD5]](p5) + ; CHECK-NEXT: G_STORE [[UV3]](s32), [[PRED_COPY2]](p5) :: (store (s32) into %stack.0 + 12, basealign 256, addrspace 5) ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; CHECK-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C7]](s32) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(p5) = COPY [[PTR_ADD6]](p5) - ; CHECK-NEXT: G_STORE [[UV4]](s32), [[COPY5]](p5) :: (store (s32) into %stack.0 + 16, align 16, basealign 256, addrspace 5) + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(p5) = PRED_COPY [[PTR_ADD6]](p5) + ; CHECK-NEXT: G_STORE [[UV4]](s32), [[PRED_COPY3]](p5) :: (store (s32) into %stack.0 + 16, align 16, basealign 256, addrspace 5) ; CHECK-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 ; CHECK-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C8]](s32) - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(p5) = COPY [[PTR_ADD7]](p5) - ; CHECK-NEXT: G_STORE [[UV5]](s32), [[COPY6]](p5) :: (store (s32) into %stack.0 + 20, basealign 256, addrspace 5) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:_(p5) = PRED_COPY [[PTR_ADD7]](p5) + ; CHECK-NEXT: G_STORE [[UV5]](s32), [[PRED_COPY4]](p5) :: (store (s32) into %stack.0 + 20, basealign 256, addrspace 5) ; CHECK-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 ; CHECK-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C9]](s32) - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(p5) = COPY [[PTR_ADD8]](p5) - ; CHECK-NEXT: G_STORE [[UV6]](s32), [[COPY7]](p5) :: (store (s32) into %stack.0 + 24, align 8, basealign 256, addrspace 5) + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:_(p5) = PRED_COPY [[PTR_ADD8]](p5) + ; CHECK-NEXT: G_STORE [[UV6]](s32), [[PRED_COPY5]](p5) :: (store (s32) into %stack.0 + 24, align 8, basealign 256, addrspace 5) ; CHECK-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 28 ; CHECK-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C10]](s32) - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p5) = COPY [[PTR_ADD9]](p5) - ; CHECK-NEXT: G_STORE [[UV7]](s32), [[COPY8]](p5) :: (store (s32) into %stack.0 + 28, basealign 256, addrspace 5) + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:_(p5) = PRED_COPY [[PTR_ADD9]](p5) + ; CHECK-NEXT: G_STORE [[UV7]](s32), [[PRED_COPY6]](p5) :: (store (s32) into %stack.0 + 28, basealign 256, addrspace 5) ; CHECK-NEXT: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 ; CHECK-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C11]](s32) - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p5) = COPY [[PTR_ADD10]](p5) - ; CHECK-NEXT: G_STORE [[UV8]](s32), [[COPY9]](p5) :: (store (s32) 
into %stack.0 + 32, align 32, basealign 256, addrspace 5) + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:_(p5) = PRED_COPY [[PTR_ADD10]](p5) + ; CHECK-NEXT: G_STORE [[UV8]](s32), [[PRED_COPY7]](p5) :: (store (s32) into %stack.0 + 32, align 32, basealign 256, addrspace 5) ; CHECK-NEXT: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 36 ; CHECK-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C12]](s32) - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p5) = COPY [[PTR_ADD11]](p5) - ; CHECK-NEXT: G_STORE [[UV9]](s32), [[COPY10]](p5) :: (store (s32) into %stack.0 + 36, basealign 256, addrspace 5) + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:_(p5) = PRED_COPY [[PTR_ADD11]](p5) + ; CHECK-NEXT: G_STORE [[UV9]](s32), [[PRED_COPY8]](p5) :: (store (s32) into %stack.0 + 36, basealign 256, addrspace 5) ; CHECK-NEXT: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 40 ; CHECK-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C13]](s32) - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p5) = COPY [[PTR_ADD12]](p5) - ; CHECK-NEXT: G_STORE [[UV10]](s32), [[COPY11]](p5) :: (store (s32) into %stack.0 + 40, align 8, basealign 256, addrspace 5) + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p5) = PRED_COPY [[PTR_ADD12]](p5) + ; CHECK-NEXT: G_STORE [[UV10]](s32), [[PRED_COPY9]](p5) :: (store (s32) into %stack.0 + 40, align 8, basealign 256, addrspace 5) ; CHECK-NEXT: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 44 ; CHECK-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C14]](s32) - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p5) = COPY [[PTR_ADD13]](p5) - ; CHECK-NEXT: G_STORE [[UV11]](s32), [[COPY12]](p5) :: (store (s32) into %stack.0 + 44, basealign 256, addrspace 5) + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p5) = PRED_COPY [[PTR_ADD13]](p5) + ; CHECK-NEXT: G_STORE [[UV11]](s32), [[PRED_COPY10]](p5) :: (store (s32) into %stack.0 + 44, basealign 256, addrspace 5) ; CHECK-NEXT: [[C15:%[0-9]+]]:_(s32) = G_CONSTANT i32 48 ; CHECK-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C15]](s32) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(p5) = COPY [[PTR_ADD14]](p5) - ; CHECK-NEXT: G_STORE [[UV12]](s32), [[COPY13]](p5) :: (store (s32) into %stack.0 + 48, align 16, basealign 256, addrspace 5) + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(p5) = PRED_COPY [[PTR_ADD14]](p5) + ; CHECK-NEXT: G_STORE [[UV12]](s32), [[PRED_COPY11]](p5) :: (store (s32) into %stack.0 + 48, align 16, basealign 256, addrspace 5) ; CHECK-NEXT: [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 52 ; CHECK-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C16]](s32) - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(p5) = COPY [[PTR_ADD15]](p5) - ; CHECK-NEXT: G_STORE [[UV13]](s32), [[COPY14]](p5) :: (store (s32) into %stack.0 + 52, basealign 256, addrspace 5) + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(p5) = PRED_COPY [[PTR_ADD15]](p5) + ; CHECK-NEXT: G_STORE [[UV13]](s32), [[PRED_COPY12]](p5) :: (store (s32) into %stack.0 + 52, basealign 256, addrspace 5) ; CHECK-NEXT: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 56 ; CHECK-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C17]](s32) - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(p5) = COPY [[PTR_ADD16]](p5) - ; CHECK-NEXT: G_STORE [[UV14]](s32), [[COPY15]](p5) :: (store (s32) into %stack.0 + 56, align 8, basealign 256, addrspace 5) + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(p5) = PRED_COPY [[PTR_ADD16]](p5) + ; CHECK-NEXT: G_STORE [[UV14]](s32), [[PRED_COPY13]](p5) :: (store (s32) into %stack.0 + 56, align 8, basealign 256, addrspace 5) ; CHECK-NEXT: [[C18:%[0-9]+]]:_(s32) = G_CONSTANT i32 60 ; CHECK-NEXT: 
[[PTR_ADD17:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C18]](s32) - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(p5) = COPY [[PTR_ADD17]](p5) - ; CHECK-NEXT: G_STORE [[UV15]](s32), [[COPY16]](p5) :: (store (s32) into %stack.0 + 60, basealign 256, addrspace 5) + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(p5) = PRED_COPY [[PTR_ADD17]](p5) + ; CHECK-NEXT: G_STORE [[UV15]](s32), [[PRED_COPY14]](p5) :: (store (s32) into %stack.0 + 60, basealign 256, addrspace 5) ; CHECK-NEXT: [[C19:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 ; CHECK-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C19]](s32) - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(p5) = COPY [[PTR_ADD18]](p5) - ; CHECK-NEXT: G_STORE [[UV16]](s32), [[COPY17]](p5) :: (store (s32) into %stack.0 + 64, align 64, basealign 256, addrspace 5) + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(p5) = PRED_COPY [[PTR_ADD18]](p5) + ; CHECK-NEXT: G_STORE [[UV16]](s32), [[PRED_COPY15]](p5) :: (store (s32) into %stack.0 + 64, align 64, basealign 256, addrspace 5) ; CHECK-NEXT: [[C20:%[0-9]+]]:_(s32) = G_CONSTANT i32 68 ; CHECK-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C20]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(p5) = COPY [[PTR_ADD19]](p5) - ; CHECK-NEXT: G_STORE [[UV17]](s32), [[COPY18]](p5) :: (store (s32) into %stack.0 + 68, basealign 256, addrspace 5) + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(p5) = PRED_COPY [[PTR_ADD19]](p5) + ; CHECK-NEXT: G_STORE [[UV17]](s32), [[PRED_COPY16]](p5) :: (store (s32) into %stack.0 + 68, basealign 256, addrspace 5) ; CHECK-NEXT: [[C21:%[0-9]+]]:_(s32) = G_CONSTANT i32 72 ; CHECK-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C21]](s32) - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(p5) = COPY [[PTR_ADD20]](p5) - ; CHECK-NEXT: G_STORE [[UV18]](s32), [[COPY19]](p5) :: (store (s32) into %stack.0 + 72, align 8, basealign 256, addrspace 5) + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(p5) = PRED_COPY [[PTR_ADD20]](p5) + ; CHECK-NEXT: G_STORE [[UV18]](s32), [[PRED_COPY17]](p5) :: (store (s32) into %stack.0 + 72, align 8, basealign 256, addrspace 5) ; CHECK-NEXT: [[C22:%[0-9]+]]:_(s32) = G_CONSTANT i32 76 ; CHECK-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C22]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(p5) = COPY [[PTR_ADD21]](p5) - ; CHECK-NEXT: G_STORE [[UV19]](s32), [[COPY20]](p5) :: (store (s32) into %stack.0 + 76, basealign 256, addrspace 5) + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(p5) = PRED_COPY [[PTR_ADD21]](p5) + ; CHECK-NEXT: G_STORE [[UV19]](s32), [[PRED_COPY18]](p5) :: (store (s32) into %stack.0 + 76, basealign 256, addrspace 5) ; CHECK-NEXT: [[C23:%[0-9]+]]:_(s32) = G_CONSTANT i32 80 ; CHECK-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C23]](s32) - ; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(p5) = COPY [[PTR_ADD22]](p5) - ; CHECK-NEXT: G_STORE [[UV20]](s32), [[COPY21]](p5) :: (store (s32) into %stack.0 + 80, align 16, basealign 256, addrspace 5) + ; CHECK-NEXT: [[PRED_COPY19:%[0-9]+]]:_(p5) = PRED_COPY [[PTR_ADD22]](p5) + ; CHECK-NEXT: G_STORE [[UV20]](s32), [[PRED_COPY19]](p5) :: (store (s32) into %stack.0 + 80, align 16, basealign 256, addrspace 5) ; CHECK-NEXT: [[C24:%[0-9]+]]:_(s32) = G_CONSTANT i32 84 ; CHECK-NEXT: [[PTR_ADD23:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C24]](s32) - ; CHECK-NEXT: [[COPY22:%[0-9]+]]:_(p5) = COPY [[PTR_ADD23]](p5) - ; CHECK-NEXT: G_STORE [[UV21]](s32), [[COPY22]](p5) :: (store (s32) into %stack.0 + 84, basealign 256, addrspace 5) + ; CHECK-NEXT: [[PRED_COPY20:%[0-9]+]]:_(p5) = PRED_COPY [[PTR_ADD23]](p5) + ; CHECK-NEXT: 
G_STORE [[UV21]](s32), [[PRED_COPY20]](p5) :: (store (s32) into %stack.0 + 84, basealign 256, addrspace 5) ; CHECK-NEXT: [[C25:%[0-9]+]]:_(s32) = G_CONSTANT i32 88 ; CHECK-NEXT: [[PTR_ADD24:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C25]](s32) - ; CHECK-NEXT: [[COPY23:%[0-9]+]]:_(p5) = COPY [[PTR_ADD24]](p5) - ; CHECK-NEXT: G_STORE [[UV22]](s32), [[COPY23]](p5) :: (store (s32) into %stack.0 + 88, align 8, basealign 256, addrspace 5) + ; CHECK-NEXT: [[PRED_COPY21:%[0-9]+]]:_(p5) = PRED_COPY [[PTR_ADD24]](p5) + ; CHECK-NEXT: G_STORE [[UV22]](s32), [[PRED_COPY21]](p5) :: (store (s32) into %stack.0 + 88, align 8, basealign 256, addrspace 5) ; CHECK-NEXT: [[C26:%[0-9]+]]:_(s32) = G_CONSTANT i32 92 ; CHECK-NEXT: [[PTR_ADD25:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C26]](s32) - ; CHECK-NEXT: [[COPY24:%[0-9]+]]:_(p5) = COPY [[PTR_ADD25]](p5) - ; CHECK-NEXT: G_STORE [[UV23]](s32), [[COPY24]](p5) :: (store (s32) into %stack.0 + 92, basealign 256, addrspace 5) + ; CHECK-NEXT: [[PRED_COPY22:%[0-9]+]]:_(p5) = PRED_COPY [[PTR_ADD25]](p5) + ; CHECK-NEXT: G_STORE [[UV23]](s32), [[PRED_COPY22]](p5) :: (store (s32) into %stack.0 + 92, basealign 256, addrspace 5) ; CHECK-NEXT: [[C27:%[0-9]+]]:_(s32) = G_CONSTANT i32 96 ; CHECK-NEXT: [[PTR_ADD26:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C27]](s32) - ; CHECK-NEXT: [[COPY25:%[0-9]+]]:_(p5) = COPY [[PTR_ADD26]](p5) - ; CHECK-NEXT: G_STORE [[UV24]](s32), [[COPY25]](p5) :: (store (s32) into %stack.0 + 96, align 32, basealign 256, addrspace 5) + ; CHECK-NEXT: [[PRED_COPY23:%[0-9]+]]:_(p5) = PRED_COPY [[PTR_ADD26]](p5) + ; CHECK-NEXT: G_STORE [[UV24]](s32), [[PRED_COPY23]](p5) :: (store (s32) into %stack.0 + 96, align 32, basealign 256, addrspace 5) ; CHECK-NEXT: [[C28:%[0-9]+]]:_(s32) = G_CONSTANT i32 100 ; CHECK-NEXT: [[PTR_ADD27:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C28]](s32) - ; CHECK-NEXT: [[COPY26:%[0-9]+]]:_(p5) = COPY [[PTR_ADD27]](p5) - ; CHECK-NEXT: G_STORE [[UV25]](s32), [[COPY26]](p5) :: (store (s32) into %stack.0 + 100, basealign 256, addrspace 5) + ; CHECK-NEXT: [[PRED_COPY24:%[0-9]+]]:_(p5) = PRED_COPY [[PTR_ADD27]](p5) + ; CHECK-NEXT: G_STORE [[UV25]](s32), [[PRED_COPY24]](p5) :: (store (s32) into %stack.0 + 100, basealign 256, addrspace 5) ; CHECK-NEXT: [[C29:%[0-9]+]]:_(s32) = G_CONSTANT i32 104 ; CHECK-NEXT: [[PTR_ADD28:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C29]](s32) - ; CHECK-NEXT: [[COPY27:%[0-9]+]]:_(p5) = COPY [[PTR_ADD28]](p5) - ; CHECK-NEXT: G_STORE [[UV26]](s32), [[COPY27]](p5) :: (store (s32) into %stack.0 + 104, align 8, basealign 256, addrspace 5) + ; CHECK-NEXT: [[PRED_COPY25:%[0-9]+]]:_(p5) = PRED_COPY [[PTR_ADD28]](p5) + ; CHECK-NEXT: G_STORE [[UV26]](s32), [[PRED_COPY25]](p5) :: (store (s32) into %stack.0 + 104, align 8, basealign 256, addrspace 5) ; CHECK-NEXT: [[C30:%[0-9]+]]:_(s32) = G_CONSTANT i32 108 ; CHECK-NEXT: [[PTR_ADD29:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C30]](s32) - ; CHECK-NEXT: [[COPY28:%[0-9]+]]:_(p5) = COPY [[PTR_ADD29]](p5) - ; CHECK-NEXT: G_STORE [[UV27]](s32), [[COPY28]](p5) :: (store (s32) into %stack.0 + 108, basealign 256, addrspace 5) + ; CHECK-NEXT: [[PRED_COPY26:%[0-9]+]]:_(p5) = PRED_COPY [[PTR_ADD29]](p5) + ; CHECK-NEXT: G_STORE [[UV27]](s32), [[PRED_COPY26]](p5) :: (store (s32) into %stack.0 + 108, basealign 256, addrspace 5) ; CHECK-NEXT: [[C31:%[0-9]+]]:_(s32) = G_CONSTANT i32 112 ; CHECK-NEXT: [[PTR_ADD30:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C31]](s32) - ; CHECK-NEXT: [[COPY29:%[0-9]+]]:_(p5) = COPY [[PTR_ADD30]](p5) - ; CHECK-NEXT: G_STORE 
[[UV28]](s32), [[COPY29]](p5) :: (store (s32) into %stack.0 + 112, align 16, basealign 256, addrspace 5) + ; CHECK-NEXT: [[PRED_COPY27:%[0-9]+]]:_(p5) = PRED_COPY [[PTR_ADD30]](p5) + ; CHECK-NEXT: G_STORE [[UV28]](s32), [[PRED_COPY27]](p5) :: (store (s32) into %stack.0 + 112, align 16, basealign 256, addrspace 5) ; CHECK-NEXT: [[C32:%[0-9]+]]:_(s32) = G_CONSTANT i32 116 ; CHECK-NEXT: [[PTR_ADD31:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C32]](s32) - ; CHECK-NEXT: [[COPY30:%[0-9]+]]:_(p5) = COPY [[PTR_ADD31]](p5) - ; CHECK-NEXT: G_STORE [[UV29]](s32), [[COPY30]](p5) :: (store (s32) into %stack.0 + 116, basealign 256, addrspace 5) + ; CHECK-NEXT: [[PRED_COPY28:%[0-9]+]]:_(p5) = PRED_COPY [[PTR_ADD31]](p5) + ; CHECK-NEXT: G_STORE [[UV29]](s32), [[PRED_COPY28]](p5) :: (store (s32) into %stack.0 + 116, basealign 256, addrspace 5) ; CHECK-NEXT: [[C33:%[0-9]+]]:_(s32) = G_CONSTANT i32 120 ; CHECK-NEXT: [[PTR_ADD32:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C33]](s32) - ; CHECK-NEXT: [[COPY31:%[0-9]+]]:_(p5) = COPY [[PTR_ADD32]](p5) - ; CHECK-NEXT: G_STORE [[UV30]](s32), [[COPY31]](p5) :: (store (s32) into %stack.0 + 120, align 8, basealign 256, addrspace 5) + ; CHECK-NEXT: [[PRED_COPY29:%[0-9]+]]:_(p5) = PRED_COPY [[PTR_ADD32]](p5) + ; CHECK-NEXT: G_STORE [[UV30]](s32), [[PRED_COPY29]](p5) :: (store (s32) into %stack.0 + 120, align 8, basealign 256, addrspace 5) ; CHECK-NEXT: [[C34:%[0-9]+]]:_(s32) = G_CONSTANT i32 124 ; CHECK-NEXT: [[PTR_ADD33:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C34]](s32) - ; CHECK-NEXT: [[COPY32:%[0-9]+]]:_(p5) = COPY [[PTR_ADD33]](p5) - ; CHECK-NEXT: G_STORE [[UV31]](s32), [[COPY32]](p5) :: (store (s32) into %stack.0 + 124, basealign 256, addrspace 5) + ; CHECK-NEXT: [[PRED_COPY30:%[0-9]+]]:_(p5) = PRED_COPY [[PTR_ADD33]](p5) + ; CHECK-NEXT: G_STORE [[UV31]](s32), [[PRED_COPY30]](p5) :: (store (s32) into %stack.0 + 124, basealign 256, addrspace 5) ; CHECK-NEXT: [[C35:%[0-9]+]]:_(s32) = G_CONSTANT i32 128 ; CHECK-NEXT: [[PTR_ADD34:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C35]](s32) - ; CHECK-NEXT: [[COPY33:%[0-9]+]]:_(p5) = COPY [[PTR_ADD34]](p5) - ; CHECK-NEXT: G_STORE [[UV32]](s32), [[COPY33]](p5) :: (store (s32) into %stack.0 + 128, align 128, basealign 256, addrspace 5) + ; CHECK-NEXT: [[PRED_COPY31:%[0-9]+]]:_(p5) = PRED_COPY [[PTR_ADD34]](p5) + ; CHECK-NEXT: G_STORE [[UV32]](s32), [[PRED_COPY31]](p5) :: (store (s32) into %stack.0 + 128, align 128, basealign 256, addrspace 5) ; CHECK-NEXT: [[C36:%[0-9]+]]:_(s32) = G_CONSTANT i32 132 ; CHECK-NEXT: [[PTR_ADD35:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C36]](s32) - ; CHECK-NEXT: [[COPY34:%[0-9]+]]:_(p5) = COPY [[PTR_ADD35]](p5) - ; CHECK-NEXT: G_STORE [[UV33]](s32), [[COPY34]](p5) :: (store (s32) into %stack.0 + 132, basealign 256, addrspace 5) + ; CHECK-NEXT: [[PRED_COPY32:%[0-9]+]]:_(p5) = PRED_COPY [[PTR_ADD35]](p5) + ; CHECK-NEXT: G_STORE [[UV33]](s32), [[PRED_COPY32]](p5) :: (store (s32) into %stack.0 + 132, basealign 256, addrspace 5) ; CHECK-NEXT: [[C37:%[0-9]+]]:_(s32) = G_CONSTANT i32 136 ; CHECK-NEXT: [[PTR_ADD36:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C37]](s32) - ; CHECK-NEXT: [[COPY35:%[0-9]+]]:_(p5) = COPY [[PTR_ADD36]](p5) - ; CHECK-NEXT: G_STORE [[UV34]](s32), [[COPY35]](p5) :: (store (s32) into %stack.0 + 136, align 8, basealign 256, addrspace 5) + ; CHECK-NEXT: [[PRED_COPY33:%[0-9]+]]:_(p5) = PRED_COPY [[PTR_ADD36]](p5) + ; CHECK-NEXT: G_STORE [[UV34]](s32), [[PRED_COPY33]](p5) :: (store (s32) into %stack.0 + 136, align 8, basealign 256, addrspace 5) ; CHECK-NEXT: 
[[C38:%[0-9]+]]:_(s32) = G_CONSTANT i32 140 ; CHECK-NEXT: [[PTR_ADD37:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C38]](s32) - ; CHECK-NEXT: [[COPY36:%[0-9]+]]:_(p5) = COPY [[PTR_ADD37]](p5) - ; CHECK-NEXT: G_STORE [[UV35]](s32), [[COPY36]](p5) :: (store (s32) into %stack.0 + 140, basealign 256, addrspace 5) + ; CHECK-NEXT: [[PRED_COPY34:%[0-9]+]]:_(p5) = PRED_COPY [[PTR_ADD37]](p5) + ; CHECK-NEXT: G_STORE [[UV35]](s32), [[PRED_COPY34]](p5) :: (store (s32) into %stack.0 + 140, basealign 256, addrspace 5) ; CHECK-NEXT: [[C39:%[0-9]+]]:_(s32) = G_CONSTANT i32 144 ; CHECK-NEXT: [[PTR_ADD38:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C39]](s32) - ; CHECK-NEXT: [[COPY37:%[0-9]+]]:_(p5) = COPY [[PTR_ADD38]](p5) - ; CHECK-NEXT: G_STORE [[UV36]](s32), [[COPY37]](p5) :: (store (s32) into %stack.0 + 144, align 16, basealign 256, addrspace 5) + ; CHECK-NEXT: [[PRED_COPY35:%[0-9]+]]:_(p5) = PRED_COPY [[PTR_ADD38]](p5) + ; CHECK-NEXT: G_STORE [[UV36]](s32), [[PRED_COPY35]](p5) :: (store (s32) into %stack.0 + 144, align 16, basealign 256, addrspace 5) ; CHECK-NEXT: [[C40:%[0-9]+]]:_(s32) = G_CONSTANT i32 148 ; CHECK-NEXT: [[PTR_ADD39:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C40]](s32) - ; CHECK-NEXT: [[COPY38:%[0-9]+]]:_(p5) = COPY [[PTR_ADD39]](p5) - ; CHECK-NEXT: G_STORE [[UV37]](s32), [[COPY38]](p5) :: (store (s32) into %stack.0 + 148, basealign 256, addrspace 5) + ; CHECK-NEXT: [[PRED_COPY36:%[0-9]+]]:_(p5) = PRED_COPY [[PTR_ADD39]](p5) + ; CHECK-NEXT: G_STORE [[UV37]](s32), [[PRED_COPY36]](p5) :: (store (s32) into %stack.0 + 148, basealign 256, addrspace 5) ; CHECK-NEXT: [[C41:%[0-9]+]]:_(s32) = G_CONSTANT i32 152 ; CHECK-NEXT: [[PTR_ADD40:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C41]](s32) - ; CHECK-NEXT: [[COPY39:%[0-9]+]]:_(p5) = COPY [[PTR_ADD40]](p5) - ; CHECK-NEXT: G_STORE [[UV38]](s32), [[COPY39]](p5) :: (store (s32) into %stack.0 + 152, align 8, basealign 256, addrspace 5) + ; CHECK-NEXT: [[PRED_COPY37:%[0-9]+]]:_(p5) = PRED_COPY [[PTR_ADD40]](p5) + ; CHECK-NEXT: G_STORE [[UV38]](s32), [[PRED_COPY37]](p5) :: (store (s32) into %stack.0 + 152, align 8, basealign 256, addrspace 5) ; CHECK-NEXT: [[C42:%[0-9]+]]:_(s32) = G_CONSTANT i32 156 ; CHECK-NEXT: [[PTR_ADD41:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C42]](s32) - ; CHECK-NEXT: [[COPY40:%[0-9]+]]:_(p5) = COPY [[PTR_ADD41]](p5) - ; CHECK-NEXT: G_STORE [[UV39]](s32), [[COPY40]](p5) :: (store (s32) into %stack.0 + 156, basealign 256, addrspace 5) + ; CHECK-NEXT: [[PRED_COPY38:%[0-9]+]]:_(p5) = PRED_COPY [[PTR_ADD41]](p5) + ; CHECK-NEXT: G_STORE [[UV39]](s32), [[PRED_COPY38]](p5) :: (store (s32) into %stack.0 + 156, basealign 256, addrspace 5) ; CHECK-NEXT: [[C43:%[0-9]+]]:_(s32) = G_CONSTANT i32 160 ; CHECK-NEXT: [[PTR_ADD42:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C43]](s32) - ; CHECK-NEXT: [[COPY41:%[0-9]+]]:_(p5) = COPY [[PTR_ADD42]](p5) - ; CHECK-NEXT: G_STORE [[UV40]](s32), [[COPY41]](p5) :: (store (s32) into %stack.0 + 160, align 32, basealign 256, addrspace 5) + ; CHECK-NEXT: [[PRED_COPY39:%[0-9]+]]:_(p5) = PRED_COPY [[PTR_ADD42]](p5) + ; CHECK-NEXT: G_STORE [[UV40]](s32), [[PRED_COPY39]](p5) :: (store (s32) into %stack.0 + 160, align 32, basealign 256, addrspace 5) ; CHECK-NEXT: [[C44:%[0-9]+]]:_(s32) = G_CONSTANT i32 164 ; CHECK-NEXT: [[PTR_ADD43:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C44]](s32) - ; CHECK-NEXT: [[COPY42:%[0-9]+]]:_(p5) = COPY [[PTR_ADD43]](p5) - ; CHECK-NEXT: G_STORE [[UV41]](s32), [[COPY42]](p5) :: (store (s32) into %stack.0 + 164, basealign 256, addrspace 5) + ; CHECK-NEXT: 
[[PRED_COPY40:%[0-9]+]]:_(p5) = PRED_COPY [[PTR_ADD43]](p5) + ; CHECK-NEXT: G_STORE [[UV41]](s32), [[PRED_COPY40]](p5) :: (store (s32) into %stack.0 + 164, basealign 256, addrspace 5) ; CHECK-NEXT: [[C45:%[0-9]+]]:_(s32) = G_CONSTANT i32 168 ; CHECK-NEXT: [[PTR_ADD44:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C45]](s32) - ; CHECK-NEXT: [[COPY43:%[0-9]+]]:_(p5) = COPY [[PTR_ADD44]](p5) - ; CHECK-NEXT: G_STORE [[UV42]](s32), [[COPY43]](p5) :: (store (s32) into %stack.0 + 168, align 8, basealign 256, addrspace 5) + ; CHECK-NEXT: [[PRED_COPY41:%[0-9]+]]:_(p5) = PRED_COPY [[PTR_ADD44]](p5) + ; CHECK-NEXT: G_STORE [[UV42]](s32), [[PRED_COPY41]](p5) :: (store (s32) into %stack.0 + 168, align 8, basealign 256, addrspace 5) ; CHECK-NEXT: [[C46:%[0-9]+]]:_(s32) = G_CONSTANT i32 172 ; CHECK-NEXT: [[PTR_ADD45:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C46]](s32) - ; CHECK-NEXT: [[COPY44:%[0-9]+]]:_(p5) = COPY [[PTR_ADD45]](p5) - ; CHECK-NEXT: G_STORE [[UV43]](s32), [[COPY44]](p5) :: (store (s32) into %stack.0 + 172, basealign 256, addrspace 5) + ; CHECK-NEXT: [[PRED_COPY42:%[0-9]+]]:_(p5) = PRED_COPY [[PTR_ADD45]](p5) + ; CHECK-NEXT: G_STORE [[UV43]](s32), [[PRED_COPY42]](p5) :: (store (s32) into %stack.0 + 172, basealign 256, addrspace 5) ; CHECK-NEXT: [[C47:%[0-9]+]]:_(s32) = G_CONSTANT i32 176 ; CHECK-NEXT: [[PTR_ADD46:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C47]](s32) - ; CHECK-NEXT: [[COPY45:%[0-9]+]]:_(p5) = COPY [[PTR_ADD46]](p5) - ; CHECK-NEXT: G_STORE [[UV44]](s32), [[COPY45]](p5) :: (store (s32) into %stack.0 + 176, align 16, basealign 256, addrspace 5) + ; CHECK-NEXT: [[PRED_COPY43:%[0-9]+]]:_(p5) = PRED_COPY [[PTR_ADD46]](p5) + ; CHECK-NEXT: G_STORE [[UV44]](s32), [[PRED_COPY43]](p5) :: (store (s32) into %stack.0 + 176, align 16, basealign 256, addrspace 5) ; CHECK-NEXT: [[C48:%[0-9]+]]:_(s32) = G_CONSTANT i32 180 ; CHECK-NEXT: [[PTR_ADD47:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C48]](s32) - ; CHECK-NEXT: [[COPY46:%[0-9]+]]:_(p5) = COPY [[PTR_ADD47]](p5) - ; CHECK-NEXT: G_STORE [[UV45]](s32), [[COPY46]](p5) :: (store (s32) into %stack.0 + 180, basealign 256, addrspace 5) + ; CHECK-NEXT: [[PRED_COPY44:%[0-9]+]]:_(p5) = PRED_COPY [[PTR_ADD47]](p5) + ; CHECK-NEXT: G_STORE [[UV45]](s32), [[PRED_COPY44]](p5) :: (store (s32) into %stack.0 + 180, basealign 256, addrspace 5) ; CHECK-NEXT: [[C49:%[0-9]+]]:_(s32) = G_CONSTANT i32 184 ; CHECK-NEXT: [[PTR_ADD48:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C49]](s32) - ; CHECK-NEXT: [[COPY47:%[0-9]+]]:_(p5) = COPY [[PTR_ADD48]](p5) - ; CHECK-NEXT: G_STORE [[UV46]](s32), [[COPY47]](p5) :: (store (s32) into %stack.0 + 184, align 8, basealign 256, addrspace 5) + ; CHECK-NEXT: [[PRED_COPY45:%[0-9]+]]:_(p5) = PRED_COPY [[PTR_ADD48]](p5) + ; CHECK-NEXT: G_STORE [[UV46]](s32), [[PRED_COPY45]](p5) :: (store (s32) into %stack.0 + 184, align 8, basealign 256, addrspace 5) ; CHECK-NEXT: [[C50:%[0-9]+]]:_(s32) = G_CONSTANT i32 188 ; CHECK-NEXT: [[PTR_ADD49:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C50]](s32) - ; CHECK-NEXT: [[COPY48:%[0-9]+]]:_(p5) = COPY [[PTR_ADD49]](p5) - ; CHECK-NEXT: G_STORE [[UV47]](s32), [[COPY48]](p5) :: (store (s32) into %stack.0 + 188, basealign 256, addrspace 5) + ; CHECK-NEXT: [[PRED_COPY46:%[0-9]+]]:_(p5) = PRED_COPY [[PTR_ADD49]](p5) + ; CHECK-NEXT: G_STORE [[UV47]](s32), [[PRED_COPY46]](p5) :: (store (s32) into %stack.0 + 188, basealign 256, addrspace 5) ; CHECK-NEXT: [[C51:%[0-9]+]]:_(s32) = G_CONSTANT i32 192 ; CHECK-NEXT: [[PTR_ADD50:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C51]](s32) - ; 
CHECK-NEXT: [[COPY49:%[0-9]+]]:_(p5) = COPY [[PTR_ADD50]](p5) - ; CHECK-NEXT: G_STORE [[UV48]](s32), [[COPY49]](p5) :: (store (s32) into %stack.0 + 192, align 64, basealign 256, addrspace 5) + ; CHECK-NEXT: [[PRED_COPY47:%[0-9]+]]:_(p5) = PRED_COPY [[PTR_ADD50]](p5) + ; CHECK-NEXT: G_STORE [[UV48]](s32), [[PRED_COPY47]](p5) :: (store (s32) into %stack.0 + 192, align 64, basealign 256, addrspace 5) ; CHECK-NEXT: [[C52:%[0-9]+]]:_(s32) = G_CONSTANT i32 196 ; CHECK-NEXT: [[PTR_ADD51:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C52]](s32) - ; CHECK-NEXT: [[COPY50:%[0-9]+]]:_(p5) = COPY [[PTR_ADD51]](p5) - ; CHECK-NEXT: G_STORE [[UV49]](s32), [[COPY50]](p5) :: (store (s32) into %stack.0 + 196, basealign 256, addrspace 5) + ; CHECK-NEXT: [[PRED_COPY48:%[0-9]+]]:_(p5) = PRED_COPY [[PTR_ADD51]](p5) + ; CHECK-NEXT: G_STORE [[UV49]](s32), [[PRED_COPY48]](p5) :: (store (s32) into %stack.0 + 196, basealign 256, addrspace 5) ; CHECK-NEXT: [[C53:%[0-9]+]]:_(s32) = G_CONSTANT i32 200 ; CHECK-NEXT: [[PTR_ADD52:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C53]](s32) - ; CHECK-NEXT: [[COPY51:%[0-9]+]]:_(p5) = COPY [[PTR_ADD52]](p5) - ; CHECK-NEXT: G_STORE [[UV50]](s32), [[COPY51]](p5) :: (store (s32) into %stack.0 + 200, align 8, basealign 256, addrspace 5) + ; CHECK-NEXT: [[PRED_COPY49:%[0-9]+]]:_(p5) = PRED_COPY [[PTR_ADD52]](p5) + ; CHECK-NEXT: G_STORE [[UV50]](s32), [[PRED_COPY49]](p5) :: (store (s32) into %stack.0 + 200, align 8, basealign 256, addrspace 5) ; CHECK-NEXT: [[C54:%[0-9]+]]:_(s32) = G_CONSTANT i32 204 ; CHECK-NEXT: [[PTR_ADD53:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C54]](s32) - ; CHECK-NEXT: [[COPY52:%[0-9]+]]:_(p5) = COPY [[PTR_ADD53]](p5) - ; CHECK-NEXT: G_STORE [[UV51]](s32), [[COPY52]](p5) :: (store (s32) into %stack.0 + 204, basealign 256, addrspace 5) + ; CHECK-NEXT: [[PRED_COPY50:%[0-9]+]]:_(p5) = PRED_COPY [[PTR_ADD53]](p5) + ; CHECK-NEXT: G_STORE [[UV51]](s32), [[PRED_COPY50]](p5) :: (store (s32) into %stack.0 + 204, basealign 256, addrspace 5) ; CHECK-NEXT: [[C55:%[0-9]+]]:_(s32) = G_CONSTANT i32 208 ; CHECK-NEXT: [[PTR_ADD54:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C55]](s32) - ; CHECK-NEXT: [[COPY53:%[0-9]+]]:_(p5) = COPY [[PTR_ADD54]](p5) - ; CHECK-NEXT: G_STORE [[UV52]](s32), [[COPY53]](p5) :: (store (s32) into %stack.0 + 208, align 16, basealign 256, addrspace 5) + ; CHECK-NEXT: [[PRED_COPY51:%[0-9]+]]:_(p5) = PRED_COPY [[PTR_ADD54]](p5) + ; CHECK-NEXT: G_STORE [[UV52]](s32), [[PRED_COPY51]](p5) :: (store (s32) into %stack.0 + 208, align 16, basealign 256, addrspace 5) ; CHECK-NEXT: [[C56:%[0-9]+]]:_(s32) = G_CONSTANT i32 212 ; CHECK-NEXT: [[PTR_ADD55:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C56]](s32) - ; CHECK-NEXT: [[COPY54:%[0-9]+]]:_(p5) = COPY [[PTR_ADD55]](p5) - ; CHECK-NEXT: G_STORE [[UV53]](s32), [[COPY54]](p5) :: (store (s32) into %stack.0 + 212, basealign 256, addrspace 5) + ; CHECK-NEXT: [[PRED_COPY52:%[0-9]+]]:_(p5) = PRED_COPY [[PTR_ADD55]](p5) + ; CHECK-NEXT: G_STORE [[UV53]](s32), [[PRED_COPY52]](p5) :: (store (s32) into %stack.0 + 212, basealign 256, addrspace 5) ; CHECK-NEXT: [[C57:%[0-9]+]]:_(s32) = G_CONSTANT i32 216 ; CHECK-NEXT: [[PTR_ADD56:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C57]](s32) - ; CHECK-NEXT: [[COPY55:%[0-9]+]]:_(p5) = COPY [[PTR_ADD56]](p5) - ; CHECK-NEXT: G_STORE [[UV54]](s32), [[COPY55]](p5) :: (store (s32) into %stack.0 + 216, align 8, basealign 256, addrspace 5) + ; CHECK-NEXT: [[PRED_COPY53:%[0-9]+]]:_(p5) = PRED_COPY [[PTR_ADD56]](p5) + ; CHECK-NEXT: G_STORE [[UV54]](s32), [[PRED_COPY53]](p5) :: (store 
(s32) into %stack.0 + 216, align 8, basealign 256, addrspace 5) ; CHECK-NEXT: [[C58:%[0-9]+]]:_(s32) = G_CONSTANT i32 220 ; CHECK-NEXT: [[PTR_ADD57:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C58]](s32) - ; CHECK-NEXT: [[COPY56:%[0-9]+]]:_(p5) = COPY [[PTR_ADD57]](p5) - ; CHECK-NEXT: G_STORE [[UV55]](s32), [[COPY56]](p5) :: (store (s32) into %stack.0 + 220, basealign 256, addrspace 5) + ; CHECK-NEXT: [[PRED_COPY54:%[0-9]+]]:_(p5) = PRED_COPY [[PTR_ADD57]](p5) + ; CHECK-NEXT: G_STORE [[UV55]](s32), [[PRED_COPY54]](p5) :: (store (s32) into %stack.0 + 220, basealign 256, addrspace 5) ; CHECK-NEXT: [[C59:%[0-9]+]]:_(s32) = G_CONSTANT i32 224 ; CHECK-NEXT: [[PTR_ADD58:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C59]](s32) - ; CHECK-NEXT: [[COPY57:%[0-9]+]]:_(p5) = COPY [[PTR_ADD58]](p5) - ; CHECK-NEXT: G_STORE [[UV56]](s32), [[COPY57]](p5) :: (store (s32) into %stack.0 + 224, align 32, basealign 256, addrspace 5) + ; CHECK-NEXT: [[PRED_COPY55:%[0-9]+]]:_(p5) = PRED_COPY [[PTR_ADD58]](p5) + ; CHECK-NEXT: G_STORE [[UV56]](s32), [[PRED_COPY55]](p5) :: (store (s32) into %stack.0 + 224, align 32, basealign 256, addrspace 5) ; CHECK-NEXT: [[C60:%[0-9]+]]:_(s32) = G_CONSTANT i32 228 ; CHECK-NEXT: [[PTR_ADD59:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C60]](s32) - ; CHECK-NEXT: [[COPY58:%[0-9]+]]:_(p5) = COPY [[PTR_ADD59]](p5) - ; CHECK-NEXT: G_STORE [[UV57]](s32), [[COPY58]](p5) :: (store (s32) into %stack.0 + 228, basealign 256, addrspace 5) + ; CHECK-NEXT: [[PRED_COPY56:%[0-9]+]]:_(p5) = PRED_COPY [[PTR_ADD59]](p5) + ; CHECK-NEXT: G_STORE [[UV57]](s32), [[PRED_COPY56]](p5) :: (store (s32) into %stack.0 + 228, basealign 256, addrspace 5) ; CHECK-NEXT: [[C61:%[0-9]+]]:_(s32) = G_CONSTANT i32 232 ; CHECK-NEXT: [[PTR_ADD60:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C61]](s32) - ; CHECK-NEXT: [[COPY59:%[0-9]+]]:_(p5) = COPY [[PTR_ADD60]](p5) - ; CHECK-NEXT: G_STORE [[UV58]](s32), [[COPY59]](p5) :: (store (s32) into %stack.0 + 232, align 8, basealign 256, addrspace 5) + ; CHECK-NEXT: [[PRED_COPY57:%[0-9]+]]:_(p5) = PRED_COPY [[PTR_ADD60]](p5) + ; CHECK-NEXT: G_STORE [[UV58]](s32), [[PRED_COPY57]](p5) :: (store (s32) into %stack.0 + 232, align 8, basealign 256, addrspace 5) ; CHECK-NEXT: [[C62:%[0-9]+]]:_(s32) = G_CONSTANT i32 236 ; CHECK-NEXT: [[PTR_ADD61:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C62]](s32) - ; CHECK-NEXT: [[COPY60:%[0-9]+]]:_(p5) = COPY [[PTR_ADD61]](p5) - ; CHECK-NEXT: G_STORE [[UV59]](s32), [[COPY60]](p5) :: (store (s32) into %stack.0 + 236, basealign 256, addrspace 5) + ; CHECK-NEXT: [[PRED_COPY58:%[0-9]+]]:_(p5) = PRED_COPY [[PTR_ADD61]](p5) + ; CHECK-NEXT: G_STORE [[UV59]](s32), [[PRED_COPY58]](p5) :: (store (s32) into %stack.0 + 236, basealign 256, addrspace 5) ; CHECK-NEXT: [[C63:%[0-9]+]]:_(s32) = G_CONSTANT i32 240 ; CHECK-NEXT: [[PTR_ADD62:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C63]](s32) - ; CHECK-NEXT: [[COPY61:%[0-9]+]]:_(p5) = COPY [[PTR_ADD62]](p5) - ; CHECK-NEXT: G_STORE [[UV60]](s32), [[COPY61]](p5) :: (store (s32) into %stack.0 + 240, align 16, basealign 256, addrspace 5) + ; CHECK-NEXT: [[PRED_COPY59:%[0-9]+]]:_(p5) = PRED_COPY [[PTR_ADD62]](p5) + ; CHECK-NEXT: G_STORE [[UV60]](s32), [[PRED_COPY59]](p5) :: (store (s32) into %stack.0 + 240, align 16, basealign 256, addrspace 5) ; CHECK-NEXT: [[C64:%[0-9]+]]:_(s32) = G_CONSTANT i32 244 ; CHECK-NEXT: [[PTR_ADD63:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C64]](s32) - ; CHECK-NEXT: [[COPY62:%[0-9]+]]:_(p5) = COPY [[PTR_ADD63]](p5) - ; CHECK-NEXT: G_STORE [[UV61]](s32), [[COPY62]](p5) :: 
(store (s32) into %stack.0 + 244, basealign 256, addrspace 5) + ; CHECK-NEXT: [[PRED_COPY60:%[0-9]+]]:_(p5) = PRED_COPY [[PTR_ADD63]](p5) + ; CHECK-NEXT: G_STORE [[UV61]](s32), [[PRED_COPY60]](p5) :: (store (s32) into %stack.0 + 244, basealign 256, addrspace 5) ; CHECK-NEXT: [[C65:%[0-9]+]]:_(s32) = G_CONSTANT i32 248 ; CHECK-NEXT: [[PTR_ADD64:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C65]](s32) - ; CHECK-NEXT: [[COPY63:%[0-9]+]]:_(p5) = COPY [[PTR_ADD64]](p5) - ; CHECK-NEXT: G_STORE [[UV62]](s32), [[COPY63]](p5) :: (store (s32) into %stack.0 + 248, align 8, basealign 256, addrspace 5) + ; CHECK-NEXT: [[PRED_COPY61:%[0-9]+]]:_(p5) = PRED_COPY [[PTR_ADD64]](p5) + ; CHECK-NEXT: G_STORE [[UV62]](s32), [[PRED_COPY61]](p5) :: (store (s32) into %stack.0 + 248, align 8, basealign 256, addrspace 5) ; CHECK-NEXT: [[C66:%[0-9]+]]:_(s32) = G_CONSTANT i32 252 ; CHECK-NEXT: [[PTR_ADD65:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C66]](s32) - ; CHECK-NEXT: [[COPY64:%[0-9]+]]:_(p5) = COPY [[PTR_ADD65]](p5) - ; CHECK-NEXT: G_STORE [[UV63]](s32), [[COPY64]](p5) :: (store (s32) into %stack.0 + 252, basealign 256, addrspace 5) + ; CHECK-NEXT: [[PRED_COPY62:%[0-9]+]]:_(p5) = PRED_COPY [[PTR_ADD65]](p5) + ; CHECK-NEXT: G_STORE [[UV63]](s32), [[PRED_COPY62]](p5) :: (store (s32) into %stack.0 + 252, basealign 256, addrspace 5) ; CHECK-NEXT: [[C67:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C67]] ; CHECK-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND]], [[C4]] @@ -758,49 +758,49 @@ ; CHECK-NEXT: [[BUILD_VECTOR13:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD56]](s32), [[LOAD57]](s32), [[LOAD58]](s32), [[LOAD59]](s32) ; CHECK-NEXT: [[BUILD_VECTOR14:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD60]](s32), [[LOAD61]](s32), [[LOAD62]](s32), [[LOAD63]](s32) ; CHECK-NEXT: [[BUILD_VECTOR15:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD64]](s32), [[LOAD65]](s32), [[LOAD66]](s32), [[LOAD67]](s32) - ; CHECK-NEXT: [[COPY65:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY65]](p1) :: (store (<4 x s32>), align 4, addrspace 1) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY2]](p1) :: (store (<4 x s32>), align 4, addrspace 1) ; CHECK-NEXT: [[C68:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CHECK-NEXT: [[PTR_ADD67:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY65]], [[C68]](s64) + ; CHECK-NEXT: [[PTR_ADD67:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY2]], [[C68]](s64) ; CHECK-NEXT: G_STORE [[BUILD_VECTOR1]](<4 x s32>), [[PTR_ADD67]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 4, addrspace 1) ; CHECK-NEXT: [[C69:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 - ; CHECK-NEXT: [[PTR_ADD68:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY65]], [[C69]](s64) + ; CHECK-NEXT: [[PTR_ADD68:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY2]], [[C69]](s64) ; CHECK-NEXT: G_STORE [[BUILD_VECTOR2]](<4 x s32>), [[PTR_ADD68]](p1) :: (store (<4 x s32>) into unknown-address + 32, align 4, addrspace 1) ; CHECK-NEXT: [[C70:%[0-9]+]]:_(s64) = G_CONSTANT i64 48 - ; CHECK-NEXT: [[PTR_ADD69:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY65]], [[C70]](s64) + ; CHECK-NEXT: [[PTR_ADD69:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY2]], [[C70]](s64) ; CHECK-NEXT: G_STORE [[BUILD_VECTOR3]](<4 x s32>), [[PTR_ADD69]](p1) :: (store (<4 x s32>) into unknown-address + 48, align 4, addrspace 1) - ; CHECK-NEXT: [[PTR_ADD70:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY65]], [[C]](s64) + ; CHECK-NEXT: [[PTR_ADD70:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY2]], [[C]](s64) ; CHECK-NEXT: G_STORE 
[[BUILD_VECTOR4]](<4 x s32>), [[PTR_ADD70]](p1) :: (store (<4 x s32>) into unknown-address + 64, align 4, addrspace 1) ; CHECK-NEXT: [[C71:%[0-9]+]]:_(s64) = G_CONSTANT i64 80 - ; CHECK-NEXT: [[PTR_ADD71:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY65]], [[C71]](s64) + ; CHECK-NEXT: [[PTR_ADD71:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY2]], [[C71]](s64) ; CHECK-NEXT: G_STORE [[BUILD_VECTOR5]](<4 x s32>), [[PTR_ADD71]](p1) :: (store (<4 x s32>) into unknown-address + 80, align 4, addrspace 1) ; CHECK-NEXT: [[C72:%[0-9]+]]:_(s64) = G_CONSTANT i64 96 - ; CHECK-NEXT: [[PTR_ADD72:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY65]], [[C72]](s64) + ; CHECK-NEXT: [[PTR_ADD72:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY2]], [[C72]](s64) ; CHECK-NEXT: G_STORE [[BUILD_VECTOR6]](<4 x s32>), [[PTR_ADD72]](p1) :: (store (<4 x s32>) into unknown-address + 96, align 4, addrspace 1) ; CHECK-NEXT: [[C73:%[0-9]+]]:_(s64) = G_CONSTANT i64 112 - ; CHECK-NEXT: [[PTR_ADD73:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY65]], [[C73]](s64) + ; CHECK-NEXT: [[PTR_ADD73:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY2]], [[C73]](s64) ; CHECK-NEXT: G_STORE [[BUILD_VECTOR7]](<4 x s32>), [[PTR_ADD73]](p1) :: (store (<4 x s32>) into unknown-address + 112, align 4, addrspace 1) - ; CHECK-NEXT: [[PTR_ADD74:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY65]], [[C1]](s64) + ; CHECK-NEXT: [[PTR_ADD74:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY2]], [[C1]](s64) ; CHECK-NEXT: G_STORE [[BUILD_VECTOR8]](<4 x s32>), [[PTR_ADD74]](p1) :: (store (<4 x s32>) into unknown-address + 128, align 4, addrspace 1) ; CHECK-NEXT: [[C74:%[0-9]+]]:_(s64) = G_CONSTANT i64 144 - ; CHECK-NEXT: [[PTR_ADD75:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY65]], [[C74]](s64) + ; CHECK-NEXT: [[PTR_ADD75:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY2]], [[C74]](s64) ; CHECK-NEXT: G_STORE [[BUILD_VECTOR9]](<4 x s32>), [[PTR_ADD75]](p1) :: (store (<4 x s32>) into unknown-address + 144, align 4, addrspace 1) ; CHECK-NEXT: [[C75:%[0-9]+]]:_(s64) = G_CONSTANT i64 160 - ; CHECK-NEXT: [[PTR_ADD76:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY65]], [[C75]](s64) + ; CHECK-NEXT: [[PTR_ADD76:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY2]], [[C75]](s64) ; CHECK-NEXT: G_STORE [[BUILD_VECTOR10]](<4 x s32>), [[PTR_ADD76]](p1) :: (store (<4 x s32>) into unknown-address + 160, align 4, addrspace 1) ; CHECK-NEXT: [[C76:%[0-9]+]]:_(s64) = G_CONSTANT i64 176 - ; CHECK-NEXT: [[PTR_ADD77:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY65]], [[C76]](s64) + ; CHECK-NEXT: [[PTR_ADD77:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY2]], [[C76]](s64) ; CHECK-NEXT: G_STORE [[BUILD_VECTOR11]](<4 x s32>), [[PTR_ADD77]](p1) :: (store (<4 x s32>) into unknown-address + 176, align 4, addrspace 1) - ; CHECK-NEXT: [[PTR_ADD78:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY65]], [[C2]](s64) + ; CHECK-NEXT: [[PTR_ADD78:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY2]], [[C2]](s64) ; CHECK-NEXT: G_STORE [[BUILD_VECTOR12]](<4 x s32>), [[PTR_ADD78]](p1) :: (store (<4 x s32>) into unknown-address + 192, align 4, addrspace 1) ; CHECK-NEXT: [[C77:%[0-9]+]]:_(s64) = G_CONSTANT i64 208 - ; CHECK-NEXT: [[PTR_ADD79:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY65]], [[C77]](s64) + ; CHECK-NEXT: [[PTR_ADD79:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY2]], [[C77]](s64) ; CHECK-NEXT: G_STORE [[BUILD_VECTOR13]](<4 x s32>), [[PTR_ADD79]](p1) :: (store (<4 x s32>) into unknown-address + 208, align 4, addrspace 1) ; CHECK-NEXT: [[C78:%[0-9]+]]:_(s64) = G_CONSTANT i64 224 - ; CHECK-NEXT: [[PTR_ADD80:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY65]], [[C78]](s64) + ; CHECK-NEXT: [[PTR_ADD80:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY2]], [[C78]](s64) ; CHECK-NEXT: G_STORE [[BUILD_VECTOR14]](<4 x s32>), [[PTR_ADD80]](p1) :: (store (<4 x 
s32>) into unknown-address + 224, align 4, addrspace 1) ; CHECK-NEXT: [[C79:%[0-9]+]]:_(s64) = G_CONSTANT i64 240 - ; CHECK-NEXT: [[PTR_ADD81:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY65]], [[C79]](s64) + ; CHECK-NEXT: [[PTR_ADD81:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY2]], [[C79]](s64) ; CHECK-NEXT: G_STORE [[BUILD_VECTOR15]](<4 x s32>), [[PTR_ADD81]](p1) :: (store (<4 x s32>) into unknown-address + 240, align 4, addrspace 1) %0:_(p1) = COPY $sgpr0_sgpr1 %1:_(s32) = COPY $sgpr2 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert.mir @@ -1694,8 +1694,8 @@ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -65536 ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[AND]] - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[OR]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[OR]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[PRED_COPY]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s16) = G_TRUNC %1 @@ -1721,8 +1721,8 @@ ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -131071 ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]] ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL]] - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[OR]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[OR]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[PRED_COPY]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s16) = G_TRUNC %1 @@ -1748,8 +1748,8 @@ ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -16776961 ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]] ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL]] - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[OR]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[OR]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[PRED_COPY]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s16) = G_TRUNC %1 @@ -1770,12 +1770,12 @@ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[AND]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[AND]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY2]], [[C1]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY]], [[C1]](s32) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[OR]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[OR]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[PRED_COPY1]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s16) = G_TRUNC %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.atomic.dim.a16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.atomic.dim.a16.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.atomic.dim.a16.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.atomic.dim.a16.ll @@ -8,39 +8,39 @@ ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, 
$sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.swap.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32), addrspace 7) - ; GFX9-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY9]](s32) + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.swap.1d), [[PRED_COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32), addrspace 7) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX10NSA-LABEL: name: atomic_swap_1d ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = 
G_TRUNC [[COPY9]](s32) - ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.swap.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (volatile dereferenceable load store (s32), addrspace 7) - ; GFX10NSA-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10NSA-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10NSA-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10NSA-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10NSA-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10NSA-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10NSA-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY9]](s32) + ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.swap.1d), [[PRED_COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (volatile dereferenceable load store (s32), addrspace 7) + ; GFX10NSA-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 main_body: %v = call i32 @llvm.amdgcn.image.atomic.swap.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0) @@ -53,39 +53,39 @@ ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32), addrspace 7) - ; GFX9-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: 
[[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY9]](s32) + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.1d), [[PRED_COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32), addrspace 7) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX10NSA-LABEL: name: atomic_add_1d ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (volatile dereferenceable load store (s32), addrspace 7) - ; GFX10NSA-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10NSA-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10NSA-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10NSA-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10NSA-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10NSA-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10NSA-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY9]](s32) + ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.1d), [[PRED_COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (volatile dereferenceable load store 
(s32), addrspace 7) + ; GFX10NSA-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 main_body: %v = call i32 @llvm.amdgcn.image.atomic.add.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0) @@ -98,39 +98,39 @@ ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.sub.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32), addrspace 7) - ; GFX9-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY9]](s32) + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.sub.1d), [[PRED_COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32), addrspace 7) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX10NSA-LABEL: name: atomic_sub_1d ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; 
GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.sub.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (volatile dereferenceable load store (s32), addrspace 7) - ; GFX10NSA-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10NSA-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10NSA-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10NSA-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10NSA-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10NSA-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10NSA-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY9]](s32) + ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.sub.1d), [[PRED_COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (volatile dereferenceable load store (s32), addrspace 7) + ; GFX10NSA-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 main_body: %v = call i32 @llvm.amdgcn.image.atomic.sub.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0) @@ -143,39 +143,39 @@ ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.smin.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32), addrspace 7) - ; GFX9-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX9-NEXT: 
[[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY9]](s32) + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.smin.1d), [[PRED_COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32), addrspace 7) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX10NSA-LABEL: name: atomic_smin_1d ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.smin.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (volatile dereferenceable load store (s32), addrspace 7) - ; GFX10NSA-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10NSA-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10NSA-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10NSA-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10NSA-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10NSA-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; 
GFX10NSA-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY9]](s32) + ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.smin.1d), [[PRED_COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (volatile dereferenceable load store (s32), addrspace 7) + ; GFX10NSA-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 main_body: %v = call i32 @llvm.amdgcn.image.atomic.smin.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0) @@ -189,39 +189,39 @@ ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.umin.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32), addrspace 7) - ; GFX9-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY9]](s32) + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.umin.1d), [[PRED_COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32), addrspace 7) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX10NSA-LABEL: name: atomic_umin_1d ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: 
[[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.umin.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (volatile dereferenceable load store (s32), addrspace 7) - ; GFX10NSA-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10NSA-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10NSA-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10NSA-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10NSA-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10NSA-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10NSA-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY9]](s32) + ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.umin.1d), [[PRED_COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (volatile dereferenceable load store (s32), addrspace 7) + ; GFX10NSA-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 main_body: %v = call i32 @llvm.amdgcn.image.atomic.umin.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0) @@ -234,39 +234,39 @@ ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) 
= G_TRUNC [[COPY9]](s32) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.smax.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32), addrspace 7) - ; GFX9-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY9]](s32) + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.smax.1d), [[PRED_COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32), addrspace 7) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX10NSA-LABEL: name: atomic_smax_1d ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.smax.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (volatile dereferenceable load store (s32), addrspace 7) - ; GFX10NSA-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10NSA-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10NSA-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10NSA-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10NSA-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10NSA-NEXT: 
[[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10NSA-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY9]](s32) + ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.smax.1d), [[PRED_COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (volatile dereferenceable load store (s32), addrspace 7) + ; GFX10NSA-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 main_body: %v = call i32 @llvm.amdgcn.image.atomic.smax.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0) @@ -279,39 +279,39 @@ ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.umax.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32), addrspace 7) - ; GFX9-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY9]](s32) + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.umax.1d), [[PRED_COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32), addrspace 7) + ; GFX9-NEXT: $vgpr0 = PRED_COPY 
[[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX10NSA-LABEL: name: atomic_umax_1d ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.umax.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (volatile dereferenceable load store (s32), addrspace 7) - ; GFX10NSA-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10NSA-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10NSA-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10NSA-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10NSA-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10NSA-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10NSA-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY9]](s32) + ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.umax.1d), [[PRED_COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (volatile dereferenceable load store (s32), addrspace 7) + ; GFX10NSA-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 main_body: %v = call i32 @llvm.amdgcn.image.atomic.umax.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0) @@ -324,39 +324,39 @@ ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = 
COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.and.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32), addrspace 7) - ; GFX9-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY9]](s32) + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.and.1d), [[PRED_COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32), addrspace 7) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX10NSA-LABEL: name: atomic_and_1d ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.and.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (volatile dereferenceable load store (s32), addrspace 7) - ; GFX10NSA-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; 
GFX10NSA-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10NSA-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10NSA-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10NSA-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10NSA-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10NSA-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY9]](s32) + ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.and.1d), [[PRED_COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (volatile dereferenceable load store (s32), addrspace 7) + ; GFX10NSA-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 main_body: %v = call i32 @llvm.amdgcn.image.atomic.and.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0) @@ -369,39 +369,39 @@ ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.or.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32), addrspace 7) - ; GFX9-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: 
[[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY9]](s32) + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.or.1d), [[PRED_COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32), addrspace 7) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX10NSA-LABEL: name: atomic_or_1d ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.or.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (volatile dereferenceable load store (s32), addrspace 7) - ; GFX10NSA-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10NSA-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10NSA-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10NSA-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10NSA-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10NSA-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10NSA-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY9]](s32) + ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.or.1d), [[PRED_COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (volatile dereferenceable load store (s32), addrspace 7) + ; GFX10NSA-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 main_body: %v = call i32 @llvm.amdgcn.image.atomic.or.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0) @@ -414,39 +414,39 @@ ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: 
[[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.xor.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32), addrspace 7) - ; GFX9-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY9]](s32) + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.xor.1d), [[PRED_COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32), addrspace 7) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX10NSA-LABEL: name: atomic_xor_1d ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD 
intrinsic(@llvm.amdgcn.image.atomic.xor.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (volatile dereferenceable load store (s32), addrspace 7) - ; GFX10NSA-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10NSA-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10NSA-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10NSA-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10NSA-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10NSA-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10NSA-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY9]](s32) + ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.xor.1d), [[PRED_COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (volatile dereferenceable load store (s32), addrspace 7) + ; GFX10NSA-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 main_body: %v = call i32 @llvm.amdgcn.image.atomic.xor.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0) @@ -459,39 +459,39 @@ ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.inc.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32), addrspace 7) - ; GFX9-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; 
GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY9]](s32) + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.inc.1d), [[PRED_COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32), addrspace 7) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX10NSA-LABEL: name: atomic_inc_1d ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.inc.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (volatile dereferenceable load store (s32), addrspace 7) - ; GFX10NSA-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10NSA-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10NSA-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10NSA-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10NSA-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10NSA-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10NSA-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY9]](s32) + ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.inc.1d), [[PRED_COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (volatile dereferenceable load store (s32), addrspace 7) + ; GFX10NSA-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX10NSA-NEXT: 
SI_RETURN_TO_EPILOG implicit $vgpr0 main_body: %v = call i32 @llvm.amdgcn.image.atomic.inc.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0) @@ -504,39 +504,39 @@ ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.dec.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32), addrspace 7) - ; GFX9-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY9]](s32) + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.dec.1d), [[PRED_COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32), addrspace 7) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX10NSA-LABEL: name: atomic_dec_1d ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), 
[[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.dec.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (volatile dereferenceable load store (s32), addrspace 7) - ; GFX10NSA-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10NSA-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10NSA-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10NSA-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10NSA-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10NSA-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10NSA-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY9]](s32) + ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.dec.1d), [[PRED_COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (volatile dereferenceable load store (s32), addrspace 7) + ; GFX10NSA-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 main_body: %v = call i32 @llvm.amdgcn.image.atomic.dec.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0) @@ -549,43 +549,43 @@ ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 
+ ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY10]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.1d), [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32), addrspace 7) - ; GFX9-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX10NSA-LABEL: name: atomic_cmpswap_1d ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10NSA-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32) - ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10NSA-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10NSA-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10NSA-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10NSA-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10NSA-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10NSA-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10NSA-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY10]](s32) 
+ ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.1d), [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (volatile dereferenceable load store (s32), addrspace 7) - ; GFX10NSA-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX10NSA-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 main_body: %v = call i32 @llvm.amdgcn.image.atomic.cmpswap.1d.i32.i16(i32 %cmp, i32 %swap, i16 %s, <8 x i32> %rsrc, i32 0, i32 0) @@ -598,45 +598,45 @@ ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY9]](s32) + ; GFX9-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY10]](s32) ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.2d), [[COPY8]](s32), [[BUILD_VECTOR1]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32), addrspace 7) - ; GFX9-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.2d), [[PRED_COPY8]](s32), [[BUILD_VECTOR1]](<2 x s16>), $noreg, 
[[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32), addrspace 7) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX10NSA-LABEL: name: atomic_add_2d ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX10NSA-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10NSA-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10NSA-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10NSA-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10NSA-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10NSA-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10NSA-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY9]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY10]](s32) ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.2d), [[COPY8]](s32), [[BUILD_VECTOR1]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (volatile dereferenceable load store (s32), addrspace 7) - ; GFX10NSA-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.2d), [[PRED_COPY8]](s32), [[BUILD_VECTOR1]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (volatile dereferenceable load store (s32), addrspace 7) + ; GFX10NSA-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 main_body: %v = call i32 @llvm.amdgcn.image.atomic.add.2d.i32.i16(i32 %data, i16 %s, i16 %t, <8 x i32> %rsrc, i32 0, i32 0) @@ -649,55 +649,55 
@@ ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32) - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY11]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY9]](s32) + ; GFX9-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY10]](s32) + ; GFX9-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY11]](s32) ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.3d), [[COPY8]](s32), [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32), addrspace 7) - ; GFX9-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.3d), [[PRED_COPY8]](s32), [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32), addrspace 7) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX9-NEXT: 
SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX10NSA-LABEL: name: atomic_add_3d ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX10NSA-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32) - ; GFX10NSA-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY11]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10NSA-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10NSA-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10NSA-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10NSA-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10NSA-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10NSA-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY9]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY10]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY11]](s32) ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) ; GFX10NSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) - ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.3d), [[COPY8]](s32), [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (volatile dereferenceable load store (s32), addrspace 7) - ; GFX10NSA-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.3d), 
[[PRED_COPY8]](s32), [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (volatile dereferenceable load store (s32), addrspace 7) + ; GFX10NSA-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 main_body: %v = call i32 @llvm.amdgcn.image.atomic.add.3d.i32.i16(i32 %data, i16 %s, i16 %t, i16 %r, <8 x i32> %rsrc, i32 0, i32 0) @@ -710,55 +710,55 @@ ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32) - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY11]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY9]](s32) + ; GFX9-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY10]](s32) + ; GFX9-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY11]](s32) ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.cube), [[COPY8]](s32), [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32), addrspace 7) - ; 
GFX9-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.cube), [[PRED_COPY8]](s32), [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32), addrspace 7) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX10NSA-LABEL: name: atomic_add_cube ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX10NSA-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32) - ; GFX10NSA-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY11]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10NSA-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10NSA-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10NSA-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10NSA-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10NSA-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10NSA-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY9]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY10]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY11]](s32) ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) ; GFX10NSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) - ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = 
G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.cube), [[COPY8]](s32), [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (volatile dereferenceable load store (s32), addrspace 7) - ; GFX10NSA-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.cube), [[PRED_COPY8]](s32), [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (volatile dereferenceable load store (s32), addrspace 7) + ; GFX10NSA-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 main_body: %v = call i32 @llvm.amdgcn.image.atomic.add.cube.i32.i16(i32 %data, i16 %s, i16 %t, i16 %face, <8 x i32> %rsrc, i32 0, i32 0) @@ -771,45 +771,45 @@ ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY9]](s32) + ; GFX9-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY10]](s32) ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.1darray), [[COPY8]](s32), [[BUILD_VECTOR1]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32), addrspace 7) - ; GFX9-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX9-NEXT: 
[[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.1darray), [[PRED_COPY8]](s32), [[BUILD_VECTOR1]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32), addrspace 7) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX10NSA-LABEL: name: atomic_add_1darray ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX10NSA-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10NSA-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10NSA-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10NSA-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10NSA-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10NSA-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10NSA-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY9]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY10]](s32) ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.1darray), [[COPY8]](s32), [[BUILD_VECTOR1]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (volatile dereferenceable load store (s32), addrspace 7) - ; GFX10NSA-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.1darray), [[PRED_COPY8]](s32), [[BUILD_VECTOR1]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (volatile dereferenceable load store (s32), addrspace 7) + ; GFX10NSA-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) 
; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 main_body: %v = call i32 @llvm.amdgcn.image.atomic.add.1darray.i32.i16(i32 %data, i16 %s, i16 %slice, <8 x i32> %rsrc, i32 0, i32 0) @@ -822,55 +822,55 @@ ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32) - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY11]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY9]](s32) + ; GFX9-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY10]](s32) + ; GFX9-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY11]](s32) ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.2darray), [[COPY8]](s32), [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32), addrspace 7) - ; GFX9-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.2darray), [[PRED_COPY8]](s32), 
[[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32), addrspace 7) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX10NSA-LABEL: name: atomic_add_2darray ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX10NSA-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32) - ; GFX10NSA-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY11]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10NSA-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10NSA-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10NSA-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10NSA-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10NSA-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10NSA-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY9]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY10]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY11]](s32) ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) ; GFX10NSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) - ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.2darray), [[COPY8]](s32), [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (volatile dereferenceable load store 
(s32), addrspace 7) - ; GFX10NSA-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.2darray), [[PRED_COPY8]](s32), [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (volatile dereferenceable load store (s32), addrspace 7) + ; GFX10NSA-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 main_body: %v = call i32 @llvm.amdgcn.image.atomic.add.2darray.i32.i16(i32 %data, i16 %s, i16 %t, i16 %slice, <8 x i32> %rsrc, i32 0, i32 0) @@ -883,55 +883,55 @@ ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32) - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY11]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY9]](s32) + ; GFX9-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY10]](s32) + ; GFX9-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY11]](s32) ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = 
G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.2dmsaa), [[COPY8]](s32), [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32), addrspace 7) - ; GFX9-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.2dmsaa), [[PRED_COPY8]](s32), [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32), addrspace 7) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX10NSA-LABEL: name: atomic_add_2dmsaa ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX10NSA-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32) - ; GFX10NSA-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY11]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10NSA-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10NSA-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10NSA-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10NSA-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10NSA-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10NSA-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY9]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY10]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY11]](s32) ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = 
G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) ; GFX10NSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) - ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.2dmsaa), [[COPY8]](s32), [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (volatile dereferenceable load store (s32), addrspace 7) - ; GFX10NSA-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.2dmsaa), [[PRED_COPY8]](s32), [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (volatile dereferenceable load store (s32), addrspace 7) + ; GFX10NSA-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 main_body: %v = call i32 @llvm.amdgcn.image.atomic.add.2dmsaa.i32.i16(i32 %data, i16 %s, i16 %t, i16 %fragid, <8 x i32> %rsrc, i32 0, i32 0) @@ -944,57 +944,57 @@ ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32) - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY9]](s32) + ; GFX9-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC 
[[PRED_COPY10]](s32) + ; GFX9-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY11]](s32) + ; GFX9-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY12]](s32) ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.2darraymsaa), [[COPY8]](s32), [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32), addrspace 7) - ; GFX9-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.2darraymsaa), [[PRED_COPY8]](s32), [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32), addrspace 7) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX10NSA-LABEL: name: atomic_add_2darraymsaa ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX10NSA-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32) - ; GFX10NSA-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY11]](s32) - ; GFX10NSA-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10NSA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10NSA-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10NSA-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10NSA-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10NSA-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10NSA-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), 
[[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10NSA-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY9]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY10]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY11]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; GFX10NSA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY12]](s32) ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) ; GFX10NSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) - ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.2darraymsaa), [[COPY8]](s32), [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (volatile dereferenceable load store (s32), addrspace 7) - ; GFX10NSA-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.2darraymsaa), [[PRED_COPY8]](s32), [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (volatile dereferenceable load store (s32), addrspace 7) + ; GFX10NSA-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 main_body: %v = call i32 @llvm.amdgcn.image.atomic.add.2darraymsaa.i32.i16(i32 %data, i16 %s, i16 %t, i16 %slice, i16 %fragid, <8 x i32> %rsrc, i32 0, i32 0) @@ -1007,39 +1007,39 @@ ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 2, 3 :: (volatile dereferenceable load store (s32), addrspace 7) - ; GFX9-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; 
GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY9]](s32) + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.1d), [[PRED_COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 2, 3 :: (volatile dereferenceable load store (s32), addrspace 7) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX10NSA-LABEL: name: atomic_add_1d_slc ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 2, 1 :: (volatile dereferenceable load store (s32), addrspace 7) - ; GFX10NSA-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10NSA-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10NSA-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10NSA-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10NSA-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10NSA-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10NSA-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC 
[[PRED_COPY9]](s32) + ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.1d), [[PRED_COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 2, 1 :: (volatile dereferenceable load store (s32), addrspace 7) + ; GFX10NSA-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 main_body: %v = call i32 @llvm.amdgcn.image.atomic.add.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 2) @@ -1052,49 +1052,49 @@ ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32) - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY11]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY10]](s32) + ; GFX9-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY11]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.2d), [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR2]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32), addrspace 7) - ; GFX9-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX9-NEXT: $vgpr0 = 
PRED_COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX10NSA-LABEL: name: atomic_cmpswap_2d ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10NSA-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32) - ; GFX10NSA-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY11]](s32) - ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10NSA-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10NSA-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10NSA-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10NSA-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10NSA-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10NSA-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10NSA-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY10]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY11]](s32) + ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) ; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.2d), [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR2]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (volatile dereferenceable load store (s32), addrspace 7) - ; GFX10NSA-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX10NSA-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 main_body: %v = call i32 @llvm.amdgcn.image.atomic.cmpswap.2d.i32.i16(i32 %cmp, i32 %swap, i16 %s, i16 %t, <8 x i32> %rsrc, i32 0, i32 0) @@ -1107,59 +1107,59 @@ ; GFX9: 
bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32) - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY10]](s32) + ; GFX9-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY11]](s32) + ; GFX9-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY12]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>) ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.3d), [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32), addrspace 7) - ; GFX9-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX9-NEXT: $vgpr0 = 
PRED_COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX10NSA-LABEL: name: atomic_cmpswap_3d ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10NSA-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32) - ; GFX10NSA-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY11]](s32) - ; GFX10NSA-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10NSA-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10NSA-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10NSA-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10NSA-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10NSA-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10NSA-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10NSA-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY10]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY11]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY12]](s32) + ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) ; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX10NSA-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) ; GFX10NSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>) ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD 
intrinsic(@llvm.amdgcn.image.atomic.cmpswap.3d), [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (volatile dereferenceable load store (s32), addrspace 7) - ; GFX10NSA-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX10NSA-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 main_body: %v = call i32 @llvm.amdgcn.image.atomic.cmpswap.3d.i32.i16(i32 %cmp, i32 %swap, i16 %s, i16 %t, i16 %r, <8 x i32> %rsrc, i32 0, i32 0) @@ -1172,61 +1172,61 @@ ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32) - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY10]](s32) + ; GFX9-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY11]](s32) + ; GFX9-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY12]](s32) + ; GFX9-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX9-NEXT: 
[[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>) ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.2darraymsaa), [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32), addrspace 7) - ; GFX9-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX10NSA-LABEL: name: atomic_cmpswap_2darraymsaa ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10NSA-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32) - ; GFX10NSA-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY11]](s32) - ; GFX10NSA-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX10NSA-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10NSA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10NSA-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10NSA-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10NSA-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10NSA-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10NSA-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10NSA-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10NSA-NEXT: 
[[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY10]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY11]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY12]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; GFX10NSA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) ; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX10NSA-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) ; GFX10NSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>) ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.2darraymsaa), [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (volatile dereferenceable load store (s32), addrspace 7) - ; GFX10NSA-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX10NSA-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 main_body: %v = call i32 @llvm.amdgcn.image.atomic.cmpswap.2darraymsaa.i32.i16(i32 %cmp, i32 %swap, i16 %s, i16 %t, i16 %slice, i16 %fragid, <8 x i32> %rsrc, i32 0, i32 0) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.dim.a16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.dim.a16.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.dim.a16.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.dim.a16.ll @@ -8,51 +8,51 @@ ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), 
[[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr0 ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX9-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX9-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX9-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX10NSA-LABEL: name: load_1d ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX10NSA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10NSA-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10NSA-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10NSA-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10NSA-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10NSA-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY8:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr0 ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = 
G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10NSA-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10NSA-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10NSA-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10NSA-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10NSA-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10NSA-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10NSA-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %s = extractelement <2 x i16> %coords, i32 0 @@ -65,61 +65,61 @@ ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr0 ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR1]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: 
(dereferenceable load (<4 x s32>), addrspace 7) ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX9-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX9-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX9-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX10NSA-LABEL: name: load_2d ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX10NSA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10NSA-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10NSA-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10NSA-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10NSA-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10NSA-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY8:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr0 ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX10NSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR1]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX10NSA-NEXT: 
[[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10NSA-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10NSA-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10NSA-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10NSA-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10NSA-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10NSA-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10NSA-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %s = extractelement <2 x i16> %coords, i32 0 @@ -133,26 +133,26 @@ ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr1 ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY9]](<2 x s16>) ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR 
[[TRUNC]](s16), [[TRUNC1]](s16) @@ -161,35 +161,35 @@ ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.3d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX9-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX9-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX9-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX10NSA-LABEL: name: load_3d ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX10NSA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10NSA-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10NSA-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10NSA-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10NSA-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10NSA-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY8:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr0 + ; GFX10NSA-NEXT: [[PRED_COPY9:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr1 ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX10NSA-NEXT: [[C1:%[0-9]+]]:_(s32) = 
G_CONSTANT i32 16 ; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY9]](<2 x s16>) ; GFX10NSA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) @@ -198,10 +198,10 @@ ; GFX10NSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.3d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10NSA-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10NSA-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10NSA-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10NSA-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10NSA-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10NSA-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10NSA-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %s = extractelement <2 x i16> %coords_lo, i32 0 @@ -216,26 +216,26 @@ ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr1 ; GFX9-NEXT: 
[[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY9]](<2 x s16>) ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) @@ -244,35 +244,35 @@ ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.cube), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX9-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX9-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX9-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX10NSA-LABEL: name: load_cube ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX10NSA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10NSA-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10NSA-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10NSA-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10NSA-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = 
PRED_COPY $sgpr8 + ; GFX10NSA-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY8:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr0 + ; GFX10NSA-NEXT: [[PRED_COPY9:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr1 ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX10NSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY9]](<2 x s16>) ; GFX10NSA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) @@ -281,10 +281,10 @@ ; GFX10NSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.cube), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10NSA-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10NSA-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10NSA-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10NSA-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10NSA-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10NSA-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10NSA-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %s = extractelement <2 x i16> %coords_lo, i32 0 @@ -299,61 +299,61 @@ ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 
- ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr0 ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1darray), 15, [[BUILD_VECTOR1]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX9-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX9-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX9-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX10NSA-LABEL: name: load_1darray ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX10NSA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10NSA-NEXT: 
[[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10NSA-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10NSA-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10NSA-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10NSA-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10NSA-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY8:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr0 ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX10NSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1darray), 15, [[BUILD_VECTOR1]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10NSA-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10NSA-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10NSA-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10NSA-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10NSA-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10NSA-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10NSA-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %s = extractelement <2 x i16> %coords, i32 0 @@ -367,26 +367,26 @@ ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = 
PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr1 ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY9]](<2 x s16>) ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) @@ -395,35 +395,35 @@ ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2darray), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX9-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX9-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX9-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX10NSA-LABEL: name: load_2darray ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: 
[[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX10NSA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10NSA-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10NSA-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10NSA-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10NSA-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10NSA-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY8:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr0 + ; GFX10NSA-NEXT: [[PRED_COPY9:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr1 ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX10NSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY9]](<2 x s16>) ; GFX10NSA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) @@ -432,10 +432,10 @@ ; GFX10NSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2darray), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10NSA-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10NSA-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10NSA-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10NSA-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10NSA-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10NSA-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10NSA-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit 
$vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %s = extractelement <2 x i16> %coords_lo, i32 0 @@ -450,26 +450,26 @@ ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr1 ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY9]](<2 x s16>) ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) @@ -478,35 +478,35 @@ ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2dmsaa), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES 
[[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX9-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX9-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX9-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX10NSA-LABEL: name: load_2dmsaa ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX10NSA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10NSA-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10NSA-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10NSA-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10NSA-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10NSA-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY8:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr0 + ; GFX10NSA-NEXT: [[PRED_COPY9:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr1 ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX10NSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY9]](<2 x s16>) ; GFX10NSA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) @@ 
-515,10 +515,10 @@ ; GFX10NSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2dmsaa), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10NSA-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10NSA-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10NSA-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10NSA-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10NSA-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10NSA-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10NSA-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %s = extractelement <2 x i16> %coords_lo, i32 0 @@ -533,29 +533,29 @@ ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr1 ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR 
[[BITCAST1]], [[C1]](s32) ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY9]](<2 x s16>) ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY9]](<2 x s16>) ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32) ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) @@ -563,38 +563,38 @@ ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2darraymsaa), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX9-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX9-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX9-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX10NSA-LABEL: name: load_2darraymsaa ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX10NSA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10NSA-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10NSA-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10NSA-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10NSA-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10NSA-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), 
[[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY8:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr0 + ; GFX10NSA-NEXT: [[PRED_COPY9:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr1 ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX10NSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY9]](<2 x s16>) ; GFX10NSA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX10NSA-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY9]](<2 x s16>) ; GFX10NSA-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32) ; GFX10NSA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) @@ -602,10 +602,10 @@ ; GFX10NSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2darraymsaa), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10NSA-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10NSA-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10NSA-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10NSA-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10NSA-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10NSA-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10NSA-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %s = extractelement <2 x i16> %coords_lo, i32 0 @@ -621,61 +621,61 @@ ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), 
[[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr0 ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.1d), 15, [[BUILD_VECTOR1]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX9-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX9-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX9-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX10NSA-LABEL: name: load_mip_1d ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX10NSA-NEXT: 
[[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10NSA-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10NSA-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10NSA-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10NSA-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10NSA-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY8:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr0 ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX10NSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.1d), 15, [[BUILD_VECTOR1]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10NSA-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10NSA-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10NSA-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10NSA-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10NSA-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10NSA-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10NSA-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %s = extractelement <2 x i16> %coords, i32 0 @@ -689,26 +689,26 @@ ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(<2 x 
s16>) = COPY $vgpr1 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr1 ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY9]](<2 x s16>) ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) @@ -717,35 +717,35 @@ ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.2d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX9-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX9-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX9-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX10NSA-LABEL: name: load_mip_2d ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: 
[[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX10NSA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10NSA-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10NSA-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10NSA-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10NSA-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10NSA-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY8:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr0 + ; GFX10NSA-NEXT: [[PRED_COPY9:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr1 ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX10NSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY9]](<2 x s16>) ; GFX10NSA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) @@ -754,10 +754,10 @@ ; GFX10NSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.2d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10NSA-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10NSA-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10NSA-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10NSA-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10NSA-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10NSA-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10NSA-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; 
GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %s = extractelement <2 x i16> %coords_lo, i32 0 @@ -772,29 +772,29 @@ ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr1 ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY9]](<2 x s16>) ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY9]](<2 x s16>) ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32) ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) @@ -802,38 +802,38 @@ ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) 
= G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.3d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX9-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX9-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX9-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX10NSA-LABEL: name: load_mip_3d ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX10NSA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10NSA-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10NSA-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10NSA-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10NSA-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10NSA-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY8:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr0 + ; GFX10NSA-NEXT: [[PRED_COPY9:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr1 ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX10NSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 
x s16>) + ; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY9]](<2 x s16>) ; GFX10NSA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX10NSA-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY9]](<2 x s16>) ; GFX10NSA-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32) ; GFX10NSA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) @@ -841,10 +841,10 @@ ; GFX10NSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.3d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10NSA-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10NSA-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10NSA-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10NSA-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10NSA-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10NSA-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10NSA-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %s = extractelement <2 x i16> %coords_lo, i32 0 @@ -860,29 +860,29 @@ ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr0 + ; 
GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr1 ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY9]](<2 x s16>) ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY9]](<2 x s16>) ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32) ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) @@ -890,38 +890,38 @@ ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.cube), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX9-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX9-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX9-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX10NSA-LABEL: name: load_mip_cube ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX10NSA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + 
; GFX10NSA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10NSA-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10NSA-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10NSA-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10NSA-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10NSA-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY8:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr0 + ; GFX10NSA-NEXT: [[PRED_COPY9:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr1 ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX10NSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY9]](<2 x s16>) ; GFX10NSA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX10NSA-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY9]](<2 x s16>) ; GFX10NSA-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32) ; GFX10NSA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) @@ -929,10 +929,10 @@ ; GFX10NSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.cube), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10NSA-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10NSA-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10NSA-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10NSA-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10NSA-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10NSA-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10NSA-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %s = extractelement <2 x i16> %coords_lo, i32 0 @@ -948,26 +948,26 @@ ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, 
$sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr1 ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY9]](<2 x s16>) ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) @@ -976,35 +976,35 @@ ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.1darray), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = 
COPY [[UV3]](s32) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX9-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX9-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX9-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX10NSA-LABEL: name: load_mip_1darray ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX10NSA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10NSA-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10NSA-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10NSA-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10NSA-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10NSA-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY8:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr0 + ; GFX10NSA-NEXT: [[PRED_COPY9:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr1 ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX10NSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY9]](<2 x s16>) ; GFX10NSA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) @@ -1013,10 +1013,10 @@ ; GFX10NSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) ; GFX10NSA-NEXT: 
[[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.1darray), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10NSA-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10NSA-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10NSA-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10NSA-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10NSA-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10NSA-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10NSA-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %s = extractelement <2 x i16> %coords_lo, i32 0 @@ -1031,29 +1031,29 @@ ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr1 ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) + 
; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY9]](<2 x s16>) ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY9]](<2 x s16>) ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32) ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) @@ -1061,38 +1061,38 @@ ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.2darray), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX9-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX9-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX9-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX10NSA-LABEL: name: load_mip_2darray ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX10NSA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10NSA-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10NSA-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10NSA-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10NSA-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10NSA-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY8:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr0 + ; GFX10NSA-NEXT: 
[[PRED_COPY9:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr1 ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX10NSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY9]](<2 x s16>) ; GFX10NSA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX10NSA-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY9]](<2 x s16>) ; GFX10NSA-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32) ; GFX10NSA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) @@ -1100,10 +1100,10 @@ ; GFX10NSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.2darray), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10NSA-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10NSA-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10NSA-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10NSA-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10NSA-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10NSA-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10NSA-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %s = extractelement <2 x i16> %coords_lo, i32 0 @@ -1119,23 +1119,23 @@ ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; 
GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX9-NEXT: [[PRED_COPY12:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr4 ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY12]](<2 x s16>) ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX9-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>), addrspace 7) @@ -1144,23 +1144,23 @@ ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10NSA-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10NSA-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10NSA-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10NSA-NEXT: 
[[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10NSA-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10NSA-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10NSA-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10NSA-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10NSA-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10NSA-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10NSA-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY12:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr4 ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY12]](<2 x s16>) ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX10NSA-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (dereferenceable store (<4 x s32>), addrspace 7) @@ -1176,26 +1176,26 @@ ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: 
[[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX9-NEXT: [[PRED_COPY12:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr4 ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY12]](<2 x s16>) ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY12]](<2 x s16>) ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) @@ -1206,26 +1206,26 @@ ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10NSA-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10NSA-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10NSA-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10NSA-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10NSA-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10NSA-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10NSA-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10NSA-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY 
$vgpr0 + ; GFX10NSA-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10NSA-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY12:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr4 ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY12]](<2 x s16>) ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY12]](<2 x s16>) ; GFX10NSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) @@ -1244,31 +1244,31 @@ ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = 
G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX9-NEXT: [[PRED_COPY12:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr4 + ; GFX9-NEXT: [[PRED_COPY13:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr5 ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY12]](<2 x s16>) ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY12]](<2 x s16>) ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY13]](<2 x s16>) ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) @@ -1281,31 +1281,31 @@ ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10NSA-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10NSA-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10NSA-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; GFX10NSA-NEXT: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 + ; GFX10NSA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10NSA-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10NSA-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10NSA-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10NSA-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10NSA-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY 
$vgpr0 + ; GFX10NSA-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10NSA-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY12:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY13:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr5 ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY12]](<2 x s16>) ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY12]](<2 x s16>) ; GFX10NSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY13]](<2 x s16>) ; GFX10NSA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) ; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) @@ -1327,31 +1327,31 @@ ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = 
G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX9-NEXT: [[PRED_COPY12:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr4 + ; GFX9-NEXT: [[PRED_COPY13:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr5 ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY12]](<2 x s16>) ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY12]](<2 x s16>) ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY13]](<2 x s16>) ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) @@ -1364,31 +1364,31 @@ ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10NSA-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10NSA-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10NSA-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; GFX10NSA-NEXT: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 + ; GFX10NSA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10NSA-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10NSA-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; 
GFX10NSA-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10NSA-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10NSA-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10NSA-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10NSA-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY12:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY13:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr5 ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY12]](<2 x s16>) ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY12]](<2 x s16>) ; GFX10NSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY13]](<2 x s16>) ; GFX10NSA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) ; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) @@ -1410,26 +1410,26 @@ ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: 
[[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX9-NEXT: [[PRED_COPY12:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr4 ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY12]](<2 x s16>) ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY12]](<2 x s16>) ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) @@ -1440,26 +1440,26 @@ ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10NSA-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10NSA-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10NSA-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10NSA-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10NSA-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10NSA-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10NSA-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = 
PRED_COPY $sgpr8 + ; GFX10NSA-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10NSA-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10NSA-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY12:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr4 ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY12]](<2 x s16>) ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY12]](<2 x s16>) ; GFX10NSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) @@ -1478,31 +1478,31 @@ ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), 
[[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX9-NEXT: [[PRED_COPY12:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr4 + ; GFX9-NEXT: [[PRED_COPY13:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr5 ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY12]](<2 x s16>) ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY12]](<2 x s16>) ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY13]](<2 x s16>) ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) @@ -1515,31 +1515,31 @@ ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10NSA-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10NSA-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10NSA-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; GFX10NSA-NEXT: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 + ; GFX10NSA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10NSA-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10NSA-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10NSA-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10NSA-NEXT: 
[[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10NSA-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10NSA-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10NSA-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY12:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY13:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr5 ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY12]](<2 x s16>) ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY12]](<2 x s16>) ; GFX10NSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY13]](<2 x s16>) ; GFX10NSA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) ; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) @@ -1561,31 +1561,31 @@ ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: 
[[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX9-NEXT: [[PRED_COPY12:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr4 + ; GFX9-NEXT: [[PRED_COPY13:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr5 ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY12]](<2 x s16>) ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY12]](<2 x s16>) ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY13]](<2 x s16>) ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) @@ -1598,31 +1598,31 @@ ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10NSA-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10NSA-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10NSA-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = 
COPY $vgpr4 - ; GFX10NSA-NEXT: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 + ; GFX10NSA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10NSA-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10NSA-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10NSA-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10NSA-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10NSA-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10NSA-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10NSA-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY12:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY13:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr5 ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY12]](<2 x s16>) ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY12]](<2 x s16>) ; GFX10NSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY13]](<2 x s16>) ; GFX10NSA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) ; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) @@ -1644,34 +1644,34 @@ ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: 
[[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX9-NEXT: [[PRED_COPY12:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr4 + ; GFX9-NEXT: [[PRED_COPY13:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr5 ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY12]](<2 x s16>) ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY12]](<2 x s16>) ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY13]](<2 x s16>) ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY13]](<2 x s16>) ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32) ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) @@ -1683,34 +1683,34 @@ ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; 
GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10NSA-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10NSA-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10NSA-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; GFX10NSA-NEXT: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 + ; GFX10NSA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10NSA-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10NSA-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10NSA-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10NSA-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10NSA-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10NSA-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10NSA-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY12:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY13:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr5 ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY12]](<2 x s16>) ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY12]](<2 x s16>) ; GFX10NSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY13]](<2 x s16>) ; GFX10NSA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX10NSA-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY13]](<2 x s16>) ; GFX10NSA-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32) ; GFX10NSA-NEXT: 
[[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) ; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) @@ -1732,26 +1732,26 @@ ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX9-NEXT: [[PRED_COPY12:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr4 ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY12]](<2 x s16>) ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY12]](<2 x s16>) ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) @@ -1762,26 +1762,26 @@ ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: 
[[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10NSA-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10NSA-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10NSA-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10NSA-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10NSA-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10NSA-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10NSA-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10NSA-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10NSA-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10NSA-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY12:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr4 ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY12]](<2 x s16>) ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY12]](<2 x s16>) ; GFX10NSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) @@ -1800,31 +1800,31 @@ ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: 
[[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX9-NEXT: [[PRED_COPY12:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr4 + ; GFX9-NEXT: [[PRED_COPY13:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr5 ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY12]](<2 x s16>) ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY12]](<2 x s16>) ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY13]](<2 x s16>) ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) @@ -1837,31 +1837,31 @@ ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: 
[[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10NSA-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10NSA-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10NSA-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; GFX10NSA-NEXT: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 + ; GFX10NSA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10NSA-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10NSA-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10NSA-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10NSA-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10NSA-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10NSA-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10NSA-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY12:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY13:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr5 ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY12]](<2 x s16>) ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY12]](<2 x s16>) ; GFX10NSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY13]](<2 x s16>) ; GFX10NSA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) ; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x 
s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) @@ -1883,34 +1883,34 @@ ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX9-NEXT: [[PRED_COPY12:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr4 + ; GFX9-NEXT: [[PRED_COPY13:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr5 ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY12]](<2 x s16>) ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY12]](<2 x s16>) ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY13]](<2 x s16>) ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = 
G_LSHR [[BITCAST2]], [[C]](s32) ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY13]](<2 x s16>) ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32) ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) @@ -1922,34 +1922,34 @@ ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10NSA-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10NSA-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10NSA-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; GFX10NSA-NEXT: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 + ; GFX10NSA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10NSA-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10NSA-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10NSA-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10NSA-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10NSA-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10NSA-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10NSA-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY12:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY13:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr5 ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY12]](<2 x s16>) ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX10NSA-NEXT: 
[[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY12]](<2 x s16>) ; GFX10NSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY13]](<2 x s16>) ; GFX10NSA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX10NSA-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY13]](<2 x s16>) ; GFX10NSA-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32) ; GFX10NSA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) ; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) @@ -1971,34 +1971,34 @@ ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), 
[[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX9-NEXT: [[PRED_COPY12:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr4 + ; GFX9-NEXT: [[PRED_COPY13:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr5 ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY12]](<2 x s16>) ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY12]](<2 x s16>) ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY13]](<2 x s16>) ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY13]](<2 x s16>) ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32) ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) @@ -2010,34 +2010,34 @@ ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10NSA-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10NSA-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10NSA-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; GFX10NSA-NEXT: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 + ; GFX10NSA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10NSA-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10NSA-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10NSA-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10NSA-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10NSA-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10NSA-NEXT: 
[[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10NSA-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10NSA-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY12:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY13:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr5 ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY12]](<2 x s16>) ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY12]](<2 x s16>) ; GFX10NSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY13]](<2 x s16>) ; GFX10NSA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX10NSA-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY13]](<2 x s16>) ; GFX10NSA-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32) ; GFX10NSA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) ; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) @@ -2059,31 +2059,31 @@ ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(<2 x 
s16>) = COPY $vgpr5 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX9-NEXT: [[PRED_COPY12:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr4 + ; GFX9-NEXT: [[PRED_COPY13:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr5 ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY12]](<2 x s16>) ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY12]](<2 x s16>) ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY13]](<2 x s16>) ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) @@ -2096,31 +2096,31 @@ ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10NSA-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10NSA-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10NSA-NEXT: 
[[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10NSA-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; GFX10NSA-NEXT: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 + ; GFX10NSA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10NSA-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10NSA-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10NSA-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10NSA-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10NSA-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10NSA-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10NSA-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY12:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY13:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr5 ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY12]](<2 x s16>) ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY12]](<2 x s16>) ; GFX10NSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY13]](<2 x s16>) ; GFX10NSA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) ; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) @@ -2142,34 +2142,34 @@ ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), 
[[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX9-NEXT: [[PRED_COPY12:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr4 + ; GFX9-NEXT: [[PRED_COPY13:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr5 ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY12]](<2 x s16>) ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY12]](<2 x s16>) ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY13]](<2 x s16>) ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY13]](<2 x s16>) ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32) ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) @@ -2181,34 +2181,34 @@ ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; 
GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10NSA-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10NSA-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10NSA-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; GFX10NSA-NEXT: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 + ; GFX10NSA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10NSA-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10NSA-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10NSA-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10NSA-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10NSA-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10NSA-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10NSA-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY12:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY13:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr5 ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY12]](<2 x s16>) ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY12]](<2 x s16>) ; GFX10NSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY13]](<2 x s16>) ; GFX10NSA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX10NSA-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>) + ; 
GFX10NSA-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY13]](<2 x s16>) ; GFX10NSA-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32) ; GFX10NSA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) ; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) @@ -2230,51 +2230,51 @@ ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr0 ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX9-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX9-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX9-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX10NSA-LABEL: name: getresinfo_1d ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) 
= COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX10NSA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10NSA-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10NSA-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10NSA-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10NSA-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10NSA-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY8:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr0 ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 ; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10NSA-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10NSA-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10NSA-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10NSA-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10NSA-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10NSA-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10NSA-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %mip = extractelement <2 x i16> %coords, i32 0 @@ -2287,51 +2287,51 @@ ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 
+ ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr0 ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.2d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX9-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX9-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX9-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX10NSA-LABEL: name: getresinfo_2d ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX10NSA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10NSA-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10NSA-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10NSA-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10NSA-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10NSA-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), 
[[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY8:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr0 ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.2d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 ; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10NSA-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10NSA-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10NSA-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10NSA-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10NSA-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10NSA-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10NSA-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %mip = extractelement <2 x i16> %coords, i32 0 @@ -2344,51 +2344,51 @@ ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr0 ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.3d), 
15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX9-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX9-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX9-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX10NSA-LABEL: name: getresinfo_3d ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX10NSA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10NSA-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10NSA-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10NSA-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10NSA-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10NSA-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY8:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr0 ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.3d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 ; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10NSA-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10NSA-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10NSA-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10NSA-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10NSA-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10NSA-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10NSA-NEXT: $vgpr3 = PRED_COPY 
[[UV3]](s32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %mip = extractelement <2 x i16> %coords, i32 0 @@ -2401,51 +2401,51 @@ ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr0 ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.cube), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX9-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX9-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX9-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX10NSA-LABEL: name: getresinfo_cube ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: 
[[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX10NSA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10NSA-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10NSA-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10NSA-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10NSA-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10NSA-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY8:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr0 ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.cube), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 ; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10NSA-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10NSA-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10NSA-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10NSA-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10NSA-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10NSA-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10NSA-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %mip = extractelement <2 x i16> %coords, i32 0 @@ -2458,51 +2458,51 @@ ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: 
[[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr0 ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.1darray), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX9-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX9-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX9-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX10NSA-LABEL: name: getresinfo_1darray ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX10NSA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10NSA-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10NSA-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10NSA-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10NSA-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10NSA-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY8:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr0 ; 
GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.1darray), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 ; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10NSA-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10NSA-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10NSA-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10NSA-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10NSA-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10NSA-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10NSA-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %mip = extractelement <2 x i16> %coords, i32 0 @@ -2515,51 +2515,51 @@ ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr0 ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.2darray), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), 
[[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX9-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX9-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX9-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX10NSA-LABEL: name: getresinfo_2darray ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX10NSA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10NSA-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10NSA-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10NSA-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10NSA-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10NSA-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY8:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr0 ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.2darray), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 ; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10NSA-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10NSA-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10NSA-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10NSA-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10NSA-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10NSA-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10NSA-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit 
$vgpr3 main_body: %mip = extractelement <2 x i16> %coords, i32 0 @@ -2572,51 +2572,51 @@ ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr0 ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.2dmsaa), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX9-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX9-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX9-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX10NSA-LABEL: name: getresinfo_2dmsaa ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; 
GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX10NSA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10NSA-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10NSA-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10NSA-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10NSA-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10NSA-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY8:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr0 ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.2dmsaa), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 ; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10NSA-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10NSA-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10NSA-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10NSA-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10NSA-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10NSA-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10NSA-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %mip = extractelement <2 x i16> %coords, i32 0 @@ -2629,51 +2629,51 @@ ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = 
PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr0 ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.2darraymsaa), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX9-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX9-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX9-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX10NSA-LABEL: name: getresinfo_2darraymsaa ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX10NSA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10NSA-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10NSA-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10NSA-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10NSA-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10NSA-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY8:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr0 ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10NSA-NEXT: 
[[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.2darraymsaa), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 ; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10NSA-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10NSA-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10NSA-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10NSA-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10NSA-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10NSA-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10NSA-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %mip = extractelement <2 x i16> %coords, i32 0 @@ -2686,43 +2686,43 @@ ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr0 ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 8, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (s32), addrspace 7) - ; GFX9-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX9-NEXT: $vgpr0 = PRED_COPY 
[[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX10NSA-LABEL: name: load_1d_V1 ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX10NSA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10NSA-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10NSA-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10NSA-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10NSA-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10NSA-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY8:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr0 ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 8, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (dereferenceable load (s32), addrspace 7) - ; GFX10NSA-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX10NSA-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 main_body: %s = extractelement <2 x i16> %coords, i32 0 @@ -2735,47 +2735,47 @@ ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX9-NEXT: 
[[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr0 ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 9, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<2 x s32>), addrspace 7) ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX9-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 ; GFX10NSA-LABEL: name: load_1d_V2 ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX10NSA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10NSA-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10NSA-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10NSA-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10NSA-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10NSA-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY8:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr0 ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = 
G_CONSTANT i32 0 - ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 9, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (dereferenceable load (<2 x s32>), addrspace 7) ; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) - ; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10NSA-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX10NSA-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10NSA-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 main_body: %s = extractelement <2 x i16> %coords, i32 0 @@ -2788,43 +2788,43 @@ ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr1 ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY9]](<2 x s16>) ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[COPY8]](s32), 2, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (s32), addrspace 7) + ; GFX9-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[PRED_COPY8]](s32), 2, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (s32), addrspace 7) ; GFX9-NEXT: 
S_ENDPGM 0 ; GFX10NSA-LABEL: name: store_1d_V1 ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX10NSA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10NSA-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10NSA-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10NSA-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10NSA-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10NSA-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10NSA-NEXT: [[PRED_COPY9:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr1 ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY9]](<2 x s16>) ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10NSA-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[COPY8]](s32), 2, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (dereferenceable store (s32), addrspace 7) + ; GFX10NSA-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[PRED_COPY8]](s32), 2, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (dereferenceable store (s32), addrspace 7) ; GFX10NSA-NEXT: S_ENDPGM 0 main_body: %s = extractelement <2 x i16> %coords, i32 0 @@ -2837,21 +2837,21 @@ ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), 
[[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) + ; GFX9-NEXT: [[PRED_COPY10:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr2 ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY10]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY10]](<2 x s16>) ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX9-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[BUILD_VECTOR1]](<2 x s32>), 12, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<2 x s32>), addrspace 7) @@ -2860,21 +2860,21 @@ ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; GFX10NSA-NEXT: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10NSA-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10NSA-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10NSA-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10NSA-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10NSA-NEXT: 
[[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10NSA-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY10:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr2 ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY10]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY10]](<2 x s16>) ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX10NSA-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[BUILD_VECTOR1]](<2 x s32>), 12, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (dereferenceable store (<2 x s32>), addrspace 7) @@ -2890,51 +2890,51 @@ ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr0 ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 1, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = 
G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX9-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX9-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX9-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX10NSA-LABEL: name: load_1d_glc ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX10NSA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10NSA-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10NSA-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10NSA-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10NSA-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10NSA-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY8:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr0 ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 1, 1 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10NSA-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10NSA-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10NSA-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10NSA-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10NSA-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10NSA-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10NSA-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: 
%s = extractelement <2 x i16> %coords, i32 0 @@ -2947,51 +2947,51 @@ ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr0 ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 2, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX9-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX9-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX9-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX10NSA-LABEL: name: load_1d_slc ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY 
$sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX10NSA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10NSA-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10NSA-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10NSA-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10NSA-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10NSA-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY8:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr0 ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 2, 1 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10NSA-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10NSA-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10NSA-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10NSA-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10NSA-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10NSA-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10NSA-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %s = extractelement <2 x i16> %coords, i32 0 @@ -3004,51 +3004,51 @@ ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; 
GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr0 ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 3, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX9-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX9-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX9-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX10NSA-LABEL: name: load_1d_glc_slc ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX10NSA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10NSA-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10NSA-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10NSA-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10NSA-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10NSA-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY8:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr0 ; GFX10NSA-NEXT: 
[[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 3, 1 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10NSA-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10NSA-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10NSA-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10NSA-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10NSA-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10NSA-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10NSA-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %s = extractelement <2 x i16> %coords, i32 0 @@ -3061,23 +3061,23 @@ ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; 
GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX9-NEXT: [[PRED_COPY12:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr4 ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY12]](<2 x s16>) ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX9-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 1, 3 :: (dereferenceable store (<4 x s32>), addrspace 7) @@ -3086,23 +3086,23 @@ ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10NSA-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10NSA-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10NSA-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10NSA-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10NSA-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10NSA-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10NSA-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10NSA-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10NSA-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10NSA-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY12:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr4 ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) + ; GFX10NSA-NEXT: 
[[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY12]](<2 x s16>) ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX10NSA-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 1, 1 :: (dereferenceable store (<4 x s32>), addrspace 7) @@ -3118,23 +3118,23 @@ ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX9-NEXT: [[PRED_COPY12:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr4 ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY12]](<2 x s16>) ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX9-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 2, 3 :: (dereferenceable store (<4 x s32>), addrspace 7) @@ -3143,23 +3143,23 @@ ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, 
$sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10NSA-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10NSA-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10NSA-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10NSA-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10NSA-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10NSA-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10NSA-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10NSA-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10NSA-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10NSA-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY12:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr4 ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY12]](<2 x s16>) ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX10NSA-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 2, 1 :: (dereferenceable store (<4 x s32>), addrspace 7) @@ -3175,23 +3175,23 @@ ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - 
; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX9-NEXT: [[PRED_COPY12:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr4 ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY12]](<2 x s16>) ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX9-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 3, 3 :: (dereferenceable store (<4 x s32>), addrspace 7) @@ -3200,23 +3200,23 @@ ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; 
GFX10NSA-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10NSA-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10NSA-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10NSA-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10NSA-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10NSA-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10NSA-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10NSA-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10NSA-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10NSA-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY12:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr4 ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY12]](<2 x s16>) ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX10NSA-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 3, 1 :: (dereferenceable store (<4 x s32>), addrspace 7) @@ -3234,10 +3234,10 @@ ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX9-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX9-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX9-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX10NSA-LABEL: name: getresinfo_dmask0 ; GFX10NSA: bb.1.main_body: @@ -3245,10 +3245,10 @@ ; GFX10NSA-NEXT: {{ $}} ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF ; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<4 x s32>) - ; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10NSA-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10NSA-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10NSA-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10NSA-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10NSA-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10NSA-NEXT: $vgpr2 = PRED_COPY 
[[UV2]](s32) + ; GFX10NSA-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %mip = extractelement <2 x i16> %coords, i32 0 @@ -3261,55 +3261,55 @@ ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr0 ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 1, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<5 x s32>) ; GFX9-NEXT: G_STORE [[UV4]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX9-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX9-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX9-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX10NSA-LABEL: name: load_1d_tfe ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: 
[[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX10NSA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10NSA-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10NSA-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10NSA-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10NSA-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10NSA-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY8:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr0 ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 1, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<5 x s32>) ; GFX10NSA-NEXT: G_STORE [[UV4]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) - ; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10NSA-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10NSA-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10NSA-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10NSA-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10NSA-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10NSA-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10NSA-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %s = extractelement <2 x i16> %coords, i32 0 @@ -3325,22 +3325,22 @@ ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY 
$sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr0 ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) @@ -3348,31 +3348,31 @@ ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR1]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<5 x s32>) ; GFX9-NEXT: G_STORE [[UV4]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX9-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX9-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX9-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX10NSA-LABEL: name: load_2d_tfe ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY 
$sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX10NSA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10NSA-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10NSA-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10NSA-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10NSA-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10NSA-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY8:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr0 ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX10NSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) @@ -3380,10 +3380,10 @@ ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR1]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<5 x s32>) ; GFX10NSA-NEXT: G_STORE [[UV4]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) - ; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10NSA-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10NSA-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10NSA-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10NSA-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10NSA-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10NSA-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10NSA-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %s = extractelement <2 x i16> %coords, i32 0 @@ -3400,27 +3400,27 @@ ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; 
GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr1 ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY9]](<2 x s16>) ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) @@ -3430,36 +3430,36 @@ ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.3d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<5 x s32>) ; GFX9-NEXT: G_STORE [[UV4]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX9-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX9-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX9-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX10NSA-LABEL: name: load_3d_tfe ; GFX10NSA: bb.1.main_body: ; 
GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX10NSA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10NSA-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10NSA-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10NSA-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10NSA-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10NSA-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY8:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr0 + ; GFX10NSA-NEXT: [[PRED_COPY9:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr1 ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX10NSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY9]](<2 x s16>) ; GFX10NSA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) @@ -3469,10 +3469,10 @@ ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.3d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<5 x s32>) ; 
GFX10NSA-NEXT: G_STORE [[UV4]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) - ; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10NSA-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10NSA-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10NSA-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10NSA-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10NSA-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10NSA-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10NSA-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %s = extractelement <2 x i16> %coords_lo, i32 0 @@ -3490,30 +3490,30 @@ ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr1 ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY9]](<2 x s16>) ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST 
[[COPY9]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY9]](<2 x s16>) ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32) ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) @@ -3522,39 +3522,39 @@ ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2darraymsaa), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<5 x s32>) ; GFX9-NEXT: G_STORE [[UV4]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX9-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX9-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX9-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX10NSA-LABEL: name: load_2darraymsaa_tfe ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX10NSA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10NSA-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10NSA-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10NSA-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10NSA-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10NSA-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY8:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr0 + ; GFX10NSA-NEXT: [[PRED_COPY9:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr1 ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX10NSA-NEXT: 
[[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX10NSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY9]](<2 x s16>) ; GFX10NSA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX10NSA-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY9]](<2 x s16>) ; GFX10NSA-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32) ; GFX10NSA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) @@ -3563,10 +3563,10 @@ ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2darraymsaa), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<5 x s32>) ; GFX10NSA-NEXT: G_STORE [[UV4]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) - ; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10NSA-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10NSA-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10NSA-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10NSA-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10NSA-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10NSA-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10NSA-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %s = extractelement <2 x i16> %coords_lo, i32 0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2d.d16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2d.d16.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2d.d16.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2d.d16.ll @@ -7,41 +7,41 @@ ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), 
[[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; UNPACKED-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; UNPACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; UNPACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; UNPACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; UNPACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; UNPACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; UNPACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; UNPACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; UNPACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; UNPACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(s16) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (s16), addrspace 7) ; UNPACKED-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](s16) - ; UNPACKED-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; UNPACKED-NEXT: $vgpr0 = PRED_COPY [[ANYEXT]](s32) ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; PACKED-LABEL: name: image_load_f16 ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; PACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; PACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; PACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; PACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; PACKED-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; PACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; PACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; PACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; PACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; PACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; PACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; PACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), 
[[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; PACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; PACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(s16) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (s16), addrspace 7) ; PACKED-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](s16) - ; PACKED-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; PACKED-NEXT: $vgpr0 = PRED_COPY [[ANYEXT]](s32) ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %tex = call half @llvm.amdgcn.image.load.2d.f16.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) ret half %tex @@ -52,18 +52,18 @@ ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; UNPACKED-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; UNPACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; UNPACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; UNPACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; UNPACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; UNPACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; UNPACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; UNPACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; UNPACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; UNPACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<2 x s16>), addrspace 7) ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s32>) ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 
@@ -73,26 +73,26 @@ ; UNPACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) ; UNPACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; UNPACKED-NEXT: $vgpr0 = PRED_COPY [[BITCAST]](<2 x s16>) ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; PACKED-LABEL: name: image_load_v2f16 ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; PACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; PACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; PACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; PACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; PACKED-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; PACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; PACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; PACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; PACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; PACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; PACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; PACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; PACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; PACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<2 x s16>), addrspace 7) - ; PACKED-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s16>) + ; PACKED-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s16>) ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %tex = call <2 x half> @llvm.amdgcn.image.load.2d.v2f16.i32(i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) ret <2 x half> %tex @@ -103,18 +103,18 @@ ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; 
UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; UNPACKED-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; UNPACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; UNPACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; UNPACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; UNPACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; UNPACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; UNPACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; UNPACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; UNPACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; UNPACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<3 x s16>), align 8, addrspace 7) ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<3 x s32>) ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 @@ -129,25 +129,25 @@ ; UNPACKED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C1]](s32) ; UNPACKED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) - ; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) + ; UNPACKED-NEXT: $vgpr0 = PRED_COPY [[BITCAST]](<2 x s16>) + ; UNPACKED-NEXT: $vgpr1 = PRED_COPY [[BITCAST1]](<2 x s16>) ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 ; PACKED-LABEL: name: image_load_v3f16 ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; PACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; PACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; PACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 
- ; PACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; PACKED-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; PACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; PACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; PACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; PACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; PACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; PACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; PACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; PACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; PACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<4 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<3 x s16>), align 8, addrspace 7) ; PACKED-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<4 x s16>) ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) @@ -158,8 +158,8 @@ ; PACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) ; PACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; PACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; PACKED-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) - ; PACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) + ; PACKED-NEXT: $vgpr0 = PRED_COPY [[UV]](<2 x s16>) + ; PACKED-NEXT: $vgpr1 = PRED_COPY [[BITCAST1]](<2 x s16>) ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %tex = call <3 x half> @llvm.amdgcn.image.load.2d.v3f16.i32(i32 7, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) ret <3 x half> %tex @@ -170,18 +170,18 @@ ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; UNPACKED-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; UNPACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; UNPACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = 
PRED_COPY $sgpr4 + ; UNPACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; UNPACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; UNPACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; UNPACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; UNPACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; UNPACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; UNPACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<4 x s16>), addrspace 7) ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<4 x s32>) ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 @@ -196,29 +196,29 @@ ; UNPACKED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32) ; UNPACKED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) - ; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) + ; UNPACKED-NEXT: $vgpr0 = PRED_COPY [[BITCAST]](<2 x s16>) + ; UNPACKED-NEXT: $vgpr1 = PRED_COPY [[BITCAST1]](<2 x s16>) ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 ; PACKED-LABEL: name: image_load_v4f16 ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; PACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; PACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; PACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; PACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; PACKED-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; PACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; PACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; PACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; PACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; PACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; PACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; PACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR 
[[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; PACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; PACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<4 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<4 x s16>), addrspace 7) ; PACKED-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<4 x s16>) - ; PACKED-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) - ; PACKED-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) + ; PACKED-NEXT: $vgpr0 = PRED_COPY [[UV]](<2 x s16>) + ; PACKED-NEXT: $vgpr1 = PRED_COPY [[UV1]](<2 x s16>) ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %tex = call <4 x half> @llvm.amdgcn.image.load.2d.v4f16.i32(i32 15, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) ret <4 x half> %tex @@ -229,45 +229,45 @@ ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; UNPACKED-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; UNPACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; UNPACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; UNPACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; UNPACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; UNPACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; UNPACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; UNPACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; UNPACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; UNPACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 ; UNPACKED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, 
[[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16), addrspace 7) ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s32>) ; UNPACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) - ; UNPACKED-NEXT: $vgpr0 = COPY [[UV]](s32) + ; UNPACKED-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; PACKED-LABEL: name: image_load_tfe_f16 ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; PACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; PACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; PACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; PACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; PACKED-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; PACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; PACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; PACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; PACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; PACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; PACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; PACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; PACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; PACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 ; PACKED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16), addrspace 7) ; PACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s32>) ; PACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) - ; PACKED-NEXT: $vgpr0 = COPY [[UV]](s32) + ; PACKED-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %res = call { half, i32 } @llvm.amdgcn.image.load.2d.sl_f16i32s.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { half, i32 } %res, 0 @@ -281,19 +281,19 @@ ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, 
$sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; UNPACKED-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; UNPACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; UNPACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; UNPACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; UNPACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; UNPACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; UNPACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; UNPACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; UNPACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; UNPACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 ; UNPACKED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<2 x s16>), addrspace 7) ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<3 x s32>) ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 @@ -304,30 +304,30 @@ ; UNPACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; UNPACKED-NEXT: G_STORE [[UV2]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) - ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; UNPACKED-NEXT: $vgpr0 = PRED_COPY [[BITCAST]](<2 x s16>) ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; PACKED-LABEL: name: image_load_tfe_v2f16 ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; PACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - 
; PACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; PACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; PACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; PACKED-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; PACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; PACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; PACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; PACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; PACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; PACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; PACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; PACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; PACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 ; PACKED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<2 x s16>), addrspace 7) ; PACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s32>) ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32) ; PACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) - ; PACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; PACKED-NEXT: $vgpr0 = PRED_COPY [[BITCAST]](<2 x s16>) ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %res = call { <2 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v2f16i32s.i32(i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { <2 x half>, i32 } %res, 0 @@ -341,19 +341,19 @@ ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; UNPACKED-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; UNPACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; 
UNPACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; UNPACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; UNPACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; UNPACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; UNPACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; UNPACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; UNPACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; UNPACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 ; UNPACKED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<3 x s16>), align 8, addrspace 7) ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<4 x s32>) ; UNPACKED-NEXT: G_STORE [[UV3]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) @@ -369,26 +369,26 @@ ; UNPACKED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C1]](s32) ; UNPACKED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) - ; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) + ; UNPACKED-NEXT: $vgpr0 = PRED_COPY [[BITCAST]](<2 x s16>) + ; UNPACKED-NEXT: $vgpr1 = PRED_COPY [[BITCAST1]](<2 x s16>) ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 ; PACKED-LABEL: name: image_load_tfe_v3f16 ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; PACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; PACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; PACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; PACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; PACKED-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; PACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; PACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; PACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; PACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; PACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; PACKED-NEXT: 
[[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; PACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; PACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; PACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 ; PACKED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<3 x s16>), align 8, addrspace 7) ; PACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<3 x s32>) ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32) @@ -402,8 +402,8 @@ ; PACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C2]](s32) ; PACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; PACKED-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; PACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) - ; PACKED-NEXT: $vgpr1 = COPY [[BITCAST3]](<2 x s16>) + ; PACKED-NEXT: $vgpr0 = PRED_COPY [[BITCAST]](<2 x s16>) + ; PACKED-NEXT: $vgpr1 = PRED_COPY [[BITCAST3]](<2 x s16>) ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %res = call { <3 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v3f16i32s.i32(i32 7, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { <3 x half>, i32 } %res, 0 @@ -417,19 +417,19 @@ ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; UNPACKED-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; UNPACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; UNPACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; UNPACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; UNPACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; UNPACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; UNPACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; UNPACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), 
[[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; UNPACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; UNPACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 ; UNPACKED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<4 x s16>), addrspace 7) ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<5 x s32>) ; UNPACKED-NEXT: G_STORE [[UV4]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) @@ -445,33 +445,33 @@ ; UNPACKED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32) ; UNPACKED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) - ; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) + ; UNPACKED-NEXT: $vgpr0 = PRED_COPY [[BITCAST]](<2 x s16>) + ; UNPACKED-NEXT: $vgpr1 = PRED_COPY [[BITCAST1]](<2 x s16>) ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 ; PACKED-LABEL: name: image_load_tfe_v4f16 ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; PACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; PACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; PACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; PACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; PACKED-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; PACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; PACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; PACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; PACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; PACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; PACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; PACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; PACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; PACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 ; PACKED-NEXT: 
[[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<4 x s16>), addrspace 7) ; PACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<3 x s32>) ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32) ; PACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV1]](s32) ; PACKED-NEXT: G_STORE [[UV2]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) - ; PACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) - ; PACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) + ; PACKED-NEXT: $vgpr0 = PRED_COPY [[BITCAST]](<2 x s16>) + ; PACKED-NEXT: $vgpr1 = PRED_COPY [[BITCAST1]](<2 x s16>) ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %res = call { <4 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v4f16i32s.i32(i32 15, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { <4 x half>, i32 } %res, 0 @@ -485,19 +485,19 @@ ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; UNPACKED-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; UNPACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 ; UNPACKED-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; UNPACKED-NEXT: $vgpr0 = COPY [[DEF]](s32) + ; UNPACKED-NEXT: $vgpr0 = PRED_COPY [[DEF]](s32) ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; PACKED-LABEL: name: image_load_f16_dmask_0000 ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; PACKED-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; PACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 ; PACKED-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; PACKED-NEXT: $vgpr0 = COPY [[DEF]](s32) + ; PACKED-NEXT: $vgpr0 = PRED_COPY [[DEF]](s32) ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %tex = call half @llvm.amdgcn.image.load.2d.f16.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) ret half %tex @@ -508,18 +508,18 @@ ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), 
[[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; UNPACKED-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; UNPACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; UNPACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; UNPACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; UNPACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; UNPACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; UNPACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; UNPACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; UNPACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; UNPACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (s16), addrspace 7) ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]], [[C]] @@ -528,26 +528,26 @@ ; UNPACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C2]](s32) ; UNPACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; UNPACKED-NEXT: $vgpr0 = PRED_COPY [[BITCAST]](<2 x s16>) ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; PACKED-LABEL: name: image_load_v2f16_dmask_1000 ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; PACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; PACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; PACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; PACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; PACKED-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; PACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; PACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; PACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; PACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = 
PRED_COPY $sgpr6 + ; PACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; PACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; PACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; PACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; PACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (s16), addrspace 7) - ; PACKED-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s16>) + ; PACKED-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s16>) ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %tex = call <2 x half> @llvm.amdgcn.image.load.2d.v2f16.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) ret <2 x half> %tex @@ -558,19 +558,19 @@ ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; UNPACKED-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; UNPACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 ; UNPACKED-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; UNPACKED-NEXT: $vgpr0 = COPY [[DEF]](<2 x s16>) + ; UNPACKED-NEXT: $vgpr0 = PRED_COPY [[DEF]](<2 x s16>) ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; PACKED-LABEL: name: image_load_v2f16_dmask_0000 ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; PACKED-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; PACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 ; PACKED-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; PACKED-NEXT: $vgpr0 = COPY [[DEF]](<2 x s16>) + ; PACKED-NEXT: $vgpr0 = PRED_COPY [[DEF]](<2 x s16>) ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %tex = call <2 x half> @llvm.amdgcn.image.load.2d.v2f16.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) ret <2 x half> %tex @@ -581,18 +581,18 @@ ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), 
[[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; UNPACKED-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; UNPACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; UNPACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; UNPACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; UNPACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; UNPACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; UNPACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; UNPACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; UNPACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; UNPACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<2 x s16>), addrspace 7) ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s32>) ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 @@ -606,33 +606,33 @@ ; UNPACKED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C1]](s32) ; UNPACKED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL1]] ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) - ; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) + ; UNPACKED-NEXT: $vgpr0 = PRED_COPY [[BITCAST]](<2 x s16>) + ; UNPACKED-NEXT: $vgpr1 = PRED_COPY [[BITCAST1]](<2 x s16>) ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 ; PACKED-LABEL: name: image_load_v3f16_dmask_1100 ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; PACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; PACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; PACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; PACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; PACKED-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; PACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; PACKED-NEXT: 
[[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; PACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; PACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; PACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; PACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; PACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; PACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; PACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<2 x s16>), addrspace 7) ; PACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; PACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; PACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32) ; PACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL]] ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; PACKED-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s16>) - ; PACKED-NEXT: $vgpr1 = COPY [[BITCAST]](<2 x s16>) + ; PACKED-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s16>) + ; PACKED-NEXT: $vgpr1 = PRED_COPY [[BITCAST]](<2 x s16>) ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %tex = call <3 x half> @llvm.amdgcn.image.load.2d.v3f16.i32(i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) ret <3 x half> %tex @@ -643,18 +643,18 @@ ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; UNPACKED-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; UNPACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; UNPACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; UNPACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; UNPACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; UNPACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; UNPACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; UNPACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; UNPACKED-NEXT: 
[[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; UNPACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; UNPACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (s16), addrspace 7) ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]], [[C]] @@ -665,33 +665,33 @@ ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; UNPACKED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C1]], [[SHL]] ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) - ; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) + ; UNPACKED-NEXT: $vgpr0 = PRED_COPY [[BITCAST]](<2 x s16>) + ; UNPACKED-NEXT: $vgpr1 = PRED_COPY [[BITCAST1]](<2 x s16>) ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 ; PACKED-LABEL: name: image_load_v3f16_dmask_1000 ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; PACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; PACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; PACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; PACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; PACKED-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; PACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; PACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; PACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; PACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; PACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; PACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; PACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; PACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; PACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) ; PACKED-NEXT: 
[[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (s16), addrspace 7) ; PACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; PACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; PACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32) ; PACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL]] ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; PACKED-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s16>) - ; PACKED-NEXT: $vgpr1 = COPY [[BITCAST]](<2 x s16>) + ; PACKED-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s16>) + ; PACKED-NEXT: $vgpr1 = PRED_COPY [[BITCAST]](<2 x s16>) ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %tex = call <3 x half> @llvm.amdgcn.image.load.2d.v3f16.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) ret <3 x half> %tex @@ -702,8 +702,8 @@ ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; UNPACKED-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; UNPACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 ; UNPACKED-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) @@ -714,15 +714,15 @@ ; UNPACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) ; UNPACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; UNPACKED-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) - ; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) + ; UNPACKED-NEXT: $vgpr0 = PRED_COPY [[UV]](<2 x s16>) + ; UNPACKED-NEXT: $vgpr1 = PRED_COPY [[BITCAST1]](<2 x s16>) ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 ; PACKED-LABEL: name: image_load_v3f16_dmask_0000 ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; PACKED-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; PACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 ; PACKED-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; PACKED-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) @@ -733,8 +733,8 @@ ; PACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) ; PACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; PACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; PACKED-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) - ; PACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) + ; PACKED-NEXT: $vgpr0 = PRED_COPY [[UV]](<2 x s16>) + ; PACKED-NEXT: $vgpr1 = PRED_COPY [[BITCAST1]](<2 x s16>) ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %tex = call <3 x half> @llvm.amdgcn.image.load.2d.v3f16.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) ret <3 x 
half> %tex @@ -745,18 +745,18 @@ ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; UNPACKED-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; UNPACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; UNPACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; UNPACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; UNPACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; UNPACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; UNPACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; UNPACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; UNPACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; UNPACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<3 x s16>), align 8, addrspace 7) ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<3 x s32>) ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 @@ -771,29 +771,29 @@ ; UNPACKED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C1]](s32) ; UNPACKED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) - ; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) + ; UNPACKED-NEXT: $vgpr0 = PRED_COPY [[BITCAST]](<2 x s16>) + ; UNPACKED-NEXT: $vgpr1 = PRED_COPY [[BITCAST1]](<2 x s16>) ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 ; PACKED-LABEL: name: image_load_v4f16_dmask_1110 ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; PACKED-NEXT: 
[[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; PACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; PACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; PACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; PACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; PACKED-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; PACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; PACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; PACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; PACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; PACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; PACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; PACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; PACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; PACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<4 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<3 x s16>), align 8, addrspace 7) ; PACKED-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<4 x s16>) - ; PACKED-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) - ; PACKED-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) + ; PACKED-NEXT: $vgpr0 = PRED_COPY [[UV]](<2 x s16>) + ; PACKED-NEXT: $vgpr1 = PRED_COPY [[UV1]](<2 x s16>) ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %tex = call <4 x half> @llvm.amdgcn.image.load.2d.v4f16.i32(i32 7, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) ret <4 x half> %tex @@ -804,18 +804,18 @@ ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR 
[[COPY8]](s32), [[COPY9]](s32) + ; UNPACKED-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; UNPACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; UNPACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; UNPACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; UNPACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; UNPACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; UNPACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; UNPACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; UNPACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; UNPACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<2 x s16>), addrspace 7) ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s32>) ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 @@ -829,29 +829,29 @@ ; UNPACKED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C1]](s32) ; UNPACKED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL1]] ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) - ; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) + ; UNPACKED-NEXT: $vgpr0 = PRED_COPY [[BITCAST]](<2 x s16>) + ; UNPACKED-NEXT: $vgpr1 = PRED_COPY [[BITCAST1]](<2 x s16>) ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 ; PACKED-LABEL: name: image_load_v4f16_dmask_1100 ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; PACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; PACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; PACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; PACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; PACKED-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; PACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; PACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; PACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; PACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; PACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; PACKED-NEXT: 
[[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; PACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; PACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; PACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<2 x s16>), addrspace 7) ; PACKED-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; PACKED-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s16>) - ; PACKED-NEXT: $vgpr1 = COPY [[DEF]](<2 x s16>) + ; PACKED-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s16>) + ; PACKED-NEXT: $vgpr1 = PRED_COPY [[DEF]](<2 x s16>) ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %tex = call <4 x half> @llvm.amdgcn.image.load.2d.v4f16.i32(i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) ret <4 x half> %tex @@ -862,18 +862,18 @@ ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; UNPACKED-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; UNPACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; UNPACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; UNPACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; UNPACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; UNPACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; UNPACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; UNPACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; UNPACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; UNPACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) ; UNPACKED-NEXT: 
[[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (s16), addrspace 7) ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]], [[C]] @@ -884,29 +884,29 @@ ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; UNPACKED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C1]], [[SHL]] ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) - ; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) + ; UNPACKED-NEXT: $vgpr0 = PRED_COPY [[BITCAST]](<2 x s16>) + ; UNPACKED-NEXT: $vgpr1 = PRED_COPY [[BITCAST1]](<2 x s16>) ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 ; PACKED-LABEL: name: image_load_v4f16_dmask_1000 ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; PACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; PACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; PACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; PACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; PACKED-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; PACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; PACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; PACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; PACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; PACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; PACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; PACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; PACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; PACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (s16), addrspace 7) ; PACKED-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; PACKED-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s16>) - ; PACKED-NEXT: $vgpr1 = COPY [[DEF]](<2 x s16>) + ; PACKED-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s16>) + ; 
PACKED-NEXT: $vgpr1 = PRED_COPY [[DEF]](<2 x s16>) ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %tex = call <4 x half> @llvm.amdgcn.image.load.2d.v4f16.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) ret <4 x half> %tex @@ -917,23 +917,23 @@ ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; UNPACKED-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; UNPACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 ; UNPACKED-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; UNPACKED-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) - ; UNPACKED-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) + ; UNPACKED-NEXT: $vgpr0 = PRED_COPY [[UV]](<2 x s16>) + ; UNPACKED-NEXT: $vgpr1 = PRED_COPY [[UV1]](<2 x s16>) ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 ; PACKED-LABEL: name: image_load_v4f16_dmask_0000 ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; PACKED-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; PACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 ; PACKED-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; PACKED-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; PACKED-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) - ; PACKED-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) + ; PACKED-NEXT: $vgpr0 = PRED_COPY [[UV]](<2 x s16>) + ; PACKED-NEXT: $vgpr1 = PRED_COPY [[UV1]](<2 x s16>) ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %tex = call <4 x half> @llvm.amdgcn.image.load.2d.v4f16.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) ret <4 x half> %tex @@ -944,45 +944,45 @@ ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; UNPACKED-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; UNPACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; UNPACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; UNPACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; UNPACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; UNPACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; 
UNPACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; UNPACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; UNPACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; UNPACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 ; UNPACKED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16), addrspace 7) ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s32>) ; UNPACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) - ; UNPACKED-NEXT: $vgpr0 = COPY [[UV]](s32) + ; UNPACKED-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; PACKED-LABEL: name: image_load_tfe_f16_dmask_0000 ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; PACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; PACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; PACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; PACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; PACKED-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; PACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; PACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; PACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; PACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; PACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; PACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; PACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; PACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; PACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 ; PACKED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), 
[[PRED_COPY9]](s32) ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16), addrspace 7) ; PACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s32>) ; PACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) - ; PACKED-NEXT: $vgpr0 = COPY [[UV]](s32) + ; PACKED-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %res = call { half, i32 } @llvm.amdgcn.image.load.2d.sl_f16i32s.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { half, i32 } %res, 0 @@ -996,19 +996,19 @@ ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; UNPACKED-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; UNPACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; UNPACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; UNPACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; UNPACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; UNPACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; UNPACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; UNPACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; UNPACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; UNPACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 ; UNPACKED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16), addrspace 7) ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s32>) ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 @@ -1019,30 +1019,30 @@ ; UNPACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; UNPACKED-NEXT: 
[[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; UNPACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) - ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; UNPACKED-NEXT: $vgpr0 = PRED_COPY [[BITCAST]](<2 x s16>) ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; PACKED-LABEL: name: image_load_tfe_v2f16_dmask_1000 ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; PACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; PACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; PACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; PACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; PACKED-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; PACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; PACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; PACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; PACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; PACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; PACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; PACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; PACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; PACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 ; PACKED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16), addrspace 7) ; PACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s32>) ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32) ; PACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) - ; PACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; PACKED-NEXT: $vgpr0 = PRED_COPY [[BITCAST]](<2 x s16>) ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %res = call { <2 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v2f16i32s.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { <2 x half>, i32 } %res, 0 @@ -1056,19 +1056,19 @@ ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; 
UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; UNPACKED-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; UNPACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; UNPACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; UNPACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; UNPACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; UNPACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; UNPACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; UNPACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; UNPACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; UNPACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 ; UNPACKED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16), addrspace 7) ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s32>) ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 @@ -1079,30 +1079,30 @@ ; UNPACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; UNPACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) - ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; UNPACKED-NEXT: $vgpr0 = PRED_COPY [[BITCAST]](<2 x s16>) ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; PACKED-LABEL: name: image_load_tfe_v2f16_dmask_0000 ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; PACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; PACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; 
PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; PACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; PACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; PACKED-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; PACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; PACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; PACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; PACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; PACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; PACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; PACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; PACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; PACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 ; PACKED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16), addrspace 7) ; PACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s32>) ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32) ; PACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) - ; PACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; PACKED-NEXT: $vgpr0 = PRED_COPY [[BITCAST]](<2 x s16>) ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %res = call { <2 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v2f16i32s.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { <2 x half>, i32 } %res, 0 @@ -1116,19 +1116,19 @@ ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; UNPACKED-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; UNPACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; UNPACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 
+ ; UNPACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; UNPACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; UNPACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; UNPACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; UNPACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; UNPACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; UNPACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 ; UNPACKED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<2 x s16>), addrspace 7) ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<3 x s32>) ; UNPACKED-NEXT: G_STORE [[UV2]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) @@ -1143,26 +1143,26 @@ ; UNPACKED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C1]](s32) ; UNPACKED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL1]] ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) - ; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) + ; UNPACKED-NEXT: $vgpr0 = PRED_COPY [[BITCAST]](<2 x s16>) + ; UNPACKED-NEXT: $vgpr1 = PRED_COPY [[BITCAST1]](<2 x s16>) ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 ; PACKED-LABEL: name: image_load_tfe_v3f16_dmask_1100 ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; PACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; PACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; PACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; PACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; PACKED-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; PACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; PACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; PACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; PACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; PACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; PACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; PACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY 
$sgpr9 + ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; PACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; PACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 ; PACKED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<2 x s16>), addrspace 7) ; PACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s32>) ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32) @@ -1172,8 +1172,8 @@ ; PACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32) ; PACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL]] ; PACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; PACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) - ; PACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) + ; PACKED-NEXT: $vgpr0 = PRED_COPY [[BITCAST]](<2 x s16>) + ; PACKED-NEXT: $vgpr1 = PRED_COPY [[BITCAST1]](<2 x s16>) ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %res = call { <3 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v3f16i32s.i32(i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { <3 x half>, i32 } %res, 0 @@ -1187,19 +1187,19 @@ ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; UNPACKED-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; UNPACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; UNPACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; UNPACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; UNPACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; UNPACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; UNPACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; UNPACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), 
[[PRED_COPY7]](s32) + ; UNPACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; UNPACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 ; UNPACKED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16), addrspace 7) ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s32>) ; UNPACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) @@ -1212,26 +1212,26 @@ ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; UNPACKED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C1]], [[SHL]] ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) - ; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) + ; UNPACKED-NEXT: $vgpr0 = PRED_COPY [[BITCAST]](<2 x s16>) + ; UNPACKED-NEXT: $vgpr1 = PRED_COPY [[BITCAST1]](<2 x s16>) ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 ; PACKED-LABEL: name: image_load_tfe_v3f16_dmask_1000 ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; PACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; PACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; PACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; PACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; PACKED-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; PACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; PACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; PACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; PACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; PACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; PACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; PACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; PACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; PACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 ; PACKED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x 
s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16), addrspace 7) ; PACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s32>) ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32) @@ -1241,8 +1241,8 @@ ; PACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32) ; PACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL]] ; PACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; PACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) - ; PACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) + ; PACKED-NEXT: $vgpr0 = PRED_COPY [[BITCAST]](<2 x s16>) + ; PACKED-NEXT: $vgpr1 = PRED_COPY [[BITCAST1]](<2 x s16>) ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %res = call { <3 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v3f16i32s.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { <3 x half>, i32 } %res, 0 @@ -1256,19 +1256,19 @@ ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; UNPACKED-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; UNPACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; UNPACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; UNPACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; UNPACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; UNPACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; UNPACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; UNPACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; UNPACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; UNPACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 ; UNPACKED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x 
s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16), addrspace 7) ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s32>) ; UNPACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) @@ -1281,26 +1281,26 @@ ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; UNPACKED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C1]], [[SHL]] ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) - ; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) + ; UNPACKED-NEXT: $vgpr0 = PRED_COPY [[BITCAST]](<2 x s16>) + ; UNPACKED-NEXT: $vgpr1 = PRED_COPY [[BITCAST1]](<2 x s16>) ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 ; PACKED-LABEL: name: image_load_tfe_v3f16_dmask_0000 ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; PACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; PACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; PACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; PACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; PACKED-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; PACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; PACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; PACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; PACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; PACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; PACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; PACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; PACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; PACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 ; PACKED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16), addrspace 7) ; PACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s32>) ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32) @@ -1310,8 +1310,8 @@ ; PACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C]], 
[[C1]](s32) ; PACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL]] ; PACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; PACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) - ; PACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) + ; PACKED-NEXT: $vgpr0 = PRED_COPY [[BITCAST]](<2 x s16>) + ; PACKED-NEXT: $vgpr1 = PRED_COPY [[BITCAST1]](<2 x s16>) ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %res = call { <3 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v3f16i32s.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { <3 x half>, i32 } %res, 0 @@ -1325,19 +1325,19 @@ ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; UNPACKED-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; UNPACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; UNPACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; UNPACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; UNPACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; UNPACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; UNPACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; UNPACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; UNPACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; UNPACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 ; UNPACKED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<3 x s16>), align 8, addrspace 7) ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<4 x s32>) ; UNPACKED-NEXT: G_STORE [[UV3]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) @@ -1353,33 +1353,33 @@ ; UNPACKED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C1]](s32) ; UNPACKED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x 
s16>) = G_BITCAST [[OR1]](s32) - ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) - ; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) + ; UNPACKED-NEXT: $vgpr0 = PRED_COPY [[BITCAST]](<2 x s16>) + ; UNPACKED-NEXT: $vgpr1 = PRED_COPY [[BITCAST1]](<2 x s16>) ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 ; PACKED-LABEL: name: image_load_tfe_v4f16_dmask_1110 ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; PACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; PACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; PACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; PACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; PACKED-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; PACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; PACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; PACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; PACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; PACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; PACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; PACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; PACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; PACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 ; PACKED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<3 x s16>), align 8, addrspace 7) ; PACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<3 x s32>) ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32) ; PACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV1]](s32) ; PACKED-NEXT: G_STORE [[UV2]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) - ; PACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) - ; PACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) + ; PACKED-NEXT: $vgpr0 = PRED_COPY [[BITCAST]](<2 x s16>) + ; PACKED-NEXT: $vgpr1 = PRED_COPY [[BITCAST1]](<2 x s16>) ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %res = call { <4 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v4f16i32s.i32(i32 7, i32 %s, i32 %t, <8 
x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { <4 x half>, i32 } %res, 0 @@ -1393,19 +1393,19 @@ ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; UNPACKED-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; UNPACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; UNPACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; UNPACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; UNPACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; UNPACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; UNPACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; UNPACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; UNPACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; UNPACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 ; UNPACKED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<2 x s16>), addrspace 7) ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<3 x s32>) ; UNPACKED-NEXT: G_STORE [[UV2]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) @@ -1420,33 +1420,33 @@ ; UNPACKED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C1]](s32) ; UNPACKED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL1]] ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) - ; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) + ; UNPACKED-NEXT: $vgpr0 = PRED_COPY [[BITCAST]](<2 x s16>) + ; UNPACKED-NEXT: $vgpr1 = PRED_COPY [[BITCAST1]](<2 x s16>) ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 ; PACKED-LABEL: name: image_load_tfe_v4f16_dmask_1100 ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: 
[[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; PACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; PACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; PACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; PACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; PACKED-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; PACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; PACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; PACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; PACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; PACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; PACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; PACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; PACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; PACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 ; PACKED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<2 x s16>), addrspace 7) ; PACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s32>) ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32) ; PACKED-NEXT: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF ; PACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) - ; PACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) - ; PACKED-NEXT: $vgpr1 = COPY [[DEF1]](<2 x s16>) + ; PACKED-NEXT: $vgpr0 = PRED_COPY [[BITCAST]](<2 x s16>) + ; PACKED-NEXT: $vgpr1 = PRED_COPY [[DEF1]](<2 x s16>) ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %res = call { <4 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v4f16i32s.i32(i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { <4 x half>, i32 } %res, 0 @@ -1460,19 +1460,19 @@ ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; 
UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; UNPACKED-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; UNPACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; UNPACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; UNPACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; UNPACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; UNPACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; UNPACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; UNPACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; UNPACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; UNPACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 ; UNPACKED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16), addrspace 7) ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s32>) ; UNPACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) @@ -1485,33 +1485,33 @@ ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; UNPACKED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C1]], [[SHL]] ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) - ; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) + ; UNPACKED-NEXT: $vgpr0 = PRED_COPY [[BITCAST]](<2 x s16>) + ; UNPACKED-NEXT: $vgpr1 = PRED_COPY [[BITCAST1]](<2 x s16>) ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 ; PACKED-LABEL: name: image_load_tfe_v4f16_dmask_1000 ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; PACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; PACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; PACKED-NEXT: 
[[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; PACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; PACKED-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; PACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; PACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; PACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; PACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; PACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; PACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; PACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; PACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; PACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 ; PACKED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16), addrspace 7) ; PACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s32>) ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32) ; PACKED-NEXT: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF ; PACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) - ; PACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) - ; PACKED-NEXT: $vgpr1 = COPY [[DEF1]](<2 x s16>) + ; PACKED-NEXT: $vgpr0 = PRED_COPY [[BITCAST]](<2 x s16>) + ; PACKED-NEXT: $vgpr1 = PRED_COPY [[DEF1]](<2 x s16>) ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %res = call { <4 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v4f16i32s.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { <4 x half>, i32 } %res, 0 @@ -1525,19 +1525,19 @@ ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; UNPACKED-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; UNPACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; UNPACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; UNPACKED-NEXT: 
[[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; UNPACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; UNPACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; UNPACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; UNPACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; UNPACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; UNPACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 ; UNPACKED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16), addrspace 7) ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s32>) ; UNPACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) @@ -1550,33 +1550,33 @@ ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; UNPACKED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C1]], [[SHL]] ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) - ; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) + ; UNPACKED-NEXT: $vgpr0 = PRED_COPY [[BITCAST]](<2 x s16>) + ; UNPACKED-NEXT: $vgpr1 = PRED_COPY [[BITCAST1]](<2 x s16>) ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 ; PACKED-LABEL: name: image_load_tfe_v4f16_dmask_0000 ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; PACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; PACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; PACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; PACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; PACKED-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; PACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; PACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; PACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; PACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; PACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; PACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; PACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; PACKED-NEXT: 
[[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; PACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; PACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 ; PACKED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16), addrspace 7) ; PACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s32>) ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32) ; PACKED-NEXT: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF ; PACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) - ; PACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) - ; PACKED-NEXT: $vgpr1 = COPY [[DEF1]](<2 x s16>) + ; PACKED-NEXT: $vgpr0 = PRED_COPY [[BITCAST]](<2 x s16>) + ; PACKED-NEXT: $vgpr1 = PRED_COPY [[DEF1]](<2 x s16>) ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %res = call { <4 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v4f16i32s.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { <4 x half>, i32 } %res, 0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2d.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2d.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2d.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2d.ll @@ -6,20 +6,20 @@ ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GCN-NEXT: 
[[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GCN-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) ; GCN-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (s32), addrspace 7) - ; GCN-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GCN-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GCN-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %tex = call float @llvm.amdgcn.image.load.2d.f32.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) ret float %tex @@ -30,22 +30,22 @@ ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GCN-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) ; GCN-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<2 x s32>), addrspace 7) ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) - ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GCN-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GCN-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GCN-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) ; GCN-NEXT: SI_RETURN_TO_EPILOG 
implicit $vgpr0, implicit $vgpr1 %tex = call <2 x float> @llvm.amdgcn.image.load.2d.v2f32.i32(i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) ret <2 x float> %tex @@ -56,23 +56,23 @@ ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GCN-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) ; GCN-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<3 x s32>), align 16, addrspace 7) ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<3 x s32>) - ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GCN-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GCN-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; GCN-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GCN-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GCN-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) ; GCN-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 %tex = call <3 x float> @llvm.amdgcn.image.load.2d.v3f32.i32(i32 7, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) ret <3 x float> %tex @@ -83,24 +83,24 @@ ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY 
$sgpr8 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GCN-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) ; GCN-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GCN-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GCN-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GCN-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GCN-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GCN-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GCN-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GCN-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GCN-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 %tex = call <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32(i32 15, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) ret <4 x float> %tex @@ -111,23 +111,23 @@ ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GCN-NEXT: 
[[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GCN-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) ; GCN-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s32), addrspace 7) ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) ; GCN-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) - ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32) + ; GCN-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) ; GCN-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %res = call { float, i32 } @llvm.amdgcn.image.load.2d.sl_f32i32s.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { float, i32 } %res, 0 @@ -141,24 +141,24 @@ ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GCN-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x 
s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) ; GCN-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<2 x s32>), addrspace 7) ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<3 x s32>) ; GCN-NEXT: G_STORE [[UV2]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) - ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GCN-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GCN-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GCN-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) ; GCN-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %res = call { <2 x float>, i32 } @llvm.amdgcn.image.load.2d.sl_v2f32i32s.i32(i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { <2 x float>, i32 } %res, 0 @@ -172,25 +172,25 @@ ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GCN-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) ; GCN-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<3 x s32>), align 16, addrspace 7) ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GCN-NEXT: 
G_STORE [[UV3]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) - ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GCN-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GCN-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; GCN-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GCN-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GCN-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) ; GCN-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 %res = call { <3 x float>, i32 } @llvm.amdgcn.image.load.2d.sl_v3f32i32s.i32(i32 7, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { <3 x float>, i32 } %res, 0 @@ -204,26 +204,26 @@ ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GCN-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) ; GCN-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<5 x s32>) ; GCN-NEXT: G_STORE [[UV4]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) - ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GCN-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GCN-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GCN-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GCN-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GCN-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GCN-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GCN-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GCN-NEXT: 
SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 %res = call { <4 x float>, i32 } @llvm.amdgcn.image.load.2d.sl_v4f32i32s.i32(i32 15, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { <4 x float>, i32 } %res, 0 @@ -237,10 +237,10 @@ ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: $vgpr0 = COPY [[DEF]](s32) + ; GCN-NEXT: $vgpr0 = PRED_COPY [[DEF]](s32) ; GCN-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %tex = call float @llvm.amdgcn.image.load.2d.f32.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) ret float %tex @@ -251,22 +251,22 @@ ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GCN-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) ; GCN-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (s32), addrspace 7) ; GCN-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) - ; GCN-NEXT: $vgpr1 = COPY [[DEF]](s32) + ; GCN-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GCN-NEXT: $vgpr1 = PRED_COPY [[DEF]](s32) ; GCN-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %tex = call <2 x float> 
@llvm.amdgcn.image.load.2d.v2f32.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) ret <2 x float> %tex @@ -277,12 +277,12 @@ ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<2 x s32>) - ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GCN-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GCN-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GCN-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) ; GCN-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %tex = call <2 x float> @llvm.amdgcn.image.load.2d.v2f32.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) ret <2 x float> %tex @@ -293,24 +293,24 @@ ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GCN-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) ; GCN-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<2 x s32>), addrspace 7) ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) ; GCN-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GCN-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GCN-NEXT: $vgpr2 = COPY [[DEF]](s32) 
+ ; GCN-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GCN-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GCN-NEXT: $vgpr2 = PRED_COPY [[DEF]](s32) ; GCN-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 %tex = call <3 x float> @llvm.amdgcn.image.load.2d.v3f32.i32(i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) ret <3 x float> %tex @@ -321,23 +321,23 @@ ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GCN-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) ; GCN-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (s32), addrspace 7) ; GCN-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) - ; GCN-NEXT: $vgpr1 = COPY [[DEF]](s32) - ; GCN-NEXT: $vgpr2 = COPY [[DEF]](s32) + ; GCN-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GCN-NEXT: $vgpr1 = PRED_COPY [[DEF]](s32) + ; GCN-NEXT: $vgpr2 = PRED_COPY [[DEF]](s32) ; GCN-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 %tex = call <3 x float> @llvm.amdgcn.image.load.2d.v3f32.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) ret <3 x float> %tex @@ -348,13 +348,13 @@ ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 ; GCN-NEXT: 
[[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<3 x s32>) - ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GCN-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GCN-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; GCN-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GCN-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GCN-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) ; GCN-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 %tex = call <3 x float> @llvm.amdgcn.image.load.2d.v3f32.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) ret <3 x float> %tex @@ -365,25 +365,25 @@ ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GCN-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) ; GCN-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<3 x s32>), align 16, addrspace 7) ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<3 x s32>) ; GCN-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GCN-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GCN-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GCN-NEXT: $vgpr3 = COPY [[DEF]](s32) + ; GCN-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GCN-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GCN-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GCN-NEXT: $vgpr3 = PRED_COPY [[DEF]](s32) ; GCN-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 %tex = call <4 x float> 
@llvm.amdgcn.image.load.2d.v4f32.i32(i32 7, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) ret <4 x float> %tex @@ -394,25 +394,25 @@ ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GCN-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) ; GCN-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<2 x s32>), addrspace 7) ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) ; GCN-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GCN-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GCN-NEXT: $vgpr2 = COPY [[DEF]](s32) - ; GCN-NEXT: $vgpr3 = COPY [[DEF]](s32) + ; GCN-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GCN-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GCN-NEXT: $vgpr2 = PRED_COPY [[DEF]](s32) + ; GCN-NEXT: $vgpr3 = PRED_COPY [[DEF]](s32) ; GCN-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 %tex = call <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32(i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) ret <4 x float> %tex @@ -423,24 +423,24 @@ ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GCN-NEXT: 
[[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GCN-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) ; GCN-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (s32), addrspace 7) ; GCN-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) - ; GCN-NEXT: $vgpr1 = COPY [[DEF]](s32) - ; GCN-NEXT: $vgpr2 = COPY [[DEF]](s32) - ; GCN-NEXT: $vgpr3 = COPY [[DEF]](s32) + ; GCN-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GCN-NEXT: $vgpr1 = PRED_COPY [[DEF]](s32) + ; GCN-NEXT: $vgpr2 = PRED_COPY [[DEF]](s32) + ; GCN-NEXT: $vgpr3 = PRED_COPY [[DEF]](s32) ; GCN-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 %tex = call <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) ret <4 x float> %tex @@ -451,14 +451,14 @@ ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<4 x s32>) - ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GCN-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GCN-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GCN-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GCN-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GCN-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GCN-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GCN-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GCN-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 %tex = call <4 x float> 
@llvm.amdgcn.image.load.2d.v4f32.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) ret <4 x float> %tex @@ -469,23 +469,23 @@ ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GCN-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) ; GCN-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s32), addrspace 7) ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) ; GCN-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) - ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32) + ; GCN-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) ; GCN-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %res = call { float, i32 } @llvm.amdgcn.image.load.2d.sl_f32i32s.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { float, i32 } %res, 0 @@ -499,25 +499,25 @@ ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = 
G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GCN-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) ; GCN-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s32), addrspace 7) ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GCN-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) - ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GCN-NEXT: $vgpr1 = COPY [[DEF1]](s32) + ; GCN-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GCN-NEXT: $vgpr1 = PRED_COPY [[DEF1]](s32) ; GCN-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %res = call { <2 x float>, i32 } @llvm.amdgcn.image.load.2d.sl_v2f32i32s.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { <2 x float>, i32 } %res, 0 @@ -531,25 +531,25 @@ ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GCN-NEXT: 
[[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GCN-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) ; GCN-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s32), addrspace 7) ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GCN-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) - ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GCN-NEXT: $vgpr1 = COPY [[DEF1]](s32) + ; GCN-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GCN-NEXT: $vgpr1 = PRED_COPY [[DEF1]](s32) ; GCN-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %res = call { <2 x float>, i32 } @llvm.amdgcn.image.load.2d.sl_v2f32i32s.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { <2 x float>, i32 } %res, 0 @@ -563,26 +563,26 @@ ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GCN-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF 
- ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) ; GCN-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<2 x s32>), addrspace 7) ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<3 x s32>) ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GCN-NEXT: G_STORE [[UV2]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) - ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GCN-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GCN-NEXT: $vgpr2 = COPY [[DEF1]](s32) + ; GCN-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GCN-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GCN-NEXT: $vgpr2 = PRED_COPY [[DEF1]](s32) ; GCN-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 %res = call { <3 x float>, i32 } @llvm.amdgcn.image.load.2d.sl_v3f32i32s.i32(i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { <3 x float>, i32 } %res, 0 @@ -596,26 +596,26 @@ ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GCN-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) ; GCN-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s32), 
addrspace 7) ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GCN-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) - ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GCN-NEXT: $vgpr1 = COPY [[DEF1]](s32) - ; GCN-NEXT: $vgpr2 = COPY [[DEF1]](s32) + ; GCN-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GCN-NEXT: $vgpr1 = PRED_COPY [[DEF1]](s32) + ; GCN-NEXT: $vgpr2 = PRED_COPY [[DEF1]](s32) ; GCN-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 %res = call { <3 x float>, i32 } @llvm.amdgcn.image.load.2d.sl_v3f32i32s.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { <3 x float>, i32 } %res, 0 @@ -629,26 +629,26 @@ ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GCN-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) ; GCN-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s32), addrspace 7) ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GCN-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) - ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GCN-NEXT: $vgpr1 = COPY [[DEF1]](s32) - ; GCN-NEXT: $vgpr2 = COPY [[DEF1]](s32) + ; GCN-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GCN-NEXT: $vgpr1 = 
PRED_COPY [[DEF1]](s32) + ; GCN-NEXT: $vgpr2 = PRED_COPY [[DEF1]](s32) ; GCN-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 %res = call { <3 x float>, i32 } @llvm.amdgcn.image.load.2d.sl_v3f32i32s.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { <3 x float>, i32 } %res, 0 @@ -662,27 +662,27 @@ ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GCN-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) ; GCN-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<3 x s32>), align 16, addrspace 7) ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GCN-NEXT: G_STORE [[UV3]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) - ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GCN-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GCN-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GCN-NEXT: $vgpr3 = COPY [[DEF1]](s32) + ; GCN-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GCN-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GCN-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GCN-NEXT: $vgpr3 = PRED_COPY [[DEF1]](s32) ; GCN-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 %res = call { <4 x float>, i32 } @llvm.amdgcn.image.load.2d.sl_v4f32i32s.i32(i32 7, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { <4 x 
float>, i32 } %res, 0 @@ -696,27 +696,27 @@ ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GCN-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) ; GCN-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<2 x s32>), addrspace 7) ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<3 x s32>) ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GCN-NEXT: G_STORE [[UV2]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) - ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GCN-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GCN-NEXT: $vgpr2 = COPY [[DEF1]](s32) - ; GCN-NEXT: $vgpr3 = COPY [[DEF1]](s32) + ; GCN-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GCN-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GCN-NEXT: $vgpr2 = PRED_COPY [[DEF1]](s32) + ; GCN-NEXT: $vgpr3 = PRED_COPY [[DEF1]](s32) ; GCN-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 %res = call { <4 x float>, i32 } @llvm.amdgcn.image.load.2d.sl_v4f32i32s.i32(i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { <4 x float>, i32 } %res, 0 @@ -730,27 +730,27 @@ ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 
- ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GCN-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) ; GCN-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s32), addrspace 7) ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GCN-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) - ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GCN-NEXT: $vgpr1 = COPY [[DEF1]](s32) - ; GCN-NEXT: $vgpr2 = COPY [[DEF1]](s32) - ; GCN-NEXT: $vgpr3 = COPY [[DEF1]](s32) + ; GCN-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GCN-NEXT: $vgpr1 = PRED_COPY [[DEF1]](s32) + ; GCN-NEXT: $vgpr2 = PRED_COPY [[DEF1]](s32) + ; GCN-NEXT: $vgpr3 = PRED_COPY [[DEF1]](s32) ; GCN-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 %res = call { <4 x float>, i32 } @llvm.amdgcn.image.load.2d.sl_v4f32i32s.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { <4 x float>, i32 } %res, 0 @@ -764,27 +764,27 @@ ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), 
[[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GCN-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) ; GCN-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s32), addrspace 7) ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GCN-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) - ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GCN-NEXT: $vgpr1 = COPY [[DEF1]](s32) - ; GCN-NEXT: $vgpr2 = COPY [[DEF1]](s32) - ; GCN-NEXT: $vgpr3 = COPY [[DEF1]](s32) + ; GCN-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GCN-NEXT: $vgpr1 = PRED_COPY [[DEF1]](s32) + ; GCN-NEXT: $vgpr2 = PRED_COPY [[DEF1]](s32) + ; GCN-NEXT: $vgpr3 = PRED_COPY [[DEF1]](s32) ; GCN-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 %res = call { <4 x float>, i32 } @llvm.amdgcn.image.load.2d.sl_v4f32i32s.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { <4 x float>, i32 } %res, 0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2darraymsaa.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2darraymsaa.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2darraymsaa.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2darraymsaa.ll @@ -8,50 +8,50 @@ ; GFX6: bb.1 (%ir-block.0): ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX6-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX6-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX6-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), 
[[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX6-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX6-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX6-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX6-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX6-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX6-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX6-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX6-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX6-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX6-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX6-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX6-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX6-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) ; GFX6-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2darraymsaa), 15, [[BUILD_VECTOR1]](<4 x s32>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX6-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX6-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX6-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX6-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX6-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX6-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX6-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX6-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX10NSA-LABEL: name: load_2darraymsaa ; GFX10NSA: bb.1 (%ir-block.0): ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10NSA-NEXT: 
[[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10NSA-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2darraymsaa), 15, [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<4 x s32>), addrspace 7) + ; GFX10NSA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10NSA-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10NSA-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10NSA-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10NSA-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10NSA-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10NSA-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10NSA-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2darraymsaa), 15, [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10NSA-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10NSA-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10NSA-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10NSA-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10NSA-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10NSA-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10NSA-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 %v = call <4 x float> @llvm.amdgcn.image.load.2darraymsaa.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %slice, i32 %fragid, <8 x i32> %rsrc, i32 0, i32 0) ret <4 x float> %v @@ -62,58 +62,58 @@ ; GFX6: bb.1 (%ir-block.0): ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX6-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX6-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX6-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX6-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX6-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; 
GFX6-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) - ; GFX6-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX6-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX6-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX6-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32) + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX6-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX6-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX6-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX6-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX6-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX6-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX6-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX6-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) + ; GFX6-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX6-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX6-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX6-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY10]](s32), [[PRED_COPY11]](s32), [[PRED_COPY12]](s32), [[PRED_COPY13]](s32) ; GFX6-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2darraymsaa), 15, [[BUILD_VECTOR1]](<4 x s32>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX6-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<5 x s32>) ; GFX6-NEXT: G_STORE [[UV4]](s32), [[MV]](p1) :: (store (s32) into %ir.out, addrspace 1) - ; GFX6-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX6-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX6-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX6-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX6-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX6-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX6-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX10NSA-LABEL: name: load_2darraymsaa_tfe ; GFX10NSA: bb.1 (%ir-block.0): ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; 
GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10NSA-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) - ; GFX10NSA-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10NSA-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10NSA-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2darraymsaa), 15, [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<4 x s32>), addrspace 7) + ; GFX10NSA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10NSA-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10NSA-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10NSA-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10NSA-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10NSA-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX10NSA-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX10NSA-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10NSA-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10NSA-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2darraymsaa), 15, [[PRED_COPY10]](s32), [[PRED_COPY11]](s32), [[PRED_COPY12]](s32), [[PRED_COPY13]](s32), [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<5 x s32>) ; GFX10NSA-NEXT: G_STORE [[UV4]](s32), [[MV]](p1) :: (store (s32) into %ir.out, addrspace 1) - ; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10NSA-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10NSA-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10NSA-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10NSA-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10NSA-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10NSA-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10NSA-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 %v = call { <4 x float>, i32 } @llvm.amdgcn.image.load.2darraymsaa.sl_v4f32i32s.i32(i32 15, i32 %s, i32 %t, i32 %slice, i32 %fragid, <8 x i32> %rsrc, i32 1, i32 0) %v.vec = extractvalue { <4 x float>, i32 } %v, 0 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.3d.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.3d.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.3d.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.3d.ll @@ -8,40 +8,40 @@ ; GFX6: bb.1 (%ir-block.0): ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX6-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX6-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX6-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX6-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX6-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX6-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32) + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX6-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX6-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX6-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX6-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX6-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX6-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX6-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX6-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32) ; GFX6-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.3d), 1, [[BUILD_VECTOR1]](<3 x s32>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (s32), addrspace 7) - ; GFX6-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX10NSA-LABEL: name: image_load_3d_f32 ; GFX10NSA: bb.1 (%ir-block.0): ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: 
[[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10NSA-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.3d), 1, [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (s32), addrspace 7) - ; GFX10NSA-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10NSA-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10NSA-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10NSA-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10NSA-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10NSA-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10NSA-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10NSA-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.3d), 1, [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (s32), addrspace 7) + ; GFX10NSA-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %tex = call float @llvm.amdgcn.image.load.3d.f32.i32(i32 1, i32 %s, i32 %t, i32 %r, <8 x i32> %rsrc, i32 0, i32 0) ret float %tex @@ -52,46 +52,46 @@ ; GFX6: bb.1 (%ir-block.0): ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX6-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX6-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX6-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX6-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX6-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX6-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX6-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 
+ ; GFX6-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX6-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX6-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX6-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX6-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 ; GFX6-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; GFX6-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32) + ; GFX6-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32) ; GFX6-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.3d), 1, [[BUILD_VECTOR1]](<3 x s32>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s32), addrspace 7) ; GFX6-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) ; GFX6-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) - ; GFX6-NEXT: $vgpr0 = COPY [[UV]](s32) + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX10NSA-LABEL: name: image_load_3d_tfe_f32 ; GFX10NSA: bb.1 (%ir-block.0): ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10NSA-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10NSA-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10NSA-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10NSA-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10NSA-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10NSA-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10NSA-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10NSA-NEXT: 
[[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.3d), 1, [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s32), addrspace 7) + ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.3d), 1, [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s32), addrspace 7) ; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) ; GFX10NSA-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) - ; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](s32) + ; GFX10NSA-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call { float, i32 } @llvm.amdgcn.image.load.3d.sl_f32i32s.i32(i32 1, i32 %s, i32 %t, i32 %r, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { float, i32 } %val, 0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.sample.a16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.sample.a16.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.sample.a16.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.sample.a16.ll @@ -8,55 +8,55 @@ ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), 
[[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX9-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX9-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX9-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY12]](s32) ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX9-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX9-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX9-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX10-LABEL: name: sample_1d ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), 
[[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX10-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX10-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX10-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX10-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY12]](s32) ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f16(i32 15, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -68,61 +68,61 @@ ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: 
[[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX9-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX9-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX9-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY12]](s32) + ; GFX9-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.2d), 15, [[BUILD_VECTOR2]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX9-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX9-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX9-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX10-LABEL: name: sample_2d ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10-NEXT: 
[[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX10-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX10-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX10-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX10-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY12]](s32) + ; GFX10-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.2d), 15, [[BUILD_VECTOR2]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f16(i32 15, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -134,71 +134,71 @@ ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY 
$sgpr12 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX9-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX9-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX9-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY12]](s32) + ; GFX9-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX9-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>) ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.3d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX9-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX9-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX9-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX10-LABEL: name: sample_3d ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, 
$sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX10-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX10-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX10-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX10-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY12]](s32) + ; GFX10-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX10-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>) ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = 
G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.3d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f16(i32 15, half %s, half %t, half %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -210,71 +210,71 @@ ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX9-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX9-NEXT: 
[[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX9-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY12]](s32) + ; GFX9-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX9-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>) ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cube), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX9-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX9-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX9-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX10-LABEL: name: sample_cube ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) + ; GFX10-NEXT: 
[[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX10-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX10-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX10-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX10-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY12]](s32) + ; GFX10-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX10-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>) ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cube), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f16(i32 15, half %s, half %t, half %face, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -286,61 +286,61 @@ ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; 
GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX9-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX9-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX9-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY12]](s32) + ; GFX9-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.1darray), 15, [[BUILD_VECTOR2]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX9-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX9-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX9-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX10-LABEL: name: sample_1darray ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, 
$sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX10-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX10-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX10-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX10-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY12]](s32) + ; GFX10-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.1darray), 15, [[BUILD_VECTOR2]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: $vgpr0 = 
PRED_COPY [[UV]](s32) + ; GFX10-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f16(i32 15, half %s, half %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -352,71 +352,71 @@ ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX9-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX9-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX9-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY12]](s32) + ; GFX9-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX9-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC 
[[PRED_COPY14]](s32) ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>) ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.2darray), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX9-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX9-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX9-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX10-LABEL: name: sample_2darray ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), 
[[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX10-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX10-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX10-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX10-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY12]](s32) + ; GFX10-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX10-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>) ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.2darray), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.2darray.v4f32.f16(i32 15, half %s, half %t, half %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -428,65 +428,65 @@ ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX9-NEXT: 
[[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX9-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX9-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX9-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY12]](s32) ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.1d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX9-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX9-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX9-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX10-LABEL: name: sample_c_1d ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY 
$sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX10-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX10-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX10-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX10-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY12]](s32) ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.1d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit 
$vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f16(i32 15, float %zcompare, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -498,67 +498,67 @@ ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX9-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX9-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX9-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX9-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY12]](s32) ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS 
[[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.2d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX9-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX9-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX9-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX10-LABEL: name: sample_c_2d ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX10-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX10-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX10-NEXT: 
[[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX10-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX10-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY12]](s32) ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.2d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.2d.v4f32.f16(i32 15, float %zcompare, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -570,61 +570,61 @@ ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: 
[[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX9-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX9-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX9-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY12]](s32) + ; GFX9-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cl.1d), 15, [[BUILD_VECTOR2]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX9-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX9-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX9-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX10-LABEL: name: sample_cl_1d ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: 
[[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX10-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX10-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX10-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX10-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY12]](s32) + ; GFX10-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cl.1d), 15, [[BUILD_VECTOR2]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.cl.1d.v4f32.f16(i32 15, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -636,71 +636,71 @@ ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: 
[[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX9-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX9-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX9-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY12]](s32) + ; GFX9-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX9-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>) ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cl.2d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; 
GFX9-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX9-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX9-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX10-LABEL: name: sample_cl_2d ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX10-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX10-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX10-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX10-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY12]](s32) + ; GFX10-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX10-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX10-NEXT: 
[[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>) ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cl.2d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.cl.2d.v4f32.f16(i32 15, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -712,67 +712,67 @@ ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), 
[[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX9-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX9-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX9-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX9-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY12]](s32) ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cl.1d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX9-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX9-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX9-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX10-LABEL: name: sample_c_cl_1d ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; 
GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX10-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX10-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX10-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX10-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX10-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY12]](s32) ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cl.1d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.cl.1d.v4f32.f16(i32 15, float %zcompare, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -784,74 +784,74 @@ ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: 
[[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX9-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX9-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX9-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX9-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX9-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) + ; GFX9-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY15]](s32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY12]](s32) ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>) ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD 
intrinsic(@llvm.amdgcn.image.sample.c.cl.2d), 15, [[CONCAT_VECTORS]](<6 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX9-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX9-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX9-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX10-LABEL: name: sample_c_cl_2d ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX10-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX10-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; 
GFX10-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX10-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX10-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) + ; GFX10-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY15]](s32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY12]](s32) ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cl.2d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.cl.2d.v4f32.f16(i32 15, float %zcompare, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -863,67 +863,67 @@ ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX9-NEXT: 
[[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX9-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX9-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX9-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY12]](s32) + ; GFX9-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16) ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>) ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.b.1d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX9-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX9-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX9-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX10-LABEL: name: sample_b_1d ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) 
= G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX10-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX10-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX10-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX10-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY12]](s32) + ; GFX10-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16) ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>) ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.b.1d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x 
float> @llvm.amdgcn.image.sample.b.1d.v4f32.f16.f16(i32 15, half %bias, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -935,71 +935,71 @@ ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX9-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX9-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX9-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY12]](s32) + ; GFX9-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX9-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[TRUNC2]](s16) ; GFX9-NEXT: 
[[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>) ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.b.2d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX9-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX9-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX9-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX10-LABEL: name: sample_b_2d ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX10-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX10-NEXT: 
[[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX10-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX10-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY12]](s32) + ; GFX10-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX10-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[TRUNC2]](s16) ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>) ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.b.2d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.b.2d.v4f32.f16.f16(i32 15, half %bias, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -1011,70 +1011,70 @@ ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY 
$vgpr1 - ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX9-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX9-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX9-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY12]](s32) + ; GFX9-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY13]](s32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY13]](s32) ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16) ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BITCAST]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>) ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.b.1d), 15, [[CONCAT_VECTORS]](<6 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX9-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX9-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX9-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX10-LABEL: name: sample_c_b_1d ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) 
= COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX10-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX10-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX10-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX10-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY12]](s32) + ; GFX10-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY13]](s32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY13]](s32) ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16) ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.b.1d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BITCAST]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; 
GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f16.f16(i32 15, half %bias, float %zcompare, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -1086,74 +1086,74 @@ ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX9-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX9-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX9-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX9-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC 
[[PRED_COPY12]](s32) + ; GFX9-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) + ; GFX9-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY15]](s32) ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY13]](s32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY13]](s32) ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[TRUNC2]](s16) ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BITCAST]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>) ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.b.2d), 15, [[CONCAT_VECTORS]](<6 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX9-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX9-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX9-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX10-LABEL: name: sample_c_b_2d ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) + ; GFX10-NEXT: 
[[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX10-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX10-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX10-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX10-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY12]](s32) + ; GFX10-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) + ; GFX10-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY15]](s32) ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY13]](s32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY13]](s32) ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[TRUNC2]](s16) ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.b.2d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BITCAST]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.2d.v4f32.f16.f16(i32 15, half %bias, float %zcompare, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -1165,71 +1165,71 @@ ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 
- ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX9-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX9-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX9-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY12]](s32) + ; GFX9-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX9-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[TRUNC2]](s16) ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>) ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.b.cl.1d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), 
[[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX9-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX9-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX9-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX10-LABEL: name: sample_b_cl_1d ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX10-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX10-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX10-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX10-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY12]](s32) + ; GFX10-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = 
PRED_COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX10-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[TRUNC2]](s16) ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>) ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.b.cl.1d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.b.cl.1d.v4f32.f16.f16(i32 15, half %bias, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -1241,28 +1241,28 @@ ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX9-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 
+ ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX9-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX9-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX9-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY12]](s32) + ; GFX9-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX9-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) + ; GFX9-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY15]](s32) ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[TRUNC2]](s16) @@ -1270,47 +1270,47 @@ ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>) ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.b.cl.2d), 15, [[CONCAT_VECTORS]](<6 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX9-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX9-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX9-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX10-LABEL: name: sample_b_cl_2d ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: 
[[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX10-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX10-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX10-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX10-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY12]](s32) + ; GFX10-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX10-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) + ; GFX10-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY15]](s32) ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[TRUNC2]](s16) ; GFX10-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC3]](s16), [[DEF]](s16) ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.b.cl.2d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 1 :: 
(dereferenceable load (<4 x s32>), addrspace 7) ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.b.cl.2d.v4f32.f16.f16(i32 15, half %bias, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -1322,74 +1322,74 @@ ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX9-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX9-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX9-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = 
G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX9-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY12]](s32) + ; GFX9-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) + ; GFX9-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY15]](s32) ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY13]](s32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY13]](s32) ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[TRUNC2]](s16) ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BITCAST]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>) ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.b.cl.1d), 15, [[CONCAT_VECTORS]](<6 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX9-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX9-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX9-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX10-LABEL: name: sample_c_b_cl_1d ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - 
; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX10-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX10-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX10-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX10-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY12]](s32) + ; GFX10-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) + ; GFX10-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY15]](s32) ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY13]](s32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY13]](s32) ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[TRUNC2]](s16) ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.b.cl.1d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BITCAST]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.1d.v4f32.f16.f16(i32 15, half %bias, float %zcompare, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -1401,80 +1401,80 @@ ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, 
$sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX9-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX9-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX9-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX9-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX9-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY12]](s32) + ; GFX9-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) + ; GFX9-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY15]](s32) + ; GFX9-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY16]](s32) ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR 
[[TRUNC]](s16), [[DEF]](s16) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY13]](s32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY13]](s32) ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[TRUNC2]](s16) ; GFX9-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC3]](s16), [[DEF]](s16) ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BITCAST]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>) ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.b.cl.2d), 15, [[CONCAT_VECTORS]](<8 x s16>), $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX9-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX9-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX9-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX10-LABEL: name: sample_c_b_cl_2d ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX10-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10-NEXT: 
[[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX10-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX10-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX10-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX10-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY12]](s32) + ; GFX10-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) + ; GFX10-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY15]](s32) + ; GFX10-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY16]](s32) ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY13]](s32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY13]](s32) ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[TRUNC2]](s16) ; GFX10-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC3]](s16), [[DEF]](s16) ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.b.cl.2d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BITCAST]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.2d.v4f32.f16.f16(i32 15, half %bias, float %zcompare, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -1486,26 +1486,26 @@ ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; 
GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX9-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX9-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX9-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY12]](s32) + ; GFX9-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX9-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16) @@ -1513,45 +1513,45 @@ ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>) ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.1d), 15, [[CONCAT_VECTORS]](<6 x s16>), $noreg, $noreg, 
[[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX9-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX9-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX9-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX10-LABEL: name: sample_d_1d ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX10-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX10-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX10-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX10-NEXT: 
[[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY12]](s32) + ; GFX10-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX10-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16) ; GFX10-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.1d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f16(i32 15, half %dsdh, half %dsdv, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -1563,82 +1563,82 @@ ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX9-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; 
GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX9-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) - ; GFX9-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY17]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX9-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX9-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX9-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY12]](s32) + ; GFX9-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX9-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) + ; GFX9-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY15]](s32) + ; GFX9-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY16]](s32) + ; GFX9-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY17]](s32) ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) ; GFX9-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>) ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.2d), 15, [[CONCAT_VECTORS]](<6 x s16>), $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX9-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX9-NEXT: $vgpr2 = PRED_COPY 
[[UV2]](s32) + ; GFX9-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX10-LABEL: name: sample_d_2d ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX10-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) - ; GFX10-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY17]](s32) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX10-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX10-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX10-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX10-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY12]](s32) + ; GFX10-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10-NEXT: 
[[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX10-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) + ; GFX10-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY15]](s32) + ; GFX10-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; GFX10-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY16]](s32) + ; GFX10-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; GFX10-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY17]](s32) ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) ; GFX10-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.2d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f16.f16(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -1650,38 +1650,38 @@ ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 
- ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX9-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX9-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) - ; GFX9-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY17]](s32) - ; GFX9-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY18]](s32) - ; GFX9-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX9-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[COPY19]](s32) - ; GFX9-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX9-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[COPY20]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX9-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX9-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX9-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY12]](s32) + ; GFX9-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX9-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) + ; GFX9-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY15]](s32) + ; GFX9-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY16]](s32) + ; GFX9-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY17]](s32) + ; GFX9-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY $vgpr6 + ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY18]](s32) + ; GFX9-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY $vgpr7 + ; GFX9-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY19]](s32) + ; GFX9-NEXT: [[PRED_COPY20:%[0-9]+]]:_(s32) = PRED_COPY $vgpr8 + ; GFX9-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY20]](s32) ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR 
[[TRUNC2]](s16), [[DEF]](s16) @@ -1692,47 +1692,47 @@ ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), [[BUILD_VECTOR5]](<2 x s16>), [[BUILD_VECTOR6]](<2 x s16>), [[BUILD_VECTOR7]](<2 x s16>) ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.3d), 15, [[CONCAT_VECTORS]](<12 x s16>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX9-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX9-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX9-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX10-LABEL: name: sample_d_3d ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX10-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) - ; GFX10-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY17]](s32) - ; GFX10-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX10-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY18]](s32) - ; GFX10-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX10-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[COPY19]](s32) - ; 
GFX10-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX10-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[COPY20]](s32) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX10-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX10-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX10-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX10-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY12]](s32) + ; GFX10-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX10-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) + ; GFX10-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY15]](s32) + ; GFX10-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; GFX10-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY16]](s32) + ; GFX10-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; GFX10-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY17]](s32) + ; GFX10-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY $vgpr6 + ; GFX10-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY18]](s32) + ; GFX10-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY $vgpr7 + ; GFX10-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY19]](s32) + ; GFX10-NEXT: [[PRED_COPY20:%[0-9]+]]:_(s32) = PRED_COPY $vgpr8 + ; GFX10-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY20]](s32) ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) @@ -1743,10 +1743,10 @@ ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), [[BUILD_VECTOR5]](<2 x s16>), [[BUILD_VECTOR6]](<2 x s16>), [[BUILD_VECTOR7]](<2 x s16>) ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.3d), 15, [[CONCAT_VECTORS]](<12 x s16>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = 
G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f16.f16(i32 15, half %dsdh, half %dtdh, half %drdh, half %dsdv, half %dtdv, half %drdv, half %s, half %t, half %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -1758,28 +1758,28 @@ ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX9-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX9-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX9-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR 
[[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX9-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX9-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) + ; GFX9-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY15]](s32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY12]](s32) ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16) @@ -1787,47 +1787,47 @@ ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>) ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.1d), 15, [[CONCAT_VECTORS]](<8 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX9-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX9-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX9-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX10-LABEL: name: sample_c_d_1d ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = 
G_TRUNC [[COPY14]](s32) - ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX10-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX10-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX10-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX10-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX10-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) + ; GFX10-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY15]](s32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY12]](s32) ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16) ; GFX10-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.1d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dsdv, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -1839,86 +1839,86 @@ ; GFX9: 
bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX9-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX9-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) - ; GFX9-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY17]](s32) - ; GFX9-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY18]](s32) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX9-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX9-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX9-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX9-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC 
[[PRED_COPY14]](s32) + ; GFX9-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY15]](s32) + ; GFX9-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY16]](s32) + ; GFX9-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY17]](s32) + ; GFX9-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY $vgpr6 + ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY18]](s32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY12]](s32) ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) ; GFX9-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>) ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.2d), 15, [[CONCAT_VECTORS]](<8 x s16>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX9-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX9-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX9-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX10-LABEL: name: sample_c_d_2d ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX10-NEXT: 
[[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX10-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) - ; GFX10-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY17]](s32) - ; GFX10-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX10-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY18]](s32) - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX10-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX10-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX10-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX10-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX10-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) + ; GFX10-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY15]](s32) + ; GFX10-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY16]](s32) + ; GFX10-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; GFX10-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY17]](s32) + ; GFX10-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY $vgpr6 + ; GFX10-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY18]](s32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY12]](s32) ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) ; GFX10-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.2d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; 
GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -1930,28 +1930,28 @@ ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX9-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX9-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX9-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 
+ ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX9-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY12]](s32) + ; GFX9-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX9-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) + ; GFX9-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY15]](s32) ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16) @@ -1959,47 +1959,47 @@ ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>) ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.cl.1d), 15, [[CONCAT_VECTORS]](<6 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX9-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX9-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX9-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX10-LABEL: name: sample_d_cl_1d ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC 
[[COPY13]](s32) - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX10-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX10-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX10-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX10-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY12]](s32) + ; GFX10-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX10-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) + ; GFX10-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY15]](s32) ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16) ; GFX10-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.cl.1d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f16.f16(i32 15, half %dsdh, half %dsdv, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -2011,34 
+2011,34 @@ ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX9-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX9-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) - ; GFX9-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY17]](s32) - ; GFX9-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY18]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX9-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX9-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX9-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY12]](s32) + ; GFX9-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX9-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) 
= PRED_COPY $vgpr2 + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) + ; GFX9-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY15]](s32) + ; GFX9-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY16]](s32) + ; GFX9-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY17]](s32) + ; GFX9-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY $vgpr6 + ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY18]](s32) ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) ; GFX9-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) @@ -2047,43 +2047,43 @@ ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), [[BUILD_VECTOR5]](<2 x s16>) ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.cl.2d), 15, [[CONCAT_VECTORS]](<8 x s16>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX9-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX9-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX9-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX10-LABEL: name: sample_d_cl_2d ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY 
$vgpr1 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX10-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) - ; GFX10-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY17]](s32) - ; GFX10-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX10-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY18]](s32) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX10-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX10-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX10-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX10-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY12]](s32) + ; GFX10-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX10-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) + ; GFX10-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY15]](s32) + ; GFX10-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; GFX10-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY16]](s32) + ; GFX10-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; GFX10-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY17]](s32) + ; GFX10-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY $vgpr6 + ; GFX10-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY18]](s32) ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) ; GFX10-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) @@ -2091,10 +2091,10 @@ ; GFX10-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[DEF]](s16) ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.cl.2d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), [[BUILD_VECTOR5]](<2 x s16>), $noreg, 
$noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f16.f16(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -2106,30 +2106,30 @@ ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX9-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX9-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), 
[[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX9-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX9-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX9-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX9-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) + ; GFX9-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY15]](s32) + ; GFX9-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY16]](s32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY12]](s32) ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16) @@ -2137,49 +2137,49 @@ ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>) ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.cl.1d), 15, [[CONCAT_VECTORS]](<8 x s16>), $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX9-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX9-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX9-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX10-LABEL: name: sample_c_d_cl_1d ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: 
[[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX10-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX10-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX10-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX10-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX10-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX10-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) + ; GFX10-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY15]](s32) + ; GFX10-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY16]](s32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY12]](s32) ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16) ; GFX10-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.cl.1d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), 
[[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dsdv, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -2191,36 +2191,36 @@ ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX9-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX9-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) - ; GFX9-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY17]](s32) - ; GFX9-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY18]](s32) - ; GFX9-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY19]](s32) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 
x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX9-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX9-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX9-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX9-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) + ; GFX9-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY15]](s32) + ; GFX9-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY16]](s32) + ; GFX9-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY17]](s32) + ; GFX9-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY $vgpr6 + ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY18]](s32) + ; GFX9-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY $vgpr7 + ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY19]](s32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY12]](s32) ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) ; GFX9-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) @@ -2229,45 +2229,45 @@ ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<10 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), [[BUILD_VECTOR5]](<2 x s16>) ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.cl.2d), 15, [[CONCAT_VECTORS]](<10 x s16>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX9-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX9-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX9-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX10-LABEL: name: sample_c_d_cl_2d ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 
- ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX10-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) - ; GFX10-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY17]](s32) - ; GFX10-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX10-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY18]](s32) - ; GFX10-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX10-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY19]](s32) - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX10-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX10-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX10-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX10-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX10-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) + ; GFX10-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX10-NEXT: 
[[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY15]](s32) + ; GFX10-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY16]](s32) + ; GFX10-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; GFX10-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY17]](s32) + ; GFX10-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY $vgpr6 + ; GFX10-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY18]](s32) + ; GFX10-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY $vgpr7 + ; GFX10-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY19]](s32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY12]](s32) ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) ; GFX10-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) @@ -2275,10 +2275,10 @@ ; GFX10-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[DEF]](s16) ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.cl.2d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), [[BUILD_VECTOR5]](<2 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -2290,26 +2290,26 @@ ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = 
G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX9-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX9-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX9-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY12]](s32) + ; GFX9-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX9-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16) @@ -2317,45 +2317,45 @@ ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>) ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cd.1d), 15, [[CONCAT_VECTORS]](<6 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX9-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX9-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX9-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX10-LABEL: name: sample_cd_1d ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, 
$sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX10-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX10-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX10-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX10-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY12]](s32) + ; GFX10-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX10-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16) ; GFX10-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cd.1d), 15, [[BUILD_VECTOR2]](<2 x s16>), 
[[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f16.f16(i32 15, half %dsdh, half %dsdv, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -2367,82 +2367,82 @@ ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX9-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX9-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) - ; GFX9-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY17]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), 
[[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX9-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX9-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX9-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY12]](s32) + ; GFX9-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX9-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) + ; GFX9-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY15]](s32) + ; GFX9-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY16]](s32) + ; GFX9-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY17]](s32) ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) ; GFX9-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>) ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cd.2d), 15, [[CONCAT_VECTORS]](<6 x s16>), $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX9-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX9-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX9-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX10-LABEL: name: sample_cd_2d ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = 
G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX10-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) - ; GFX10-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY17]](s32) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX10-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX10-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX10-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX10-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY12]](s32) + ; GFX10-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX10-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) + ; GFX10-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY15]](s32) + ; GFX10-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; GFX10-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY16]](s32) + ; GFX10-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; GFX10-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY17]](s32) ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) ; GFX10-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), 
[[TRUNC5]](s16) ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cd.2d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f16.f16(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -2454,28 +2454,28 @@ ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX9-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), 
[[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX9-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX9-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX9-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX9-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) + ; GFX9-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY15]](s32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY12]](s32) ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16) @@ -2483,47 +2483,47 @@ ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>) ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cd.1d), 15, [[CONCAT_VECTORS]](<8 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX9-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX9-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX9-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX10-LABEL: name: sample_c_cd_1d ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: 
[[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX10-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX10-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX10-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX10-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX10-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) + ; GFX10-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY15]](s32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY12]](s32) ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16) ; GFX10-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cd.1d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; 
GFX10-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dsdv, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -2535,86 +2535,86 @@ ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX9-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX9-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) - ; GFX9-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY17]](s32) - ; GFX9-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY18]](s32) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX9-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX9-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX9-NEXT: 
[[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX9-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX9-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) + ; GFX9-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY15]](s32) + ; GFX9-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY16]](s32) + ; GFX9-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY17]](s32) + ; GFX9-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY $vgpr6 + ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY18]](s32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY12]](s32) ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) ; GFX9-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>) ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cd.2d), 15, [[CONCAT_VECTORS]](<8 x s16>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX9-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX9-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX9-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX10-LABEL: name: sample_c_cd_2d ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = 
COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX10-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) - ; GFX10-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY17]](s32) - ; GFX10-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX10-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY18]](s32) - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX10-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX10-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX10-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX10-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX10-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) + ; GFX10-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY15]](s32) + ; GFX10-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY16]](s32) + ; GFX10-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; GFX10-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY17]](s32) + ; GFX10-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY $vgpr6 + ; GFX10-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY18]](s32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY12]](s32) ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) ; GFX10-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x 
s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cd.2d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -2626,28 +2626,28 @@ ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX9-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: 
[[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX9-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX9-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX9-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY12]](s32) + ; GFX9-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX9-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) + ; GFX9-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY15]](s32) ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16) @@ -2655,47 +2655,47 @@ ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>) ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cd.cl.1d), 15, [[CONCAT_VECTORS]](<6 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX9-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX9-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX9-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX10-LABEL: name: sample_cd_cl_1d ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; 
GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX10-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX10-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX10-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX10-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY12]](s32) + ; GFX10-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX10-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) + ; GFX10-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY15]](s32) ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16) ; GFX10-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cd.cl.1d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: 
$vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f16.f16(i32 15, half %dsdh, half %dsdv, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -2707,34 +2707,34 @@ ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX9-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX9-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) - ; GFX9-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY17]](s32) - ; GFX9-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY18]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX9-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX9-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY 
$sgpr13 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX9-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY12]](s32) + ; GFX9-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX9-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) + ; GFX9-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY15]](s32) + ; GFX9-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY16]](s32) + ; GFX9-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY17]](s32) + ; GFX9-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY $vgpr6 + ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY18]](s32) ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) ; GFX9-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) @@ -2743,43 +2743,43 @@ ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), [[BUILD_VECTOR5]](<2 x s16>) ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cd.cl.2d), 15, [[CONCAT_VECTORS]](<8 x s16>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX9-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX9-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX9-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX10-LABEL: name: sample_cd_cl_2d ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY 
$sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX10-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) - ; GFX10-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY17]](s32) - ; GFX10-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX10-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY18]](s32) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX10-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX10-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX10-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX10-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY12]](s32) + ; GFX10-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX10-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) + ; GFX10-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY15]](s32) + ; GFX10-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; GFX10-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY16]](s32) + ; GFX10-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; GFX10-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY17]](s32) + ; GFX10-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY $vgpr6 + ; GFX10-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY18]](s32) ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) ; GFX10-NEXT: 
[[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) @@ -2787,10 +2787,10 @@ ; GFX10-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[DEF]](s16) ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cd.cl.2d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), [[BUILD_VECTOR5]](<2 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f16.f16(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -2802,30 +2802,30 @@ ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX9-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX9-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY 
$sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX9-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX9-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX9-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX9-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) + ; GFX9-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY15]](s32) + ; GFX9-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY16]](s32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY12]](s32) ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16) @@ -2833,49 +2833,49 @@ ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>) ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cd.cl.1d), 15, [[CONCAT_VECTORS]](<8 x s16>), $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX9-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX9-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX9-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX10-LABEL: name: sample_c_cd_cl_1d ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: 
[[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX10-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX10-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX10-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX10-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX10-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX10-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) + ; GFX10-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY15]](s32) + ; GFX10-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY16]](s32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY12]](s32) ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16) ; GFX10-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR 
[[TRUNC2]](s16), [[TRUNC3]](s16) ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cd.cl.1d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dsdv, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -2887,36 +2887,36 @@ ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX9-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX9-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) - ; GFX9-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY17]](s32) - ; GFX9-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY18]](s32) - ; GFX9-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY19]](s32) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; 
GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX9-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX9-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX9-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX9-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) + ; GFX9-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY15]](s32) + ; GFX9-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY16]](s32) + ; GFX9-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY17]](s32) + ; GFX9-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY $vgpr6 + ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY18]](s32) + ; GFX9-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY $vgpr7 + ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY19]](s32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY12]](s32) ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) ; GFX9-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) @@ -2925,45 +2925,45 @@ ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<10 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), [[BUILD_VECTOR5]](<2 x s16>) ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cd.cl.2d), 15, [[CONCAT_VECTORS]](<10 x s16>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX9-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX9-NEXT: $vgpr2 = 
PRED_COPY [[UV2]](s32) + ; GFX9-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX10-LABEL: name: sample_c_cd_cl_2d ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX10-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) - ; GFX10-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY17]](s32) - ; GFX10-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX10-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY18]](s32) - ; GFX10-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX10-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY19]](s32) - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX10-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX10-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX10-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), 
[[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX10-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX10-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) + ; GFX10-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY15]](s32) + ; GFX10-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY16]](s32) + ; GFX10-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; GFX10-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY17]](s32) + ; GFX10-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY $vgpr6 + ; GFX10-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY18]](s32) + ; GFX10-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY $vgpr7 + ; GFX10-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY19]](s32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY12]](s32) ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) ; GFX10-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) @@ -2971,10 +2971,10 @@ ; GFX10-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[DEF]](s16) ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cd.cl.2d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), [[BUILD_VECTOR5]](<2 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -2986,61 +2986,61 @@ ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY 
$sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX9-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX9-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX9-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY12]](s32) + ; GFX9-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.l.1d), 15, [[BUILD_VECTOR2]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX9-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX9-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX9-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX10-LABEL: name: sample_l_1d ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: 
[[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX10-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX10-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX10-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX10-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY12]](s32) + ; GFX10-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.l.1d), 15, [[BUILD_VECTOR2]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10-NEXT: $vgpr3 = PRED_COPY 
[[UV3]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.l.1d.v4f32.f16(i32 15, half %s, half %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -3052,71 +3052,71 @@ ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX9-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX9-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX9-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY12]](s32) + ; GFX9-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX9-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = 
G_IMPLICIT_DEF ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>) ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.l.2d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX9-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX9-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX9-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX10-LABEL: name: sample_l_2d ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10-NEXT: 
[[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX10-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX10-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX10-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX10-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY12]](s32) + ; GFX10-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX10-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>) ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.l.2d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f16(i32 15, half %s, half %t, half %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -3128,67 +3128,67 @@ ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = 
COPY $vgpr0 - ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX9-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX9-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX9-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX9-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY12]](s32) ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.l.1d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX9-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX9-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX9-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX10-LABEL: name: sample_c_l_1d ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: 
[[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX10-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX10-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX10-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX10-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX10-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY12]](s32) ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.l.1d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: 
$vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.l.1d.v4f32.f16(i32 15, float %zcompare, half %s, half %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -3200,74 +3200,74 @@ ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX9-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX9-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX9-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX9-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: 
[[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX9-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) + ; GFX9-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY15]](s32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY12]](s32) ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>) ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.l.2d), 15, [[CONCAT_VECTORS]](<6 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX9-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX9-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX9-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX10-LABEL: name: sample_c_l_2d ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY 
$sgpr2 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX10-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX10-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX10-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX10-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX10-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) + ; GFX10-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY15]](s32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY12]](s32) ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.l.2d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.l.2d.v4f32.f16(i32 15, float %zcompare, half %s, half %t, half %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -3279,55 +3279,55 @@ ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: 
[[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX9-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX9-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX9-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY12]](s32) ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.lz.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX9-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX9-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX9-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX10-LABEL: name: sample_lz_1d ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY 
$sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX10-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX10-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX10-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX10-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY12]](s32) ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.lz.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.lz.1d.v4f32.f16(i32 15, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -3339,61 +3339,61 @@ ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: 
[[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX9-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX9-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX9-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY12]](s32) + ; GFX9-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.lz.2d), 15, [[BUILD_VECTOR2]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX9-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX9-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX9-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, 
implicit $vgpr3 ; GFX10-LABEL: name: sample_lz_2d ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX10-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX10-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX10-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX10-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY12]](s32) + ; GFX10-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.lz.2d), 15, [[BUILD_VECTOR2]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; 
GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f16(i32 15, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -3405,65 +3405,65 @@ ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX9-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX9-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX9-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY12]](s32) ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX9-NEXT: 
[[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.lz.1d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX9-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX9-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX9-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX10-LABEL: name: sample_c_lz_1d ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX10-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX10-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY 
$sgpr12 + ; GFX10-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX10-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY12]](s32) ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.lz.1d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.lz.1d.v4f32.f16(i32 15, float %zcompare, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -3475,67 +3475,67 @@ ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: 
[[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX9-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX9-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX9-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX9-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY12]](s32) ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.lz.2d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX9-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX9-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX9-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX10-LABEL: name: sample_c_lz_2d ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: 
[[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX10-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX10-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX10-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX10-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX10-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY12]](s32) ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.lz.2d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> 
@llvm.amdgcn.image.sample.c.lz.2d.v4f32.f16(i32 15, float %zcompare, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -3547,38 +3547,38 @@ ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX9-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX9-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) - ; GFX9-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY17]](s32) - ; GFX9-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY18]](s32) - ; GFX9-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY19]](s32) - ; GFX9-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY20]](s32) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY13]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX9-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX9-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX9-NEXT: 
[[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX9-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) + ; GFX9-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY15]](s32) + ; GFX9-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY16]](s32) + ; GFX9-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY17]](s32) + ; GFX9-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY $vgpr6 + ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY18]](s32) + ; GFX9-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY $vgpr7 + ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY19]](s32) + ; GFX9-NEXT: [[PRED_COPY20:%[0-9]+]]:_(s32) = PRED_COPY $vgpr8 + ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY20]](s32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY12]](s32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY13]](s32) ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) ; GFX9-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) @@ -3586,44 +3586,44 @@ ; GFX9-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[DEF]](s16) ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), [[BUILD_VECTOR5]](<2 x s16>) ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.o.2darray), 4, [[CONCAT_VECTORS]](<12 x s16>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (s32), addrspace 7) - ; GFX9-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX10-LABEL: name: sample_c_d_o_2darray_V1 ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) 
= COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX10-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) - ; GFX10-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY17]](s32) - ; GFX10-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX10-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY18]](s32) - ; GFX10-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX10-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY19]](s32) - ; GFX10-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX10-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY20]](s32) - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) - ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY13]](s32) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX10-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX10-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX10-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX10-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) + ; GFX10-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY15]](s32) + ; GFX10-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY16]](s32) + ; GFX10-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY17]](s32) + ; GFX10-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY $vgpr6 + ; GFX10-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY18]](s32) + ; GFX10-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY $vgpr7 + ; GFX10-NEXT: 
[[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY19]](s32) + ; GFX10-NEXT: [[PRED_COPY20:%[0-9]+]]:_(s32) = PRED_COPY $vgpr8 + ; GFX10-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY20]](s32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY12]](s32) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY13]](s32) ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) ; GFX10-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) @@ -3631,7 +3631,7 @@ ; GFX10-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[DEF]](s16) ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), [[BUILD_VECTOR5]](<2 x s16>) ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.o.2darray), 4, [[CONCAT_VECTORS]](<12 x s16>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (s32), addrspace 7) - ; GFX10-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 main_body: %v = call float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f16.f16(i32 4, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -3643,38 +3643,38 @@ ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX9-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX9-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) - ; GFX9-NEXT: 
[[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY17]](s32) - ; GFX9-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY18]](s32) - ; GFX9-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY19]](s32) - ; GFX9-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY20]](s32) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY13]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX9-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX9-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX9-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) + ; GFX9-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY15]](s32) + ; GFX9-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY16]](s32) + ; GFX9-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY17]](s32) + ; GFX9-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY $vgpr6 + ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY18]](s32) + ; GFX9-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY $vgpr7 + ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY19]](s32) + ; GFX9-NEXT: [[PRED_COPY20:%[0-9]+]]:_(s32) = PRED_COPY $vgpr8 + ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY20]](s32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY12]](s32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY13]](s32) ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) ; GFX9-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) @@ -3683,45 +3683,45 @@ ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), 
[[BUILD_VECTOR4]](<2 x s16>), [[BUILD_VECTOR5]](<2 x s16>) ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.o.2darray), 6, [[CONCAT_VECTORS]](<12 x s16>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<2 x s32>), addrspace 7) ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX9-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 ; GFX10-LABEL: name: sample_c_d_o_2darray_V2 ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX10-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) - ; GFX10-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY17]](s32) - ; GFX10-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX10-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY18]](s32) - ; GFX10-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX10-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY19]](s32) - ; GFX10-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX10-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY20]](s32) - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) - ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY13]](s32) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10-NEXT: 
[[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX10-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX10-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX10-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX10-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) + ; GFX10-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY15]](s32) + ; GFX10-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY16]](s32) + ; GFX10-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY17]](s32) + ; GFX10-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY $vgpr6 + ; GFX10-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY18]](s32) + ; GFX10-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY $vgpr7 + ; GFX10-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY19]](s32) + ; GFX10-NEXT: [[PRED_COPY20:%[0-9]+]]:_(s32) = PRED_COPY $vgpr8 + ; GFX10-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY20]](s32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY12]](s32) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY13]](s32) ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) ; GFX10-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) @@ -3730,8 +3730,8 @@ ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), [[BUILD_VECTOR5]](<2 x s16>) ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.o.2darray), 6, [[CONCAT_VECTORS]](<12 x s16>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<2 x s32>), addrspace 7) ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 main_body: %v = call <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f32.f16(i32 6, i32 %offset, float %zcompare, half %dsdh, half 
%dtdh, half %dsdv, half %dtdv, half %s, half %t, half %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.sample.d.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.sample.d.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.sample.d.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.sample.d.ll @@ -7,71 +7,71 @@ ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX10-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX10-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<9 x s32>) = G_BUILD_VECTOR [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX10-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX10-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX10-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), 
[[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX10-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX10-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; GFX10-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; GFX10-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY $vgpr6 + ; GFX10-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY $vgpr7 + ; GFX10-NEXT: [[PRED_COPY20:%[0-9]+]]:_(s32) = PRED_COPY $vgpr8 + ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<9 x s32>) = G_BUILD_VECTOR [[PRED_COPY12]](s32), [[PRED_COPY13]](s32), [[PRED_COPY14]](s32), [[PRED_COPY15]](s32), [[PRED_COPY16]](s32), [[PRED_COPY17]](s32), [[PRED_COPY18]](s32), [[PRED_COPY19]](s32), [[PRED_COPY20]](s32) ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.3d), 15, [[BUILD_VECTOR2]](<9 x s32>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 0 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX11-LABEL: name: sample_d_3d ; GFX11: bb.1.main_body: ; GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX11-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX11-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX11-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX11-NEXT: 
[[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX11-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX11-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<9 x s32>) = G_BUILD_VECTOR [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32) + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX11-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX11-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX11-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX11-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX11-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX11-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX11-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX11-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX11-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX11-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX11-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX11-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; GFX11-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; GFX11-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY $vgpr6 + ; GFX11-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY $vgpr7 + ; GFX11-NEXT: [[PRED_COPY20:%[0-9]+]]:_(s32) = PRED_COPY $vgpr8 + ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<9 x s32>) = G_BUILD_VECTOR [[PRED_COPY12]](s32), [[PRED_COPY13]](s32), [[PRED_COPY14]](s32), [[PRED_COPY15]](s32), [[PRED_COPY16]](s32), [[PRED_COPY17]](s32), [[PRED_COPY18]](s32), [[PRED_COPY19]](s32), [[PRED_COPY20]](s32) ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.3d), 15, [[BUILD_VECTOR2]](<9 x s32>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 0 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX11-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX11-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX11-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX11-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f32.f32(i32 15, float %dsdh, float %dtdh, float %drdh, float %dsdv, float %dtdv, 
float %drdv, float %s, float %t, float %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -83,73 +83,73 @@ ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX10-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX10-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX10-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<10 x s32>) = G_BUILD_VECTOR [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX10-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX10-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX10-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX10-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; 
GFX10-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX10-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; GFX10-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; GFX10-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY $vgpr6 + ; GFX10-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY $vgpr7 + ; GFX10-NEXT: [[PRED_COPY20:%[0-9]+]]:_(s32) = PRED_COPY $vgpr8 + ; GFX10-NEXT: [[PRED_COPY21:%[0-9]+]]:_(s32) = PRED_COPY $vgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<10 x s32>) = G_BUILD_VECTOR [[PRED_COPY12]](s32), [[PRED_COPY13]](s32), [[PRED_COPY14]](s32), [[PRED_COPY15]](s32), [[PRED_COPY16]](s32), [[PRED_COPY17]](s32), [[PRED_COPY18]](s32), [[PRED_COPY19]](s32), [[PRED_COPY20]](s32), [[PRED_COPY21]](s32) ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.3d), 15, [[BUILD_VECTOR2]](<10 x s32>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 0 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX11-LABEL: name: sample_c_d_3d ; GFX11: bb.1.main_body: ; GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX11-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX11-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX11-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX11-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX11-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX11-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX11-NEXT: 
[[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<10 x s32>) = G_BUILD_VECTOR [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32) + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX11-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX11-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX11-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX11-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX11-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX11-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX11-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX11-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX11-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX11-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX11-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX11-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; GFX11-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; GFX11-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY $vgpr6 + ; GFX11-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY $vgpr7 + ; GFX11-NEXT: [[PRED_COPY20:%[0-9]+]]:_(s32) = PRED_COPY $vgpr8 + ; GFX11-NEXT: [[PRED_COPY21:%[0-9]+]]:_(s32) = PRED_COPY $vgpr9 + ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<10 x s32>) = G_BUILD_VECTOR [[PRED_COPY12]](s32), [[PRED_COPY13]](s32), [[PRED_COPY14]](s32), [[PRED_COPY15]](s32), [[PRED_COPY16]](s32), [[PRED_COPY17]](s32), [[PRED_COPY18]](s32), [[PRED_COPY19]](s32), [[PRED_COPY20]](s32), [[PRED_COPY21]](s32) ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.3d), 15, [[BUILD_VECTOR2]](<10 x s32>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 0 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX11-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX11-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX11-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX11-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.3d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh, float %dtdh, float %drdh, 
float %dsdv, float %dtdv, float %drdv, float %s, float %t, float %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -161,75 +161,75 @@ ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX10-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX10-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX10-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; GFX10-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<11 x s32>) = G_BUILD_VECTOR [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX10-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX10-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX10-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX10-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10-NEXT: 
[[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX10-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; GFX10-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; GFX10-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY $vgpr6 + ; GFX10-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY $vgpr7 + ; GFX10-NEXT: [[PRED_COPY20:%[0-9]+]]:_(s32) = PRED_COPY $vgpr8 + ; GFX10-NEXT: [[PRED_COPY21:%[0-9]+]]:_(s32) = PRED_COPY $vgpr9 + ; GFX10-NEXT: [[PRED_COPY22:%[0-9]+]]:_(s32) = PRED_COPY $vgpr10 + ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<11 x s32>) = G_BUILD_VECTOR [[PRED_COPY12]](s32), [[PRED_COPY13]](s32), [[PRED_COPY14]](s32), [[PRED_COPY15]](s32), [[PRED_COPY16]](s32), [[PRED_COPY17]](s32), [[PRED_COPY18]](s32), [[PRED_COPY19]](s32), [[PRED_COPY20]](s32), [[PRED_COPY21]](s32), [[PRED_COPY22]](s32) ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.cl.3d), 15, [[BUILD_VECTOR2]](<11 x s32>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 0 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX11-LABEL: name: sample_c_d_cl_3d ; GFX11: bb.1.main_body: ; GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX11-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX11-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; 
GFX11-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX11-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX11-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX11-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX11-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; GFX11-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<11 x s32>) = G_BUILD_VECTOR [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32) + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX11-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX11-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX11-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX11-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX11-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX11-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX11-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX11-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX11-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX11-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX11-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX11-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; GFX11-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; GFX11-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY $vgpr6 + ; GFX11-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY $vgpr7 + ; GFX11-NEXT: [[PRED_COPY20:%[0-9]+]]:_(s32) = PRED_COPY $vgpr8 + ; GFX11-NEXT: [[PRED_COPY21:%[0-9]+]]:_(s32) = PRED_COPY $vgpr9 + ; GFX11-NEXT: [[PRED_COPY22:%[0-9]+]]:_(s32) = PRED_COPY $vgpr10 + ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<11 x s32>) = G_BUILD_VECTOR [[PRED_COPY12]](s32), [[PRED_COPY13]](s32), [[PRED_COPY14]](s32), [[PRED_COPY15]](s32), [[PRED_COPY16]](s32), [[PRED_COPY17]](s32), [[PRED_COPY18]](s32), [[PRED_COPY19]](s32), [[PRED_COPY20]](s32), [[PRED_COPY21]](s32), [[PRED_COPY22]](s32) ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.cl.3d), 15, [[BUILD_VECTOR2]](<11 x s32>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 0 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX11-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX11-NEXT: $vgpr0 = 
PRED_COPY [[UV]](s32) + ; GFX11-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX11-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX11-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.3d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh, float %dtdh, float %drdh, float %dsdv, float %dtdv, float %drdv, float %s, float %t, float %r, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -241,77 +241,77 @@ ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX10-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX10-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX10-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; GFX10-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; GFX10-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<12 x s32>) = G_BUILD_VECTOR [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; 
GFX10-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX10-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX10-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX10-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX10-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX10-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; GFX10-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; GFX10-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY $vgpr6 + ; GFX10-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY $vgpr7 + ; GFX10-NEXT: [[PRED_COPY20:%[0-9]+]]:_(s32) = PRED_COPY $vgpr8 + ; GFX10-NEXT: [[PRED_COPY21:%[0-9]+]]:_(s32) = PRED_COPY $vgpr9 + ; GFX10-NEXT: [[PRED_COPY22:%[0-9]+]]:_(s32) = PRED_COPY $vgpr10 + ; GFX10-NEXT: [[PRED_COPY23:%[0-9]+]]:_(s32) = PRED_COPY $vgpr11 + ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<12 x s32>) = G_BUILD_VECTOR [[PRED_COPY12]](s32), [[PRED_COPY13]](s32), [[PRED_COPY14]](s32), [[PRED_COPY15]](s32), [[PRED_COPY16]](s32), [[PRED_COPY17]](s32), [[PRED_COPY18]](s32), [[PRED_COPY19]](s32), [[PRED_COPY20]](s32), [[PRED_COPY21]](s32), [[PRED_COPY22]](s32), [[PRED_COPY23]](s32) ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.cl.o.3d), 15, [[BUILD_VECTOR2]](<12 x s32>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 0 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX11-LABEL: name: sample_c_d_cl_o_3d ; GFX11: bb.1.main_body: ; GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(s32) = 
COPY $sgpr10 - ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX11-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX11-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX11-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX11-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX11-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX11-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX11-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; GFX11-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; GFX11-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<12 x s32>) = G_BUILD_VECTOR [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32) + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX11-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX11-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX11-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX11-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX11-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX11-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX11-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX11-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX11-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX11-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX11-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX11-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; GFX11-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; GFX11-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY $vgpr6 + ; GFX11-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY $vgpr7 + ; GFX11-NEXT: [[PRED_COPY20:%[0-9]+]]:_(s32) = PRED_COPY $vgpr8 + ; GFX11-NEXT: [[PRED_COPY21:%[0-9]+]]:_(s32) = PRED_COPY $vgpr9 + ; GFX11-NEXT: [[PRED_COPY22:%[0-9]+]]:_(s32) = PRED_COPY $vgpr10 + ; GFX11-NEXT: [[PRED_COPY23:%[0-9]+]]:_(s32) = PRED_COPY $vgpr11 + ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<12 x s32>) = G_BUILD_VECTOR [[PRED_COPY12]](s32), [[PRED_COPY13]](s32), [[PRED_COPY14]](s32), [[PRED_COPY15]](s32), [[PRED_COPY16]](s32), [[PRED_COPY17]](s32), [[PRED_COPY18]](s32), [[PRED_COPY19]](s32), [[PRED_COPY20]](s32), [[PRED_COPY21]](s32), 
[[PRED_COPY22]](s32), [[PRED_COPY23]](s32) ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.cl.o.3d), 15, [[BUILD_VECTOR2]](<12 x s32>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 0 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX11-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX11-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX11-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX11-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.o.3d.v4f32.f32.f32(i32 15, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %drdh, float %dsdv, float %dtdv, float %drdv, float %s, float %t, float %r, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.sample.g16.a16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.sample.g16.a16.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.sample.g16.a16.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.sample.g16.a16.ll @@ -7,71 +7,71 @@ ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; 
GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX10-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX10-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX10-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX10-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY12]](s32) + ; GFX10-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX10-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16) ; GFX10-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.1d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX11-LABEL: name: sample_d_1d_g16_a16 ; GFX11: bb.1.main_body: ; GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX11-NEXT: 
[[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX11-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX11-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX11-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX11-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX11-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX11-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX11-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX11-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX11-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX11-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY12]](s32) + ; GFX11-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX11-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16) ; GFX11-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.1d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX11-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX11-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX11-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX11-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; 
GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f16(i32 15, half %dsdh, half %dsdv, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -83,81 +83,81 @@ ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX10-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) - ; GFX10-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY17]](s32) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX10-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX10-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX10-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX10-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC 
[[PRED_COPY12]](s32) + ; GFX10-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX10-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) + ; GFX10-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY15]](s32) + ; GFX10-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; GFX10-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY16]](s32) + ; GFX10-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; GFX10-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY17]](s32) ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) ; GFX10-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.2d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX11-LABEL: name: sample_d_2d_g16_a16 ; GFX11: bb.1.main_body: ; GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX11-NEXT: 
[[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX11-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX11-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX11-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) - ; GFX11-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX11-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY17]](s32) + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX11-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX11-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX11-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX11-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX11-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX11-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX11-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX11-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX11-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX11-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY12]](s32) + ; GFX11-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX11-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) + ; GFX11-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY15]](s32) + ; GFX11-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; GFX11-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY16]](s32) + ; GFX11-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; GFX11-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY17]](s32) ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) ; GFX11-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.2d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX11-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX11-NEXT: $vgpr0 = PRED_COPY 
[[UV]](s32) + ; GFX11-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX11-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX11-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f16.f16(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -169,38 +169,38 @@ ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX10-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) - ; GFX10-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY17]](s32) - ; GFX10-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX10-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY18]](s32) - ; GFX10-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX10-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[COPY19]](s32) - ; GFX10-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX10-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[COPY20]](s32) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), 
[[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX10-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX10-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX10-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX10-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY12]](s32) + ; GFX10-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX10-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) + ; GFX10-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY15]](s32) + ; GFX10-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; GFX10-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY16]](s32) + ; GFX10-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; GFX10-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY17]](s32) + ; GFX10-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY $vgpr6 + ; GFX10-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY18]](s32) + ; GFX10-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY $vgpr7 + ; GFX10-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY19]](s32) + ; GFX10-NEXT: [[PRED_COPY20:%[0-9]+]]:_(s32) = PRED_COPY $vgpr8 + ; GFX10-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY20]](s32) ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) @@ -211,47 +211,47 @@ ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), [[BUILD_VECTOR5]](<2 x s16>), [[BUILD_VECTOR6]](<2 x s16>), [[BUILD_VECTOR7]](<2 x s16>) ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.3d), 15, [[CONCAT_VECTORS]](<12 x s16>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX11-LABEL: name: sample_d_3d_g16_a16 ; GFX11: bb.1.main_body: ; GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = 
COPY $sgpr2 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX11-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX11-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX11-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) - ; GFX11-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX11-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY17]](s32) - ; GFX11-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX11-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY18]](s32) - ; GFX11-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX11-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[COPY19]](s32) - ; GFX11-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX11-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[COPY20]](s32) + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX11-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX11-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX11-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX11-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX11-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX11-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX11-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX11-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX11-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX11-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY12]](s32) + ; GFX11-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX11-NEXT: 
[[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) + ; GFX11-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY15]](s32) + ; GFX11-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; GFX11-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY16]](s32) + ; GFX11-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; GFX11-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY17]](s32) + ; GFX11-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY $vgpr6 + ; GFX11-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY18]](s32) + ; GFX11-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY $vgpr7 + ; GFX11-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY19]](s32) + ; GFX11-NEXT: [[PRED_COPY20:%[0-9]+]]:_(s32) = PRED_COPY $vgpr8 + ; GFX11-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY20]](s32) ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) @@ -262,10 +262,10 @@ ; GFX11-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), [[BUILD_VECTOR5]](<2 x s16>), [[BUILD_VECTOR6]](<2 x s16>), [[BUILD_VECTOR7]](<2 x s16>) ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.3d), 15, [[CONCAT_VECTORS]](<12 x s16>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX11-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX11-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX11-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX11-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f16.f16(i32 15, half %dsdh, half %dtdh, half %drdh, half %dsdv, half %dtdv, half %drdv, half %s, half %t, half %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.sample.g16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.sample.g16.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.sample.g16.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.sample.g16.ll @@ -7,69 +7,69 @@ ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: 
[[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX10-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX10-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX10-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX10-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY12]](s32) + ; GFX10-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX10-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16) - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY14]](s32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY14]](s32) ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.1d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BITCAST]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; 
GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX11-LABEL: name: sample_d_1d ; GFX11: bb.1.main_body: ; GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX11-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX11-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX11-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX11-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX11-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX11-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX11-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX11-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX11-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX11-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY12]](s32) + ; GFX11-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX11-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR 
[[TRUNC]](s16), [[DEF]](s16) ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16) - ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY14]](s32) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY14]](s32) ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.1d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BITCAST]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX11-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX11-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX11-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX11-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f32(i32 15, half %dsdh, half %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -81,79 +81,79 @@ ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX10-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10-NEXT: 
[[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX10-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX10-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX10-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX10-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY12]](s32) + ; GFX10-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX10-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) + ; GFX10-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY15]](s32) + ; GFX10-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; GFX10-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY16]](s32) - ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY17]](s32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY16]](s32) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY17]](s32) ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.2d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX11-LABEL: name: sample_d_2d ; GFX11: bb.1.main_body: ; GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; 
GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX11-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX11-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX11-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX11-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX11-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX11-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX11-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX11-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX11-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX11-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX11-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX11-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX11-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY12]](s32) + ; GFX11-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX11-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) + ; GFX11-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY15]](s32) + ; GFX11-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; GFX11-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x 
s16>) = G_BITCAST [[COPY16]](s32) - ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY17]](s32) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY16]](s32) + ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY17]](s32) ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.2d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX11-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX11-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX11-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX11-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -165,99 +165,99 @@ ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX10-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) - ; GFX10-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY17]](s32) - ; GFX10-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY 
$vgpr6 - ; GFX10-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX10-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX10-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX10-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX10-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX10-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY12]](s32) + ; GFX10-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX10-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) + ; GFX10-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY15]](s32) + ; GFX10-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; GFX10-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY16]](s32) + ; GFX10-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; GFX10-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY17]](s32) + ; GFX10-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY $vgpr6 + ; GFX10-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY $vgpr7 + ; GFX10-NEXT: [[PRED_COPY20:%[0-9]+]]:_(s32) = PRED_COPY $vgpr8 ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) ; GFX10-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC3]](s16), [[TRUNC4]](s16) ; GFX10-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC5]](s16), [[DEF]](s16) - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY18]](s32) - ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY19]](s32) - ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY20]](s32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY18]](s32) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY19]](s32) + ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY20]](s32) ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<14 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), [[BUILD_VECTOR5]](<2 x s16>), [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>) ; GFX10-NEXT: 
[[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.3d), 15, [[CONCAT_VECTORS]](<14 x s16>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX11-LABEL: name: sample_d_3d ; GFX11: bb.1.main_body: ; GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX11-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX11-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX11-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) - ; GFX11-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX11-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY17]](s32) - ; GFX11-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX11-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX11-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX11-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX11-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX11-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; 
GFX11-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX11-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX11-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX11-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX11-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX11-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX11-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY12]](s32) + ; GFX11-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX11-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) + ; GFX11-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY15]](s32) + ; GFX11-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; GFX11-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY16]](s32) + ; GFX11-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; GFX11-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY17]](s32) + ; GFX11-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY $vgpr6 + ; GFX11-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY $vgpr7 + ; GFX11-NEXT: [[PRED_COPY20:%[0-9]+]]:_(s32) = PRED_COPY $vgpr8 ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) ; GFX11-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC3]](s16), [[TRUNC4]](s16) ; GFX11-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC5]](s16), [[DEF]](s16) - ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY18]](s32) - ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY19]](s32) - ; GFX11-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY20]](s32) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY18]](s32) + ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY19]](s32) + ; GFX11-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY20]](s32) ; GFX11-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<14 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), [[BUILD_VECTOR5]](<2 x s16>), [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>) ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.3d), 15, [[CONCAT_VECTORS]](<14 x s16>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; 
GFX11-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX11-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX11-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX11-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %drdh, half %dsdv, half %dtdv, half %drdv, float %s, float %t, float %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -269,73 +269,73 @@ ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX10-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX10-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX10-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX10-NEXT: 
[[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX10-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) + ; GFX10-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY12]](s32) ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16) - ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY15]](s32) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY15]](s32) ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.1d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX11-LABEL: name: sample_c_d_1d ; GFX11: bb.1.main_body: ; GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX11-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST 
[[COPY12]](s32) + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX11-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX11-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX11-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX11-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX11-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX11-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX11-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX11-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX11-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX11-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX11-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) + ; GFX11-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY12]](s32) ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16) - ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY15]](s32) + ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY15]](s32) ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.1d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX11-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX11-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX11-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX11-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -347,83 +347,83 @@ ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6 ; GFX10-NEXT: {{ $}} - ; 
GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX10-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) - ; GFX10-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX10-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX10-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX10-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX10-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX10-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) + ; GFX10-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY15]](s32) + ; GFX10-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY16]](s32) + ; GFX10-NEXT: 
[[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; GFX10-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY $vgpr6 + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY12]](s32) ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY17]](s32) - ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY18]](s32) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY17]](s32) + ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY18]](s32) ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.2d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX11-LABEL: name: sample_c_d_2d ; GFX11: bb.1.main_body: ; GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX11-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX11-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = 
G_TRUNC [[COPY16]](s32) - ; GFX11-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX11-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX11-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX11-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX11-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX11-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX11-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX11-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX11-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX11-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX11-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX11-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX11-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) + ; GFX11-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY15]](s32) + ; GFX11-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY16]](s32) + ; GFX11-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; GFX11-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY $vgpr6 + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY12]](s32) ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY17]](s32) - ; GFX11-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY18]](s32) + ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY17]](s32) + ; GFX11-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY18]](s32) ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.2d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX11-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + 
; GFX11-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX11-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX11-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -435,73 +435,73 @@ ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX10-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX10-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX10-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX10-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY12]](s32) + ; GFX10-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX10-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = 
PRED_COPY $vgpr2 + ; GFX10-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16) - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY14]](s32) - ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY15]](s32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY14]](s32) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY15]](s32) ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.cl.1d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX11-LABEL: name: sample_d_cl_1d ; GFX11: bb.1.main_body: ; GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX11-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX11-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX11-NEXT: 
[[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX11-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX11-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX11-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX11-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX11-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX11-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX11-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX11-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY12]](s32) + ; GFX11-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX11-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX11-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16) - ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY14]](s32) - ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY15]](s32) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY14]](s32) + ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY15]](s32) ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.cl.1d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX11-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX11-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX11-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX11-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f16.f32(i32 15, half %dsdh, half %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -513,83 +513,83 @@ ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; 
GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX10-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX10-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX10-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX10-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX10-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY12]](s32) + ; GFX10-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX10-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) + ; GFX10-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY15]](s32) + ; GFX10-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; GFX10-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; GFX10-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY $vgpr6 ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR 
[[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY16]](s32) - ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY17]](s32) - ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY18]](s32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY16]](s32) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY17]](s32) + ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY18]](s32) ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.cl.2d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX11-LABEL: name: sample_d_cl_2d ; GFX11: bb.1.main_body: ; GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX11-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX11-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX11-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX11-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; 
GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX11-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX11-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX11-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX11-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX11-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX11-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX11-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX11-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX11-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX11-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY12]](s32) + ; GFX11-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX11-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) + ; GFX11-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY15]](s32) + ; GFX11-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; GFX11-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; GFX11-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY $vgpr6 ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY16]](s32) - ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY17]](s32) - ; GFX11-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY18]](s32) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY16]](s32) + ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY17]](s32) + ; GFX11-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY18]](s32) ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.cl.2d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX11-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX11-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX11-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX11-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit 
$vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -601,77 +601,77 @@ ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX10-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX10-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX10-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX10-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX10-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) + ; GFX10-NEXT: 
[[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX10-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY12]](s32) ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16) - ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY15]](s32) - ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY16]](s32) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY15]](s32) + ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY16]](s32) ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.cl.1d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX11-LABEL: name: sample_c_d_cl_1d ; GFX11: bb.1.main_body: ; GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX11-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX11-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) + ; 
GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX11-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX11-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX11-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX11-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX11-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX11-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX11-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX11-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX11-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX11-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX11-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) + ; GFX11-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX11-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY12]](s32) ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16) - ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY15]](s32) - ; GFX11-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY16]](s32) + ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY15]](s32) + ; GFX11-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY16]](s32) ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.cl.1d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX11-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX11-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX11-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX11-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ 
-683,89 +683,89 @@ ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX10-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) - ; GFX10-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX10-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX10-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX10-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX10-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX10-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX10-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC 
[[PRED_COPY14]](s32) + ; GFX10-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY15]](s32) + ; GFX10-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY16]](s32) + ; GFX10-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; GFX10-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY $vgpr6 + ; GFX10-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY $vgpr7 + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY12]](s32) ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY17]](s32) - ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY18]](s32) - ; GFX10-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY19]](s32) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY17]](s32) + ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY18]](s32) + ; GFX10-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY19]](s32) ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.cl.2d), 15, [[CONCAT_VECTORS]](<12 x s16>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX11-LABEL: name: sample_c_d_cl_2d ; GFX11: bb.1.main_body: ; GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY 
$sgpr12 - ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX11-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX11-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) - ; GFX11-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX11-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX11-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX11-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX11-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX11-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX11-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX11-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX11-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX11-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX11-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX11-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX11-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX11-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) + ; GFX11-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY15]](s32) + ; GFX11-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY16]](s32) + ; GFX11-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; GFX11-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY $vgpr6 + ; GFX11-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY $vgpr7 + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY12]](s32) ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY17]](s32) - ; GFX11-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY18]](s32) - ; GFX11-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY19]](s32) + ; GFX11-NEXT: 
[[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY17]](s32) + ; GFX11-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY18]](s32) + ; GFX11-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY19]](s32) ; GFX11-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.cl.2d), 15, [[CONCAT_VECTORS]](<12 x s16>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX11-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX11-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX11-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX11-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -777,69 +777,69 @@ ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10-NEXT: 
[[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX10-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX10-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX10-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX10-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY12]](s32) + ; GFX10-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX10-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16) - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY14]](s32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY14]](s32) ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cd.1d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BITCAST]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX11-LABEL: name: sample_cd_1d ; GFX11: bb.1.main_body: ; GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; 
GFX11-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX11-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX11-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX11-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX11-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX11-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX11-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX11-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX11-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX11-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX11-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY12]](s32) + ; GFX11-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX11-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16) - ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY14]](s32) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY14]](s32) ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cd.1d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BITCAST]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX11-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX11-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX11-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX11-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f16.f32(i32 15, half 
%dsdh, half %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -851,79 +851,79 @@ ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX10-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX10-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX10-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX10-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX10-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY12]](s32) + ; GFX10-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX10-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) + ; GFX10-NEXT: 
[[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY15]](s32) + ; GFX10-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; GFX10-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY16]](s32) - ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY17]](s32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY16]](s32) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY17]](s32) ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cd.2d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX11-LABEL: name: sample_cd_2d ; GFX11: bb.1.main_body: ; GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX11-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; 
GFX11-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX11-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX11-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX11-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX11-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX11-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX11-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX11-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX11-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX11-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX11-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX11-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY12]](s32) + ; GFX11-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX11-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) + ; GFX11-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY15]](s32) + ; GFX11-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; GFX11-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY16]](s32) - ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY17]](s32) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY16]](s32) + ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY17]](s32) ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cd.2d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX11-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX11-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX11-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX11-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> 
@llvm.amdgcn.image.sample.cd.2d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -935,73 +935,73 @@ ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX10-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX10-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX10-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX10-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX10-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) + ; GFX10-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY12]](s32) ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s16) = 
G_IMPLICIT_DEF ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16) - ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY15]](s32) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY15]](s32) ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cd.1d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX11-LABEL: name: sample_c_cd_1d ; GFX11: bb.1.main_body: ; GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX11-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX11-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX11-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX11-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX11-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX11-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = 
PRED_COPY $sgpr9 + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX11-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX11-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX11-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX11-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX11-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX11-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) + ; GFX11-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY12]](s32) ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16) - ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY15]](s32) + ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY15]](s32) ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cd.1d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX11-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX11-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX11-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX11-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -1013,83 +1013,83 @@ ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), 
[[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX10-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) - ; GFX10-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX10-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX10-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX10-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX10-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX10-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) + ; GFX10-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY15]](s32) + ; GFX10-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY16]](s32) + ; GFX10-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; GFX10-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY $vgpr6 + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY12]](s32) ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY17]](s32) - ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x 
s16>) = G_BITCAST [[COPY18]](s32) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY17]](s32) + ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY18]](s32) ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cd.2d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX11-LABEL: name: sample_c_cd_2d ; GFX11: bb.1.main_body: ; GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX11-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX11-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) - ; GFX11-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX11-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX11-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX11-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = 
PRED_COPY $sgpr6 + ; GFX11-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX11-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX11-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX11-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX11-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX11-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX11-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX11-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX11-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) + ; GFX11-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY15]](s32) + ; GFX11-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY16]](s32) + ; GFX11-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; GFX11-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY $vgpr6 + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY12]](s32) ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY17]](s32) - ; GFX11-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY18]](s32) + ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY17]](s32) + ; GFX11-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY18]](s32) ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cd.2d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX11-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX11-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX11-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX11-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -1101,73 +1101,73 @@ ; GFX10: bb.1.main_body: 
; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX10-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX10-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX10-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX10-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY12]](s32) + ; GFX10-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX10-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16) - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY14]](s32) - ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY15]](s32) + ; GFX10-NEXT: 
[[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY14]](s32) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY15]](s32) ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cd.cl.1d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX11-LABEL: name: sample_cd_cl_1d ; GFX11: bb.1.main_body: ; GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX11-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX11-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX11-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX11-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX11-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX11-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX11-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = 
PRED_COPY $sgpr10 + ; GFX11-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX11-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX11-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX11-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY12]](s32) + ; GFX11-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX11-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX11-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16) - ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY14]](s32) - ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY15]](s32) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY14]](s32) + ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY15]](s32) ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cd.cl.1d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX11-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX11-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX11-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX11-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f16.f32(i32 15, half %dsdh, half %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -1179,83 +1179,83 @@ ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; 
GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX10-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX10-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX10-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX10-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX10-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY12]](s32) + ; GFX10-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX10-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) + ; GFX10-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY15]](s32) + ; GFX10-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; GFX10-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; GFX10-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY $vgpr6 ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY16]](s32) - ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY17]](s32) - ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY18]](s32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY16]](s32) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY17]](s32) + ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY18]](s32) ; GFX10-NEXT: 
[[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cd.cl.2d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX11-LABEL: name: sample_cd_cl_2d ; GFX11: bb.1.main_body: ; GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX11-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX11-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX11-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX11-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX11-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX11-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX11-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX11-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX11-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR 
[[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX11-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX11-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX11-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX11-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX11-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY12]](s32) + ; GFX11-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX11-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) + ; GFX11-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY15]](s32) + ; GFX11-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; GFX11-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; GFX11-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY $vgpr6 ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY16]](s32) - ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY17]](s32) - ; GFX11-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY18]](s32) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY16]](s32) + ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY17]](s32) + ; GFX11-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY18]](s32) ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cd.cl.2d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX11-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX11-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX11-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX11-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -1267,77 +1267,77 @@ ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: 
[[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX10-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX10-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX10-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX10-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX10-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) + ; GFX10-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX10-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY12]](s32) ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16) - ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY15]](s32) - ; 
GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY16]](s32) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY15]](s32) + ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY16]](s32) ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cd.cl.1d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX11-LABEL: name: sample_c_cd_cl_1d ; GFX11: bb.1.main_body: ; GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX11-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX11-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX11-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX11-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX11-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX11-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX11-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX11-NEXT: 
[[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX11-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX11-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX11-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX11-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX11-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX11-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) + ; GFX11-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX11-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY12]](s32) ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16) - ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY15]](s32) - ; GFX11-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY16]](s32) + ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY15]](s32) + ; GFX11-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY16]](s32) ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cd.cl.1d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX11-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX11-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX11-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX11-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -1349,89 +1349,89 @@ ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; 
GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX10-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) - ; GFX10-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX10-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX10-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX10-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX10-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX10-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX10-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) + ; GFX10-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY15]](s32) + ; GFX10-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY16]](s32) + ; GFX10-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; GFX10-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY $vgpr6 + ; GFX10-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY $vgpr7 + ; GFX10-NEXT: 
[[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY12]](s32) ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY17]](s32) - ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY18]](s32) - ; GFX10-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY19]](s32) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY17]](s32) + ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY18]](s32) + ; GFX10-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY19]](s32) ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cd.cl.2d), 15, [[CONCAT_VECTORS]](<12 x s16>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX11-LABEL: name: sample_c_cd_cl_2d ; GFX11: bb.1.main_body: ; GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; 
GFX11-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX11-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) - ; GFX11-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX11-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX11-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX11-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX11-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX11-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX11-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX11-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX11-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX11-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX11-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX11-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX11-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX11-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) + ; GFX11-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY15]](s32) + ; GFX11-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY16]](s32) + ; GFX11-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; GFX11-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY $vgpr6 + ; GFX11-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY $vgpr7 + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY12]](s32) ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY17]](s32) - ; GFX11-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY18]](s32) - ; GFX11-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY19]](s32) + ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY17]](s32) + ; GFX11-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY18]](s32) + ; GFX11-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY19]](s32) ; GFX11-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = 
G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cd.cl.2d), 15, [[CONCAT_VECTORS]](<12 x s16>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX11-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX11-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX11-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX11-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -1443,85 +1443,85 @@ ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX10-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) - ; GFX10-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY17]](s32) - ; GFX10-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX10-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX10-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) - ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY13]](s32) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; 
GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX10-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX10-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX10-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX10-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) + ; GFX10-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY15]](s32) + ; GFX10-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY16]](s32) + ; GFX10-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY17]](s32) + ; GFX10-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY $vgpr6 + ; GFX10-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY $vgpr7 + ; GFX10-NEXT: [[PRED_COPY20:%[0-9]+]]:_(s32) = PRED_COPY $vgpr8 + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY12]](s32) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY13]](s32) ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY18]](s32) - ; GFX10-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY19]](s32) - ; GFX10-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY20]](s32) + ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY18]](s32) + ; GFX10-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY19]](s32) + ; GFX10-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY20]](s32) ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<14 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.o.2darray), 4, [[CONCAT_VECTORS]](<14 x s16>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (s32), addrspace 7) - ; GFX10-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX11-LABEL: name: 
sample_c_d_o_2darray_V1 ; GFX11: bb.1.main_body: ; GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX11-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX11-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) - ; GFX11-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY17]](s32) - ; GFX11-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX11-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX11-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) - ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY13]](s32) + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX11-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX11-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX11-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX11-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX11-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX11-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX11-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX11-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX11-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX11-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = 
PRED_COPY $vgpr2 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) + ; GFX11-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY15]](s32) + ; GFX11-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY16]](s32) + ; GFX11-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY17]](s32) + ; GFX11-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY $vgpr6 + ; GFX11-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY $vgpr7 + ; GFX11-NEXT: [[PRED_COPY20:%[0-9]+]]:_(s32) = PRED_COPY $vgpr8 + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY12]](s32) + ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY13]](s32) ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX11-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY18]](s32) - ; GFX11-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY19]](s32) - ; GFX11-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY20]](s32) + ; GFX11-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY18]](s32) + ; GFX11-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY19]](s32) + ; GFX11-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY20]](s32) ; GFX11-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<14 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.o.2darray), 4, [[CONCAT_VECTORS]](<14 x s16>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (s32), addrspace 7) - ; GFX11-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 main_body: %v = call float @llvm.amdgcn.image.sample.c.d.o.2darray.f16.f32.f32(i32 4, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -1533,89 +1533,89 @@ ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY 
$sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX10-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) - ; GFX10-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY17]](s32) - ; GFX10-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX10-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX10-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) - ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY13]](s32) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX10-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX10-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX10-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX10-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) + ; GFX10-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY15]](s32) + ; GFX10-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY16]](s32) + ; GFX10-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY17]](s32) + ; GFX10-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY $vgpr6 + ; GFX10-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY $vgpr7 + ; GFX10-NEXT: [[PRED_COPY20:%[0-9]+]]:_(s32) = PRED_COPY $vgpr8 + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY12]](s32) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY13]](s32) ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR 
[[TRUNC]](s16), [[TRUNC1]](s16) ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY18]](s32) - ; GFX10-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY19]](s32) - ; GFX10-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY20]](s32) + ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY18]](s32) + ; GFX10-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY19]](s32) + ; GFX10-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY20]](s32) ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<14 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.o.2darray), 6, [[CONCAT_VECTORS]](<14 x s16>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<2 x s32>), addrspace 7) ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 ; GFX11-LABEL: name: sample_c_d_o_2darray_V2 ; GFX11: bb.1.main_body: ; GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX11-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX11-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) - ; GFX11-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY17]](s32) - ; GFX11-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 - 
; GFX11-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX11-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) - ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY13]](s32) + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX11-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX11-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX11-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX11-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX11-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX11-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX11-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX11-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX11-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX11-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) + ; GFX11-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY15]](s32) + ; GFX11-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY16]](s32) + ; GFX11-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY17]](s32) + ; GFX11-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY $vgpr6 + ; GFX11-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY $vgpr7 + ; GFX11-NEXT: [[PRED_COPY20:%[0-9]+]]:_(s32) = PRED_COPY $vgpr8 + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY12]](s32) + ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY13]](s32) ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX11-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY18]](s32) - ; GFX11-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY19]](s32) - ; GFX11-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY20]](s32) + ; GFX11-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY18]](s32) + ; GFX11-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY19]](s32) + ; GFX11-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY20]](s32) ; GFX11-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<14 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD 
intrinsic(@llvm.amdgcn.image.sample.c.d.o.2darray), 6, [[CONCAT_VECTORS]](<14 x s16>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<2 x s32>), addrspace 7) ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) - ; GFX11-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX11-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 main_body: %v = call <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f16.f32(i32 6, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.store.2d.d16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.store.2d.d16.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.store.2d.d16.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.store.2d.d16.ll @@ -10,80 +10,80 @@ ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; UNPACKED-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; UNPACKED-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32) - ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; UNPACKED-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; UNPACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; UNPACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; UNPACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; UNPACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; UNPACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; UNPACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; UNPACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; UNPACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; UNPACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; UNPACKED-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; UNPACKED-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY10]](s32) + ; UNPACKED-NEXT: 
[[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) ; UNPACKED-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE_D16 intrinsic(@llvm.amdgcn.image.store.2d), [[TRUNC]](s16), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store (s16), addrspace 7) ; UNPACKED-NEXT: S_ENDPGM 0 ; GFX81-LABEL: name: image_store_f16 ; GFX81: bb.1 (%ir-block.0): ; GFX81-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2 ; GFX81-NEXT: {{ $}} - ; GFX81-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX81-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX81-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX81-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX81-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX81-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX81-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX81-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX81-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX81-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX81-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX81-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX81-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32) - ; GFX81-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; GFX81-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX81-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX81-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX81-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX81-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX81-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX81-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX81-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX81-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX81-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX81-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX81-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX81-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY10]](s32) + ; GFX81-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) ; GFX81-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE_D16 intrinsic(@llvm.amdgcn.image.store.2d), [[TRUNC]](s16), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store (s16), addrspace 7) ; GFX81-NEXT: S_ENDPGM 0 ; GFX9-LABEL: name: image_store_f16 ; GFX9: bb.1 (%ir-block.0): ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: 
[[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY10]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) ; GFX9-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE_D16 intrinsic(@llvm.amdgcn.image.store.2d), [[TRUNC]](s16), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store (s16), addrspace 7) ; GFX9-NEXT: S_ENDPGM 0 ; GFX10-LABEL: name: image_store_f16 ; GFX10: bb.1 (%ir-block.0): ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32) - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = 
PRED_COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY10]](s32) + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) ; GFX10-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE_D16 intrinsic(@llvm.amdgcn.image.store.2d), [[TRUNC]](s16), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store (s16), addrspace 7) ; GFX10-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.image.store.2d.f16.i32(half %data, i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) @@ -95,20 +95,20 @@ ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; UNPACKED-NEXT: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY10]](<2 x s16>) + ; UNPACKED-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; UNPACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; UNPACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; UNPACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; UNPACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; UNPACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; UNPACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; UNPACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; UNPACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; UNPACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; UNPACKED-NEXT: [[PRED_COPY10:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr2 + ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) + ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY10]](<2 x s16>) ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; UNPACKED-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; 
UNPACKED-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[BITCAST]](s32), [[LSHR]](s32) @@ -118,20 +118,20 @@ ; GFX81: bb.1 (%ir-block.0): ; GFX81-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2 ; GFX81-NEXT: {{ $}} - ; GFX81-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX81-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX81-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX81-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX81-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX81-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX81-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX81-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX81-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX81-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX81-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX81-NEXT: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; GFX81-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; GFX81-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY10]](<2 x s16>) + ; GFX81-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX81-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX81-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX81-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX81-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX81-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX81-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX81-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX81-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX81-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX81-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX81-NEXT: [[PRED_COPY10:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr2 + ; GFX81-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) + ; GFX81-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY10]](<2 x s16>) ; GFX81-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX81-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[BITCAST]](s32), [[DEF]](s32) ; GFX81-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE_D16 intrinsic(@llvm.amdgcn.image.store.2d), [[BUILD_VECTOR2]](<2 x s32>), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store (<2 x s16>), addrspace 7) @@ -140,39 +140,39 @@ ; GFX9: bb.1 (%ir-block.0): ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), 
[[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; GFX9-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE_D16 intrinsic(@llvm.amdgcn.image.store.2d), [[COPY10]](<2 x s16>), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store (<2 x s16>), addrspace 7) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[PRED_COPY10:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) + ; GFX9-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE_D16 intrinsic(@llvm.amdgcn.image.store.2d), [[PRED_COPY10]](<2 x s16>), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store (<2 x s16>), addrspace 7) ; GFX9-NEXT: S_ENDPGM 0 ; GFX10-LABEL: name: image_store_v2f16 ; GFX10: bb.1 (%ir-block.0): ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; GFX10-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE_D16 intrinsic(@llvm.amdgcn.image.store.2d), [[COPY10]](<2 x s16>), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store (<2 x s16>), addrspace 7) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10-NEXT: 
[[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[PRED_COPY10:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) + ; GFX10-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE_D16 intrinsic(@llvm.amdgcn.image.store.2d), [[PRED_COPY10]](<2 x s16>), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store (<2 x s16>), addrspace 7) ; GFX10-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.image.store.2d.v2f16.i32(<2 x half> %in, i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) ret void @@ -183,24 +183,24 @@ ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; UNPACKED-NEXT: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; UNPACKED-NEXT: [[COPY11:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 - ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY10]](<2 x s16>) + ; UNPACKED-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; UNPACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; UNPACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; UNPACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; UNPACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; UNPACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; UNPACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; UNPACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; UNPACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; UNPACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; UNPACKED-NEXT: [[PRED_COPY10:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr2 + ; UNPACKED-NEXT: [[PRED_COPY11:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr3 + ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY10]](<2 x s16>) ; UNPACKED-NEXT: 
[[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; UNPACKED-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY11]](<2 x s16>) - ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY11]](<2 x s16>) + ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) ; UNPACKED-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[BITCAST]](s32), [[LSHR]](s32), [[BITCAST1]](s32) ; UNPACKED-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE_D16 intrinsic(@llvm.amdgcn.image.store.2d), [[BUILD_VECTOR2]](<3 x s32>), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store (<3 x s16>), align 8, addrspace 7) ; UNPACKED-NEXT: S_ENDPGM 0 @@ -208,24 +208,24 @@ ; GFX81: bb.1 (%ir-block.0): ; GFX81-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX81-NEXT: {{ $}} - ; GFX81-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX81-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX81-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX81-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX81-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX81-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX81-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX81-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX81-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX81-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX81-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX81-NEXT: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; GFX81-NEXT: [[COPY11:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 - ; GFX81-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY10]](<2 x s16>) + ; GFX81-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX81-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX81-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX81-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX81-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX81-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX81-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX81-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX81-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX81-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX81-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX81-NEXT: [[PRED_COPY10:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr2 + ; GFX81-NEXT: [[PRED_COPY11:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr3 + ; GFX81-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY10]](<2 x s16>) ; GFX81-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX81-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX81-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY11]](<2 x s16>) - ; GFX81-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; GFX81-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY11]](<2 x s16>) + 
; GFX81-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) ; GFX81-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; GFX81-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] ; GFX81-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] @@ -247,27 +247,27 @@ ; GFX9: bb.1 (%ir-block.0): ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY10]](<2 x s16>) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[PRED_COPY10:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[PRED_COPY11:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr3 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY10]](<2 x s16>) ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY11]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY11]](<2 x s16>) ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) @@ -278,27 +278,27 @@ ; GFX10: bb.1 (%ir-block.0): ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, 
$sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY10]](<2 x s16>) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[PRED_COPY10:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[PRED_COPY11:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr3 + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY10]](<2 x s16>) ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX10-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY11]](<2 x s16>) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY11]](<2 x s16>) ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) @@ -314,24 +314,24 @@ ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; 
UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; UNPACKED-NEXT: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; UNPACKED-NEXT: [[COPY11:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 - ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY10]](<2 x s16>) + ; UNPACKED-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; UNPACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; UNPACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; UNPACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; UNPACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; UNPACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; UNPACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; UNPACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; UNPACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; UNPACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; UNPACKED-NEXT: [[PRED_COPY10:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr2 + ; UNPACKED-NEXT: [[PRED_COPY11:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr3 + ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) + ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY10]](<2 x s16>) ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; UNPACKED-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY11]](<2 x s16>) + ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY11]](<2 x s16>) ; UNPACKED-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) ; UNPACKED-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[BITCAST]](s32), [[LSHR]](s32), [[BITCAST1]](s32), [[LSHR1]](s32) ; UNPACKED-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE_D16 intrinsic(@llvm.amdgcn.image.store.2d), [[BUILD_VECTOR2]](<4 x s32>), 15, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store (<4 x s16>), addrspace 7) @@ -340,21 +340,21 @@ ; GFX81: bb.1 (%ir-block.0): ; GFX81-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX81-NEXT: {{ $}} - ; GFX81-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX81-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX81-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX81-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX81-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX81-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX81-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX81-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX81-NEXT: 
[[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX81-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX81-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX81-NEXT: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; GFX81-NEXT: [[COPY11:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 - ; GFX81-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY10]](<2 x s16>), [[COPY11]](<2 x s16>) - ; GFX81-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; GFX81-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX81-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX81-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX81-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX81-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX81-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX81-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX81-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX81-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX81-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX81-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX81-NEXT: [[PRED_COPY10:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr2 + ; GFX81-NEXT: [[PRED_COPY11:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr3 + ; GFX81-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[PRED_COPY10]](<2 x s16>), [[PRED_COPY11]](<2 x s16>) + ; GFX81-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) ; GFX81-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[CONCAT_VECTORS]](<4 x s16>) ; GFX81-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<2 x s32>) ; GFX81-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF @@ -365,42 +365,42 @@ ; GFX9: bb.1 (%ir-block.0): ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY10]](<2 x s16>), [[COPY11]](<2 x s16>) - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; 
GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[PRED_COPY10:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[PRED_COPY11:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr3 + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[PRED_COPY10]](<2 x s16>), [[PRED_COPY11]](<2 x s16>) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) ; GFX9-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE_D16 intrinsic(@llvm.amdgcn.image.store.2d), [[CONCAT_VECTORS]](<4 x s16>), 15, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store (<4 x s16>), addrspace 7) ; GFX9-NEXT: S_ENDPGM 0 ; GFX10-LABEL: name: image_store_v4f16 ; GFX10: bb.1 (%ir-block.0): ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 - ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY10]](<2 x s16>), [[COPY11]](<2 x s16>) - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10-NEXT: 
[[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[PRED_COPY10:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[PRED_COPY11:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr3 + ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[PRED_COPY10]](<2 x s16>), [[PRED_COPY11]](<2 x s16>) + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) ; GFX10-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE_D16 intrinsic(@llvm.amdgcn.image.store.2d), [[CONCAT_VECTORS]](<4 x s16>), 15, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store (<4 x s16>), addrspace 7) ; GFX10-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.image.store.2d.v4f16.i32(<4 x half> %in, i32 15, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant.mir @@ -2470,17 +2470,17 @@ ; CI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; CI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]] - ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]] - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32) + ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[PRED_COPY]](s32) ; CI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) ; CI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] ; CI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) ; CI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]] - ; CI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C3]] - ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32) + ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[PRED_COPY1]](s32) ; CI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) ; CI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] ; CI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) @@ -2582,17 +2582,17 @@ ; CI-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; CI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR1]](s32) ; CI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C4]] - ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; CI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C1]](s32) ; CI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C5]] - ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32) + ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[PRED_COPY]](s32) ; CI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) ; CI-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] ; CI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) ; CI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C4]] - ; CI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; CI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C1]](s32) ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C5]] - ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32) + ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL 
[[AND3]], [[PRED_COPY1]](s32) ; CI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) ; CI-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] ; CI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) @@ -4783,8 +4783,8 @@ ; CI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) ; CI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD3]] ; CI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[OR4]](s32) - ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C3]](s32) - ; CI-NEXT: [[SHL5:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[COPY1]](s32) + ; CI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C3]](s32) + ; CI-NEXT: [[SHL5:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[PRED_COPY]](s32) ; CI-NEXT: [[OR5:%[0-9]+]]:_(s64) = G_OR [[SHL5]], [[ZEXT1]] ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[OR2]](s64), [[OR5]](s64) ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) @@ -4826,8 +4826,8 @@ ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD3]] ; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[OR4]](s32) - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C3]](s32) - ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[COPY1]](s32) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C3]](s32) + ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[PRED_COPY]](s32) ; VI-NEXT: [[OR5:%[0-9]+]]:_(s64) = G_OR [[SHL5]], [[ZEXT1]] ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[OR2]](s64), [[OR5]](s64) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) @@ -4869,8 +4869,8 @@ ; GFX9-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) ; GFX9-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD3]] ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[OR4]](s32) - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C3]](s32) - ; GFX9-NEXT: [[SHL5:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[COPY1]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C3]](s32) + ; GFX9-NEXT: [[SHL5:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[PRED_COPY]](s32) ; GFX9-NEXT: [[OR5:%[0-9]+]]:_(s64) = G_OR [[SHL5]], [[ZEXT1]] ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[OR2]](s64), [[OR5]](s64) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) @@ -4957,8 +4957,8 @@ ; CI-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[OR11]], [[C3]](s32) ; CI-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[SHL12]], [[OR10]] ; CI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[OR12]](s32) - ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; CI-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[COPY1]](s32) + ; CI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C5]](s32) + ; CI-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[PRED_COPY]](s32) ; CI-NEXT: [[OR13:%[0-9]+]]:_(s64) = G_OR [[SHL13]], [[ZEXT1]] ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64) ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) @@ -5034,8 +5034,8 @@ ; VI-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[OR11]], [[C3]](s32) ; VI-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[SHL12]], [[OR10]] ; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[OR12]](s32) - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; VI-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[COPY1]](s32) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C5]](s32) + ; VI-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], 
[[PRED_COPY]](s32) ; VI-NEXT: [[OR13:%[0-9]+]]:_(s64) = G_OR [[SHL13]], [[ZEXT1]] ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) @@ -5111,8 +5111,8 @@ ; GFX9-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[OR11]], [[C3]](s32) ; GFX9-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[SHL12]], [[OR10]] ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[OR12]](s32) - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; GFX9-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[COPY1]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C5]](s32) + ; GFX9-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[PRED_COPY]](s32) ; GFX9-NEXT: [[OR13:%[0-9]+]]:_(s64) = G_OR [[SHL13]], [[ZEXT1]] ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) @@ -5294,8 +5294,8 @@ ; CI-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[OR11]], [[C3]](s32) ; CI-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[SHL12]], [[OR10]] ; CI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[OR12]](s32) - ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; CI-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[COPY1]](s32) + ; CI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C5]](s32) + ; CI-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[PRED_COPY]](s32) ; CI-NEXT: [[OR13:%[0-9]+]]:_(s64) = G_OR [[SHL13]], [[ZEXT1]] ; CI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C7]](s64) @@ -5328,8 +5328,8 @@ ; CI-NEXT: [[SHL19:%[0-9]+]]:_(s32) = G_SHL [[OR18]], [[C3]](s32) ; CI-NEXT: [[OR19:%[0-9]+]]:_(s32) = G_OR [[SHL19]], [[OR17]] ; CI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[OR19]](s32) - ; CI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; CI-NEXT: [[SHL20:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT2]], [[COPY2]](s32) + ; CI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C5]](s32) + ; CI-NEXT: [[SHL20:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT2]], [[PRED_COPY1]](s32) ; CI-NEXT: [[OR20:%[0-9]+]]:_(s64) = G_OR [[SHL20]], [[ZEXT2]] ; CI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF ; CI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) @@ -5407,8 +5407,8 @@ ; VI-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[OR11]], [[C3]](s32) ; VI-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[SHL12]], [[OR10]] ; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[OR12]](s32) - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; VI-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[COPY1]](s32) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C5]](s32) + ; VI-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[PRED_COPY]](s32) ; VI-NEXT: [[OR13:%[0-9]+]]:_(s64) = G_OR [[SHL13]], [[ZEXT1]] ; VI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C7]](s64) @@ -5441,8 +5441,8 @@ ; VI-NEXT: [[SHL19:%[0-9]+]]:_(s32) = G_SHL [[OR18]], [[C3]](s32) ; VI-NEXT: [[OR19:%[0-9]+]]:_(s32) = G_OR [[SHL19]], [[OR17]] ; VI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[OR19]](s32) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; VI-NEXT: [[SHL20:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT2]], [[COPY2]](s32) + ; VI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C5]](s32) + ; VI-NEXT: [[SHL20:%[0-9]+]]:_(s64) = G_SHL 
[[ANYEXT2]], [[PRED_COPY1]](s32) ; VI-NEXT: [[OR20:%[0-9]+]]:_(s64) = G_OR [[SHL20]], [[ZEXT2]] ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) @@ -5520,8 +5520,8 @@ ; GFX9-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[OR11]], [[C3]](s32) ; GFX9-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[SHL12]], [[OR10]] ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[OR12]](s32) - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; GFX9-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[COPY1]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C5]](s32) + ; GFX9-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[PRED_COPY]](s32) ; GFX9-NEXT: [[OR13:%[0-9]+]]:_(s64) = G_OR [[SHL13]], [[ZEXT1]] ; GFX9-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; GFX9-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C7]](s64) @@ -5554,8 +5554,8 @@ ; GFX9-NEXT: [[SHL19:%[0-9]+]]:_(s32) = G_SHL [[OR18]], [[C3]](s32) ; GFX9-NEXT: [[OR19:%[0-9]+]]:_(s32) = G_OR [[SHL19]], [[OR17]] ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[OR19]](s32) - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; GFX9-NEXT: [[SHL20:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT2]], [[COPY2]](s32) + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C5]](s32) + ; GFX9-NEXT: [[SHL20:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT2]], [[PRED_COPY1]](s32) ; GFX9-NEXT: [[OR20:%[0-9]+]]:_(s64) = G_OR [[SHL20]], [[ZEXT2]] ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) @@ -5704,8 +5704,8 @@ ; CI-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[OR11]], [[C3]](s32) ; CI-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[SHL12]], [[OR10]] ; CI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[OR12]](s32) - ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; CI-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[COPY1]](s32) + ; CI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C5]](s32) + ; CI-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[PRED_COPY]](s32) ; CI-NEXT: [[OR13:%[0-9]+]]:_(s64) = G_OR [[SHL13]], [[ZEXT1]] ; CI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C7]](s64) @@ -5738,8 +5738,8 @@ ; CI-NEXT: [[SHL19:%[0-9]+]]:_(s32) = G_SHL [[OR18]], [[C3]](s32) ; CI-NEXT: [[OR19:%[0-9]+]]:_(s32) = G_OR [[SHL19]], [[OR17]] ; CI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[OR19]](s32) - ; CI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; CI-NEXT: [[SHL20:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT2]], [[COPY2]](s32) + ; CI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C5]](s32) + ; CI-NEXT: [[SHL20:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT2]], [[PRED_COPY1]](s32) ; CI-NEXT: [[OR20:%[0-9]+]]:_(s64) = G_OR [[SHL20]], [[ZEXT2]] ; CI-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 24 ; CI-NEXT: [[PTR_ADD23:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C8]](s64) @@ -5772,8 +5772,8 @@ ; CI-NEXT: [[SHL26:%[0-9]+]]:_(s32) = G_SHL [[OR25]], [[C3]](s32) ; CI-NEXT: [[OR26:%[0-9]+]]:_(s32) = G_OR [[SHL26]], [[OR24]] ; CI-NEXT: [[ANYEXT3:%[0-9]+]]:_(s64) = G_ANYEXT [[OR26]](s32) - ; CI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; CI-NEXT: [[SHL27:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT3]], [[COPY3]](s32) + ; CI-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[C5]](s32) + ; CI-NEXT: 
[[SHL27:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT3]], [[PRED_COPY2]](s32) ; CI-NEXT: [[OR27:%[0-9]+]]:_(s64) = G_OR [[SHL27]], [[ZEXT3]] ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64), [[OR20]](s64), [[OR27]](s64) ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) @@ -5849,8 +5849,8 @@ ; VI-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[OR11]], [[C3]](s32) ; VI-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[SHL12]], [[OR10]] ; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[OR12]](s32) - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; VI-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[COPY1]](s32) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C5]](s32) + ; VI-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[PRED_COPY]](s32) ; VI-NEXT: [[OR13:%[0-9]+]]:_(s64) = G_OR [[SHL13]], [[ZEXT1]] ; VI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C7]](s64) @@ -5883,8 +5883,8 @@ ; VI-NEXT: [[SHL19:%[0-9]+]]:_(s32) = G_SHL [[OR18]], [[C3]](s32) ; VI-NEXT: [[OR19:%[0-9]+]]:_(s32) = G_OR [[SHL19]], [[OR17]] ; VI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[OR19]](s32) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; VI-NEXT: [[SHL20:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT2]], [[COPY2]](s32) + ; VI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C5]](s32) + ; VI-NEXT: [[SHL20:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT2]], [[PRED_COPY1]](s32) ; VI-NEXT: [[OR20:%[0-9]+]]:_(s64) = G_OR [[SHL20]], [[ZEXT2]] ; VI-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 24 ; VI-NEXT: [[PTR_ADD23:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C8]](s64) @@ -5917,8 +5917,8 @@ ; VI-NEXT: [[SHL26:%[0-9]+]]:_(s32) = G_SHL [[OR25]], [[C3]](s32) ; VI-NEXT: [[OR26:%[0-9]+]]:_(s32) = G_OR [[SHL26]], [[OR24]] ; VI-NEXT: [[ANYEXT3:%[0-9]+]]:_(s64) = G_ANYEXT [[OR26]](s32) - ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; VI-NEXT: [[SHL27:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT3]], [[COPY3]](s32) + ; VI-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[C5]](s32) + ; VI-NEXT: [[SHL27:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT3]], [[PRED_COPY2]](s32) ; VI-NEXT: [[OR27:%[0-9]+]]:_(s64) = G_OR [[SHL27]], [[ZEXT3]] ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64), [[OR20]](s64), [[OR27]](s64) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) @@ -5994,8 +5994,8 @@ ; GFX9-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[OR11]], [[C3]](s32) ; GFX9-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[SHL12]], [[OR10]] ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[OR12]](s32) - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; GFX9-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[COPY1]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C5]](s32) + ; GFX9-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[PRED_COPY]](s32) ; GFX9-NEXT: [[OR13:%[0-9]+]]:_(s64) = G_OR [[SHL13]], [[ZEXT1]] ; GFX9-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; GFX9-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C7]](s64) @@ -6028,8 +6028,8 @@ ; GFX9-NEXT: [[SHL19:%[0-9]+]]:_(s32) = G_SHL [[OR18]], [[C3]](s32) ; GFX9-NEXT: [[OR19:%[0-9]+]]:_(s32) = G_OR [[SHL19]], [[OR17]] ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[OR19]](s32) - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; GFX9-NEXT: [[SHL20:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT2]], [[COPY2]](s32) + ; GFX9-NEXT: 
[[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C5]](s32) + ; GFX9-NEXT: [[SHL20:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT2]], [[PRED_COPY1]](s32) ; GFX9-NEXT: [[OR20:%[0-9]+]]:_(s64) = G_OR [[SHL20]], [[ZEXT2]] ; GFX9-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 24 ; GFX9-NEXT: [[PTR_ADD23:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C8]](s64) @@ -6062,8 +6062,8 @@ ; GFX9-NEXT: [[SHL26:%[0-9]+]]:_(s32) = G_SHL [[OR25]], [[C3]](s32) ; GFX9-NEXT: [[OR26:%[0-9]+]]:_(s32) = G_OR [[SHL26]], [[OR24]] ; GFX9-NEXT: [[ANYEXT3:%[0-9]+]]:_(s64) = G_ANYEXT [[OR26]](s32) - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; GFX9-NEXT: [[SHL27:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT3]], [[COPY3]](s32) + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[C5]](s32) + ; GFX9-NEXT: [[SHL27:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT3]], [[PRED_COPY2]](s32) ; GFX9-NEXT: [[OR27:%[0-9]+]]:_(s64) = G_OR [[SHL27]], [[ZEXT3]] ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64), [[OR20]](s64), [[OR27]](s64) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) @@ -7254,10 +7254,10 @@ ; CI-NEXT: [[OR17:%[0-9]+]]:_(s32) = G_OR [[SHL17]], [[OR15]] ; CI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR11]](s32), [[OR14]](s32), [[OR17]](s32) ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; CI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; CI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; CI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; CI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; CI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) ; VI-LABEL: name: test_extload_constant_v2s96_from_24_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -7356,10 +7356,10 @@ ; VI-NEXT: [[OR17:%[0-9]+]]:_(s32) = G_OR [[SHL17]], [[OR15]] ; VI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR11]](s32), [[OR14]](s32), [[OR17]](s32) ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; VI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; VI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; VI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) ; GFX9-LABEL: name: test_extload_constant_v2s96_from_24_align1 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -7458,10 +7458,10 @@ ; GFX9-NEXT: [[OR17:%[0-9]+]]:_(s32) = G_OR [[SHL17]], [[OR15]] ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR11]](s32), [[OR14]](s32), [[OR17]](s32) ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; GFX9-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY 
[[BITCAST1]](s96) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; GFX9-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(<2 x s96>) = G_LOAD %0 :: (load (<2 x s96>), align 1, addrspace 1) %2:_(s96) = G_EXTRACT %1, 0 @@ -7524,10 +7524,10 @@ ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[ZEXTLOAD5]] ; CI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR3]](s32), [[OR4]](s32), [[OR5]](s32) ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; CI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; CI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; CI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; CI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; CI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) ; VI-LABEL: name: test_extload_constant_v2s96_from_24_align2 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -7576,10 +7576,10 @@ ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[ZEXTLOAD5]] ; VI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR3]](s32), [[OR4]](s32), [[OR5]](s32) ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; VI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; VI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; VI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) ; GFX9-LABEL: name: test_extload_constant_v2s96_from_24_align2 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -7628,10 +7628,10 @@ ; GFX9-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[ZEXTLOAD5]] ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR3]](s32), [[OR4]](s32), [[OR5]](s32) ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; GFX9-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; GFX9-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(<2 x s96>) = G_LOAD %0 :: (load (<2 x s96>), align 2, addrspace 1) %2:_(s96) = G_EXTRACT %1, 0 @@ -7656,10 +7656,10 @@ ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p4) :: (load (<3 x s32>) from unknown-address + 12, align 4, addrspace 1) ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>) - ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; CI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; CI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; CI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY 
[[BITCAST]](s96) + ; CI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; CI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) ; VI-LABEL: name: test_extload_constant_v2s96_from_24_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -7670,10 +7670,10 @@ ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p4) :: (load (<3 x s32>) from unknown-address + 12, align 4, addrspace 1) ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>) - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; VI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; VI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; VI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) ; GFX9-LABEL: name: test_extload_constant_v2s96_from_24_align4 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -7684,10 +7684,10 @@ ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p4) :: (load (<3 x s32>) from unknown-address + 12, align 4, addrspace 1) ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>) - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; GFX9-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; GFX9-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(<2 x s96>) = G_LOAD %0 :: (load (<2 x s96>), align 4, addrspace 1) %2:_(s96) = G_EXTRACT %1, 0 @@ -7712,10 +7712,10 @@ ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p4) :: (load (<3 x s32>) from unknown-address + 12, align 4, addrspace 1) ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>) - ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; CI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; CI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; CI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; CI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; CI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) ; VI-LABEL: name: test_extload_constant_v2s96_from_24_align16 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -7726,10 +7726,10 @@ ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p4) :: (load (<3 x s32>) from unknown-address + 12, align 4, addrspace 1) ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>) - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY 
[[COPY1]](s96) - ; VI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; VI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; VI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) ; GFX9-LABEL: name: test_extload_constant_v2s96_from_24_align16 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -7740,10 +7740,10 @@ ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p4) :: (load (<3 x s32>) from unknown-address + 12, align 4, addrspace 1) ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>) - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; GFX9-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; GFX9-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(<2 x s96>) = G_LOAD %0 :: (load (<2 x s96>), align 16, addrspace 1) %2:_(s96) = G_EXTRACT %1, 0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir @@ -2500,17 +2500,17 @@ ; CI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; CI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]] - ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]] - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32) + ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[PRED_COPY]](s32) ; CI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) ; CI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] ; CI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) ; CI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]] - ; CI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C3]] - ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32) + ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[PRED_COPY1]](s32) ; CI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) ; CI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] ; CI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) @@ -2612,17 +2612,17 @@ ; CI-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; CI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR1]](s32) ; CI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C4]] - ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; CI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C1]](s32) ; CI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C5]] - ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32) + ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], 
[[PRED_COPY]](s32) ; CI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) ; CI-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] ; CI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) ; CI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C4]] - ; CI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; CI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C1]](s32) ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C5]] - ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32) + ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[PRED_COPY1]](s32) ; CI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) ; CI-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] ; CI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) @@ -4858,8 +4858,8 @@ ; GFX9-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) ; GFX9-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD3]] ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[OR4]](s32) - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C3]](s32) - ; GFX9-NEXT: [[SHL5:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[COPY1]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C3]](s32) + ; GFX9-NEXT: [[SHL5:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[PRED_COPY]](s32) ; GFX9-NEXT: [[OR5:%[0-9]+]]:_(s64) = G_OR [[SHL5]], [[ZEXT1]] ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[OR2]](s64), [[OR5]](s64) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) @@ -5084,8 +5084,8 @@ ; GFX9-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[OR11]], [[C3]](s32) ; GFX9-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[SHL12]], [[OR10]] ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[OR12]](s32) - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; GFX9-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[COPY1]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C5]](s32) + ; GFX9-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[PRED_COPY]](s32) ; GFX9-NEXT: [[OR13:%[0-9]+]]:_(s64) = G_OR [[SHL13]], [[ZEXT1]] ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) @@ -5526,8 +5526,8 @@ ; GFX9-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[OR11]], [[C3]](s32) ; GFX9-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[SHL12]], [[OR10]] ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[OR12]](s32) - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; GFX9-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[COPY1]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C5]](s32) + ; GFX9-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[PRED_COPY]](s32) ; GFX9-NEXT: [[OR13:%[0-9]+]]:_(s64) = G_OR [[SHL13]], [[ZEXT1]] ; GFX9-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; GFX9-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C7]](s64) @@ -5560,8 +5560,8 @@ ; GFX9-NEXT: [[SHL19:%[0-9]+]]:_(s32) = G_SHL [[OR18]], [[C3]](s32) ; GFX9-NEXT: [[OR19:%[0-9]+]]:_(s32) = G_OR [[SHL19]], [[OR17]] ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[OR19]](s32) - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; GFX9-NEXT: [[SHL20:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT2]], [[COPY2]](s32) + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C5]](s32) + ; GFX9-NEXT: [[SHL20:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT2]], [[PRED_COPY1]](s32) ; GFX9-NEXT: [[OR20:%[0-9]+]]:_(s64) = G_OR [[SHL20]], [[ZEXT2]] ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = 
G_IMPLICIT_DEF ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) @@ -6068,8 +6068,8 @@ ; GFX9-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[OR11]], [[C3]](s32) ; GFX9-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[SHL12]], [[OR10]] ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[OR12]](s32) - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; GFX9-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[COPY1]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C5]](s32) + ; GFX9-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[PRED_COPY]](s32) ; GFX9-NEXT: [[OR13:%[0-9]+]]:_(s64) = G_OR [[SHL13]], [[ZEXT1]] ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64) ; GFX9-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 @@ -6103,8 +6103,8 @@ ; GFX9-NEXT: [[SHL19:%[0-9]+]]:_(s32) = G_SHL [[OR18]], [[C3]](s32) ; GFX9-NEXT: [[OR19:%[0-9]+]]:_(s32) = G_OR [[SHL19]], [[OR17]] ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[OR19]](s32) - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; GFX9-NEXT: [[SHL20:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT2]], [[COPY2]](s32) + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C5]](s32) + ; GFX9-NEXT: [[SHL20:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT2]], [[PRED_COPY1]](s32) ; GFX9-NEXT: [[OR20:%[0-9]+]]:_(s64) = G_OR [[SHL20]], [[ZEXT2]] ; GFX9-NEXT: [[PTR_ADD23:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C6]](s64) ; GFX9-NEXT: [[ZEXTLOAD18:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD23]](p0) :: (load (s8) from unknown-address + 24) @@ -6136,8 +6136,8 @@ ; GFX9-NEXT: [[SHL26:%[0-9]+]]:_(s32) = G_SHL [[OR25]], [[C3]](s32) ; GFX9-NEXT: [[OR26:%[0-9]+]]:_(s32) = G_OR [[SHL26]], [[OR24]] ; GFX9-NEXT: [[ANYEXT3:%[0-9]+]]:_(s64) = G_ANYEXT [[OR26]](s32) - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; GFX9-NEXT: [[SHL27:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT3]], [[COPY3]](s32) + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[C5]](s32) + ; GFX9-NEXT: [[SHL27:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT3]], [[PRED_COPY2]](s32) ; GFX9-NEXT: [[OR27:%[0-9]+]]:_(s64) = G_OR [[SHL27]], [[ZEXT3]] ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[OR20]](s64), [[OR27]](s64) ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s64>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s64>), [[BUILD_VECTOR1]](<2 x s64>) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir @@ -3764,17 +3764,17 @@ ; SI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; SI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]] - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]] - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32) + ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[PRED_COPY]](s32) ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) ; SI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) ; SI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]] - ; SI-NEXT: 
[[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; SI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C3]] - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[PRED_COPY1]](s32) ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) ; SI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) @@ -3795,17 +3795,17 @@ ; CI-HSA-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; CI-HSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI-HSA-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]] - ; CI-HSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CI-HSA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) ; CI-HSA-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CI-HSA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]] - ; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32) + ; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[PRED_COPY]](s32) ; CI-HSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) ; CI-HSA-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] ; CI-HSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) ; CI-HSA-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]] - ; CI-HSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CI-HSA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) ; CI-HSA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C3]] - ; CI-HSA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32) + ; CI-HSA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[PRED_COPY1]](s32) ; CI-HSA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) ; CI-HSA-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] ; CI-HSA-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) @@ -3826,17 +3826,17 @@ ; CI-MESA-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; CI-MESA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]] - ; CI-MESA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CI-MESA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) ; CI-MESA-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]] - ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32) + ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[PRED_COPY]](s32) ; CI-MESA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] ; CI-MESA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) ; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]] - ; CI-MESA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CI-MESA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) ; CI-MESA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C3]] - ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32) + ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[PRED_COPY1]](s32) ; CI-MESA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) ; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] ; CI-MESA-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) @@ -3968,17 +3968,17 @@ ; SI-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR1]](s32) ; SI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C4]] - ; SI-NEXT: 
[[COPY1:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C1]](s32) ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C5]] - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32) + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[PRED_COPY]](s32) ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) ; SI-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) ; SI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C4]] - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; SI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C1]](s32) ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C5]] - ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32) + ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[PRED_COPY1]](s32) ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) ; SI-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) @@ -4004,17 +4004,17 @@ ; CI-HSA-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; CI-HSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR]](s32) ; CI-HSA-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C3]] - ; CI-HSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; CI-HSA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) ; CI-HSA-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CI-HSA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C4]] - ; CI-HSA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32) + ; CI-HSA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[PRED_COPY]](s32) ; CI-HSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) ; CI-HSA-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] ; CI-HSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) ; CI-HSA-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C3]] - ; CI-HSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; CI-HSA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) ; CI-HSA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C4]] - ; CI-HSA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32) + ; CI-HSA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[PRED_COPY1]](s32) ; CI-HSA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) ; CI-HSA-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] ; CI-HSA-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) @@ -4045,17 +4045,17 @@ ; CI-MESA-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; CI-MESA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR1]](s32) ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C4]] - ; CI-MESA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; CI-MESA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C1]](s32) ; CI-MESA-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C5]] - ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32) + ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[PRED_COPY]](s32) ; CI-MESA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) ; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] ; CI-MESA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) ; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C4]] - ; CI-MESA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; CI-MESA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY 
[[C1]](s32) ; CI-MESA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C5]] - ; CI-MESA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32) + ; CI-MESA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[PRED_COPY1]](s32) ; CI-MESA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) ; CI-MESA-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] ; CI-MESA-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) @@ -10454,8 +10454,8 @@ ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) ; SI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD3]] ; SI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[OR4]](s32) - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C3]](s32) - ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[COPY1]](s32) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C3]](s32) + ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[PRED_COPY]](s32) ; SI-NEXT: [[OR5:%[0-9]+]]:_(s64) = G_OR [[SHL5]], [[ZEXT1]] ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[OR2]](s64), [[OR5]](s64) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) @@ -10503,8 +10503,8 @@ ; CI-MESA-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) ; CI-MESA-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD3]] ; CI-MESA-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[OR4]](s32) - ; CI-MESA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C3]](s32) - ; CI-MESA-NEXT: [[SHL5:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[COPY1]](s32) + ; CI-MESA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C3]](s32) + ; CI-MESA-NEXT: [[SHL5:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[PRED_COPY]](s32) ; CI-MESA-NEXT: [[OR5:%[0-9]+]]:_(s64) = G_OR [[SHL5]], [[ZEXT1]] ; CI-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[OR2]](s64), [[OR5]](s64) ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) @@ -10546,8 +10546,8 @@ ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD3]] ; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[OR4]](s32) - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C3]](s32) - ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[COPY1]](s32) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C3]](s32) + ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[PRED_COPY]](s32) ; VI-NEXT: [[OR5:%[0-9]+]]:_(s64) = G_OR [[SHL5]], [[ZEXT1]] ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[OR2]](s64), [[OR5]](s64) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) @@ -10595,8 +10595,8 @@ ; GFX9-MESA-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) ; GFX9-MESA-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD3]] ; GFX9-MESA-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[OR4]](s32) - ; GFX9-MESA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C3]](s32) - ; GFX9-MESA-NEXT: [[SHL5:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[COPY1]](s32) + ; GFX9-MESA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C3]](s32) + ; GFX9-MESA-NEXT: [[SHL5:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[PRED_COPY]](s32) ; GFX9-MESA-NEXT: [[OR5:%[0-9]+]]:_(s64) = G_OR [[SHL5]], [[ZEXT1]] ; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[OR2]](s64), [[OR5]](s64) ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) @@ -10683,8 +10683,8 @@ ; SI-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[OR11]], [[C3]](s32) ; SI-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[SHL12]], 
[[OR10]] ; SI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[OR12]](s32) - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; SI-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[COPY1]](s32) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C5]](s32) + ; SI-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[PRED_COPY]](s32) ; SI-NEXT: [[OR13:%[0-9]+]]:_(s64) = G_OR [[SHL13]], [[ZEXT1]] ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) @@ -10766,8 +10766,8 @@ ; CI-MESA-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[OR11]], [[C3]](s32) ; CI-MESA-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[SHL12]], [[OR10]] ; CI-MESA-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[OR12]](s32) - ; CI-MESA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; CI-MESA-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[COPY1]](s32) + ; CI-MESA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C5]](s32) + ; CI-MESA-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[PRED_COPY]](s32) ; CI-MESA-NEXT: [[OR13:%[0-9]+]]:_(s64) = G_OR [[SHL13]], [[ZEXT1]] ; CI-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64) ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) @@ -10843,8 +10843,8 @@ ; VI-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[OR11]], [[C3]](s32) ; VI-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[SHL12]], [[OR10]] ; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[OR12]](s32) - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; VI-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[COPY1]](s32) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C5]](s32) + ; VI-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[PRED_COPY]](s32) ; VI-NEXT: [[OR13:%[0-9]+]]:_(s64) = G_OR [[SHL13]], [[ZEXT1]] ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) @@ -10926,8 +10926,8 @@ ; GFX9-MESA-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[OR11]], [[C3]](s32) ; GFX9-MESA-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[SHL12]], [[OR10]] ; GFX9-MESA-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[OR12]](s32) - ; GFX9-MESA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; GFX9-MESA-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[COPY1]](s32) + ; GFX9-MESA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C5]](s32) + ; GFX9-MESA-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[PRED_COPY]](s32) ; GFX9-MESA-NEXT: [[OR13:%[0-9]+]]:_(s64) = G_OR [[SHL13]], [[ZEXT1]] ; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64) ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) @@ -11231,8 +11231,8 @@ ; SI-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[OR11]], [[C3]](s32) ; SI-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[SHL12]], [[OR10]] ; SI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[OR12]](s32) - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; SI-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[COPY1]](s32) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C5]](s32) + ; SI-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[PRED_COPY]](s32) ; SI-NEXT: [[OR13:%[0-9]+]]:_(s64) = G_OR [[SHL13]], [[ZEXT1]] ; SI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; SI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64) @@ -11265,8 +11265,8 @@ ; 
SI-NEXT: [[SHL19:%[0-9]+]]:_(s32) = G_SHL [[OR18]], [[C3]](s32) ; SI-NEXT: [[OR19:%[0-9]+]]:_(s32) = G_OR [[SHL19]], [[OR17]] ; SI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[OR19]](s32) - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; SI-NEXT: [[SHL20:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT2]], [[COPY2]](s32) + ; SI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C5]](s32) + ; SI-NEXT: [[SHL20:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT2]], [[PRED_COPY1]](s32) ; SI-NEXT: [[OR20:%[0-9]+]]:_(s64) = G_OR [[SHL20]], [[ZEXT2]] ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF ; SI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) @@ -11357,8 +11357,8 @@ ; CI-MESA-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[OR11]], [[C3]](s32) ; CI-MESA-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[SHL12]], [[OR10]] ; CI-MESA-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[OR12]](s32) - ; CI-MESA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; CI-MESA-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[COPY1]](s32) + ; CI-MESA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C5]](s32) + ; CI-MESA-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[PRED_COPY]](s32) ; CI-MESA-NEXT: [[OR13:%[0-9]+]]:_(s64) = G_OR [[SHL13]], [[ZEXT1]] ; CI-MESA-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI-MESA-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64) @@ -11391,8 +11391,8 @@ ; CI-MESA-NEXT: [[SHL19:%[0-9]+]]:_(s32) = G_SHL [[OR18]], [[C3]](s32) ; CI-MESA-NEXT: [[OR19:%[0-9]+]]:_(s32) = G_OR [[SHL19]], [[OR17]] ; CI-MESA-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[OR19]](s32) - ; CI-MESA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; CI-MESA-NEXT: [[SHL20:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT2]], [[COPY2]](s32) + ; CI-MESA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C5]](s32) + ; CI-MESA-NEXT: [[SHL20:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT2]], [[PRED_COPY1]](s32) ; CI-MESA-NEXT: [[OR20:%[0-9]+]]:_(s64) = G_OR [[SHL20]], [[ZEXT2]] ; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF ; CI-MESA-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) @@ -11470,8 +11470,8 @@ ; VI-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[OR11]], [[C3]](s32) ; VI-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[SHL12]], [[OR10]] ; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[OR12]](s32) - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; VI-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[COPY1]](s32) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C5]](s32) + ; VI-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[PRED_COPY]](s32) ; VI-NEXT: [[OR13:%[0-9]+]]:_(s64) = G_OR [[SHL13]], [[ZEXT1]] ; VI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64) @@ -11504,8 +11504,8 @@ ; VI-NEXT: [[SHL19:%[0-9]+]]:_(s32) = G_SHL [[OR18]], [[C3]](s32) ; VI-NEXT: [[OR19:%[0-9]+]]:_(s32) = G_OR [[SHL19]], [[OR17]] ; VI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[OR19]](s32) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; VI-NEXT: [[SHL20:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT2]], [[COPY2]](s32) + ; VI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C5]](s32) + ; VI-NEXT: [[SHL20:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT2]], [[PRED_COPY1]](s32) ; VI-NEXT: [[OR20:%[0-9]+]]:_(s64) = G_OR [[SHL20]], [[ZEXT2]] ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF 
; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) @@ -11596,8 +11596,8 @@ ; GFX9-MESA-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[OR11]], [[C3]](s32) ; GFX9-MESA-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[SHL12]], [[OR10]] ; GFX9-MESA-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[OR12]](s32) - ; GFX9-MESA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; GFX9-MESA-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[COPY1]](s32) + ; GFX9-MESA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C5]](s32) + ; GFX9-MESA-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[PRED_COPY]](s32) ; GFX9-MESA-NEXT: [[OR13:%[0-9]+]]:_(s64) = G_OR [[SHL13]], [[ZEXT1]] ; GFX9-MESA-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; GFX9-MESA-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64) @@ -11630,8 +11630,8 @@ ; GFX9-MESA-NEXT: [[SHL19:%[0-9]+]]:_(s32) = G_SHL [[OR18]], [[C3]](s32) ; GFX9-MESA-NEXT: [[OR19:%[0-9]+]]:_(s32) = G_OR [[SHL19]], [[OR17]] ; GFX9-MESA-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[OR19]](s32) - ; GFX9-MESA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; GFX9-MESA-NEXT: [[SHL20:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT2]], [[COPY2]](s32) + ; GFX9-MESA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C5]](s32) + ; GFX9-MESA-NEXT: [[SHL20:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT2]], [[PRED_COPY1]](s32) ; GFX9-MESA-NEXT: [[OR20:%[0-9]+]]:_(s64) = G_OR [[SHL20]], [[ZEXT2]] ; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF ; GFX9-MESA-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) @@ -11816,8 +11816,8 @@ ; SI-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[OR11]], [[C3]](s32) ; SI-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[SHL12]], [[OR10]] ; SI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[OR12]](s32) - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; SI-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[COPY1]](s32) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C5]](s32) + ; SI-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[PRED_COPY]](s32) ; SI-NEXT: [[OR13:%[0-9]+]]:_(s64) = G_OR [[SHL13]], [[ZEXT1]] ; SI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; SI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64) @@ -11850,8 +11850,8 @@ ; SI-NEXT: [[SHL19:%[0-9]+]]:_(s32) = G_SHL [[OR18]], [[C3]](s32) ; SI-NEXT: [[OR19:%[0-9]+]]:_(s32) = G_OR [[SHL19]], [[OR17]] ; SI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[OR19]](s32) - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; SI-NEXT: [[SHL20:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT2]], [[COPY2]](s32) + ; SI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C5]](s32) + ; SI-NEXT: [[SHL20:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT2]], [[PRED_COPY1]](s32) ; SI-NEXT: [[OR20:%[0-9]+]]:_(s64) = G_OR [[SHL20]], [[ZEXT2]] ; SI-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 24 ; SI-NEXT: [[PTR_ADD23:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64) @@ -11884,8 +11884,8 @@ ; SI-NEXT: [[SHL26:%[0-9]+]]:_(s32) = G_SHL [[OR25]], [[C3]](s32) ; SI-NEXT: [[OR26:%[0-9]+]]:_(s32) = G_OR [[SHL26]], [[OR24]] ; SI-NEXT: [[ANYEXT3:%[0-9]+]]:_(s64) = G_ANYEXT [[OR26]](s32) - ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; SI-NEXT: [[SHL27:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT3]], [[COPY3]](s32) + ; SI-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[C5]](s32) + ; SI-NEXT: [[SHL27:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT3]], 
[[PRED_COPY2]](s32) ; SI-NEXT: [[OR27:%[0-9]+]]:_(s64) = G_OR [[SHL27]], [[ZEXT3]] ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64), [[OR20]](s64), [[OR27]](s64) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) @@ -11967,8 +11967,8 @@ ; CI-MESA-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[OR11]], [[C3]](s32) ; CI-MESA-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[SHL12]], [[OR10]] ; CI-MESA-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[OR12]](s32) - ; CI-MESA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; CI-MESA-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[COPY1]](s32) + ; CI-MESA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C5]](s32) + ; CI-MESA-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[PRED_COPY]](s32) ; CI-MESA-NEXT: [[OR13:%[0-9]+]]:_(s64) = G_OR [[SHL13]], [[ZEXT1]] ; CI-MESA-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI-MESA-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64) @@ -12001,8 +12001,8 @@ ; CI-MESA-NEXT: [[SHL19:%[0-9]+]]:_(s32) = G_SHL [[OR18]], [[C3]](s32) ; CI-MESA-NEXT: [[OR19:%[0-9]+]]:_(s32) = G_OR [[SHL19]], [[OR17]] ; CI-MESA-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[OR19]](s32) - ; CI-MESA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; CI-MESA-NEXT: [[SHL20:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT2]], [[COPY2]](s32) + ; CI-MESA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C5]](s32) + ; CI-MESA-NEXT: [[SHL20:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT2]], [[PRED_COPY1]](s32) ; CI-MESA-NEXT: [[OR20:%[0-9]+]]:_(s64) = G_OR [[SHL20]], [[ZEXT2]] ; CI-MESA-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 24 ; CI-MESA-NEXT: [[PTR_ADD23:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64) @@ -12035,8 +12035,8 @@ ; CI-MESA-NEXT: [[SHL26:%[0-9]+]]:_(s32) = G_SHL [[OR25]], [[C3]](s32) ; CI-MESA-NEXT: [[OR26:%[0-9]+]]:_(s32) = G_OR [[SHL26]], [[OR24]] ; CI-MESA-NEXT: [[ANYEXT3:%[0-9]+]]:_(s64) = G_ANYEXT [[OR26]](s32) - ; CI-MESA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; CI-MESA-NEXT: [[SHL27:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT3]], [[COPY3]](s32) + ; CI-MESA-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[C5]](s32) + ; CI-MESA-NEXT: [[SHL27:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT3]], [[PRED_COPY2]](s32) ; CI-MESA-NEXT: [[OR27:%[0-9]+]]:_(s64) = G_OR [[SHL27]], [[ZEXT3]] ; CI-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64), [[OR20]](s64), [[OR27]](s64) ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) @@ -12112,8 +12112,8 @@ ; VI-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[OR11]], [[C3]](s32) ; VI-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[SHL12]], [[OR10]] ; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[OR12]](s32) - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; VI-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[COPY1]](s32) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C5]](s32) + ; VI-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[PRED_COPY]](s32) ; VI-NEXT: [[OR13:%[0-9]+]]:_(s64) = G_OR [[SHL13]], [[ZEXT1]] ; VI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64) @@ -12146,8 +12146,8 @@ ; VI-NEXT: [[SHL19:%[0-9]+]]:_(s32) = G_SHL [[OR18]], [[C3]](s32) ; VI-NEXT: [[OR19:%[0-9]+]]:_(s32) = G_OR [[SHL19]], [[OR17]] ; VI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[OR19]](s32) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; VI-NEXT: 
[[SHL20:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT2]], [[COPY2]](s32) + ; VI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C5]](s32) + ; VI-NEXT: [[SHL20:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT2]], [[PRED_COPY1]](s32) ; VI-NEXT: [[OR20:%[0-9]+]]:_(s64) = G_OR [[SHL20]], [[ZEXT2]] ; VI-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 24 ; VI-NEXT: [[PTR_ADD23:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64) @@ -12180,8 +12180,8 @@ ; VI-NEXT: [[SHL26:%[0-9]+]]:_(s32) = G_SHL [[OR25]], [[C3]](s32) ; VI-NEXT: [[OR26:%[0-9]+]]:_(s32) = G_OR [[SHL26]], [[OR24]] ; VI-NEXT: [[ANYEXT3:%[0-9]+]]:_(s64) = G_ANYEXT [[OR26]](s32) - ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; VI-NEXT: [[SHL27:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT3]], [[COPY3]](s32) + ; VI-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[C5]](s32) + ; VI-NEXT: [[SHL27:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT3]], [[PRED_COPY2]](s32) ; VI-NEXT: [[OR27:%[0-9]+]]:_(s64) = G_OR [[SHL27]], [[ZEXT3]] ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64), [[OR20]](s64), [[OR27]](s64) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) @@ -12263,8 +12263,8 @@ ; GFX9-MESA-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[OR11]], [[C3]](s32) ; GFX9-MESA-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[SHL12]], [[OR10]] ; GFX9-MESA-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[OR12]](s32) - ; GFX9-MESA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; GFX9-MESA-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[COPY1]](s32) + ; GFX9-MESA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C5]](s32) + ; GFX9-MESA-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[PRED_COPY]](s32) ; GFX9-MESA-NEXT: [[OR13:%[0-9]+]]:_(s64) = G_OR [[SHL13]], [[ZEXT1]] ; GFX9-MESA-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; GFX9-MESA-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64) @@ -12297,8 +12297,8 @@ ; GFX9-MESA-NEXT: [[SHL19:%[0-9]+]]:_(s32) = G_SHL [[OR18]], [[C3]](s32) ; GFX9-MESA-NEXT: [[OR19:%[0-9]+]]:_(s32) = G_OR [[SHL19]], [[OR17]] ; GFX9-MESA-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[OR19]](s32) - ; GFX9-MESA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; GFX9-MESA-NEXT: [[SHL20:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT2]], [[COPY2]](s32) + ; GFX9-MESA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C5]](s32) + ; GFX9-MESA-NEXT: [[SHL20:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT2]], [[PRED_COPY1]](s32) ; GFX9-MESA-NEXT: [[OR20:%[0-9]+]]:_(s64) = G_OR [[SHL20]], [[ZEXT2]] ; GFX9-MESA-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 24 ; GFX9-MESA-NEXT: [[PTR_ADD23:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64) @@ -12331,8 +12331,8 @@ ; GFX9-MESA-NEXT: [[SHL26:%[0-9]+]]:_(s32) = G_SHL [[OR25]], [[C3]](s32) ; GFX9-MESA-NEXT: [[OR26:%[0-9]+]]:_(s32) = G_OR [[SHL26]], [[OR24]] ; GFX9-MESA-NEXT: [[ANYEXT3:%[0-9]+]]:_(s64) = G_ANYEXT [[OR26]](s32) - ; GFX9-MESA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; GFX9-MESA-NEXT: [[SHL27:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT3]], [[COPY3]](s32) + ; GFX9-MESA-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[C5]](s32) + ; GFX9-MESA-NEXT: [[SHL27:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT3]], [[PRED_COPY2]](s32) ; GFX9-MESA-NEXT: [[OR27:%[0-9]+]]:_(s64) = G_OR [[SHL27]], [[ZEXT3]] ; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64), [[OR20]](s64), [[OR27]](s64) ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) @@ -14190,10 +14190,10 @@ ; SI-NEXT: 
[[OR17:%[0-9]+]]:_(s32) = G_OR [[SHL17]], [[OR15]] ; SI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR11]](s32), [[OR14]](s32), [[OR17]](s32) ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; SI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; SI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; SI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) ; CI-HSA-LABEL: name: test_global_v2s96_align1 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} @@ -14204,10 +14204,10 @@ ; CI-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; CI-HSA-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<3 x s32>) from unknown-address + 12, align 1, addrspace 1) ; CI-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>) - ; CI-HSA-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; CI-HSA-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; CI-HSA-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; CI-HSA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; CI-HSA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; CI-HSA-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) ; CI-MESA-LABEL: name: test_global_v2s96_align1 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} @@ -14306,10 +14306,10 @@ ; CI-MESA-NEXT: [[OR17:%[0-9]+]]:_(s32) = G_OR [[SHL17]], [[OR15]] ; CI-MESA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR11]](s32), [[OR14]](s32), [[OR17]](s32) ; CI-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; CI-MESA-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; CI-MESA-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; CI-MESA-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; CI-MESA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; CI-MESA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; CI-MESA-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) ; VI-LABEL: name: test_global_v2s96_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -14408,10 +14408,10 @@ ; VI-NEXT: [[OR17:%[0-9]+]]:_(s32) = G_OR [[SHL17]], [[OR15]] ; VI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR11]](s32), [[OR14]](s32), [[OR17]](s32) ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; VI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; VI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; VI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) ; GFX9-HSA-LABEL: name: test_global_v2s96_align1 ; GFX9-HSA: liveins: 
$vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} @@ -14422,10 +14422,10 @@ ; GFX9-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; GFX9-HSA-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<3 x s32>) from unknown-address + 12, align 1, addrspace 1) ; GFX9-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>) - ; GFX9-HSA-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; GFX9-HSA-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; GFX9-HSA-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; GFX9-HSA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; GFX9-HSA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; GFX9-HSA-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) ; GFX9-MESA-LABEL: name: test_global_v2s96_align1 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -14524,10 +14524,10 @@ ; GFX9-MESA-NEXT: [[OR17:%[0-9]+]]:_(s32) = G_OR [[SHL17]], [[OR15]] ; GFX9-MESA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR11]](s32), [[OR14]](s32), [[OR17]](s32) ; GFX9-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; GFX9-MESA-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; GFX9-MESA-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; GFX9-MESA-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; GFX9-MESA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; GFX9-MESA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; GFX9-MESA-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<2 x s96>) = G_LOAD %0 :: (load (<2 x s96>), align 1, addrspace 1) %2:_(s96) = G_EXTRACT %1, 0 @@ -14590,10 +14590,10 @@ ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[ZEXTLOAD5]] ; SI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR3]](s32), [[OR4]](s32), [[OR5]](s32) ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; SI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; SI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; SI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) ; CI-HSA-LABEL: name: test_global_v2s96_align2 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} @@ -14604,10 +14604,10 @@ ; CI-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; CI-HSA-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<3 x s32>) from unknown-address + 12, align 2, addrspace 1) ; CI-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>) - ; CI-HSA-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; CI-HSA-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; CI-HSA-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; CI-HSA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; CI-HSA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = 
PRED_COPY [[BITCAST1]](s96) + ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; CI-HSA-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) ; CI-MESA-LABEL: name: test_global_v2s96_align2 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} @@ -14656,10 +14656,10 @@ ; CI-MESA-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[ZEXTLOAD5]] ; CI-MESA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR3]](s32), [[OR4]](s32), [[OR5]](s32) ; CI-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; CI-MESA-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; CI-MESA-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; CI-MESA-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; CI-MESA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; CI-MESA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; CI-MESA-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) ; VI-LABEL: name: test_global_v2s96_align2 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -14708,10 +14708,10 @@ ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[ZEXTLOAD5]] ; VI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR3]](s32), [[OR4]](s32), [[OR5]](s32) ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; VI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; VI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; VI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) ; GFX9-HSA-LABEL: name: test_global_v2s96_align2 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} @@ -14722,10 +14722,10 @@ ; GFX9-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; GFX9-HSA-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<3 x s32>) from unknown-address + 12, align 2, addrspace 1) ; GFX9-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>) - ; GFX9-HSA-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; GFX9-HSA-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; GFX9-HSA-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; GFX9-HSA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; GFX9-HSA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; GFX9-HSA-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) ; GFX9-MESA-LABEL: name: test_global_v2s96_align2 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -14774,10 +14774,10 @@ ; GFX9-MESA-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[ZEXTLOAD5]] ; GFX9-MESA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR3]](s32), [[OR4]](s32), [[OR5]](s32) ; GFX9-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; GFX9-MESA-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; GFX9-MESA-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; GFX9-MESA-NEXT: 
$vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; GFX9-MESA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; GFX9-MESA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; GFX9-MESA-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<2 x s96>) = G_LOAD %0 :: (load (<2 x s96>), align 2, addrspace 1) %2:_(s96) = G_EXTRACT %1, 0 @@ -14811,10 +14811,10 @@ ; SI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD2]](<2 x s32>) ; SI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[UV3]](s32), [[LOAD3]](s32) ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; SI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; SI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; SI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) ; CI-HSA-LABEL: name: test_global_v2s96_align4 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} @@ -14825,10 +14825,10 @@ ; CI-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; CI-HSA-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<3 x s32>) from unknown-address + 12, align 4, addrspace 1) ; CI-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>) - ; CI-HSA-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; CI-HSA-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; CI-HSA-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; CI-HSA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; CI-HSA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; CI-HSA-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) ; CI-MESA-LABEL: name: test_global_v2s96_align4 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} @@ -14839,10 +14839,10 @@ ; CI-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; CI-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<3 x s32>) from unknown-address + 12, align 4, addrspace 1) ; CI-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>) - ; CI-MESA-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; CI-MESA-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; CI-MESA-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; CI-MESA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; CI-MESA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; CI-MESA-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) ; VI-LABEL: name: test_global_v2s96_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -14853,10 +14853,10 @@ ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<3 x s32>) from unknown-address + 12, align 4, addrspace 1) ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 
x s32>) - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; VI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; VI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; VI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) ; GFX9-HSA-LABEL: name: test_global_v2s96_align4 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} @@ -14867,10 +14867,10 @@ ; GFX9-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; GFX9-HSA-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<3 x s32>) from unknown-address + 12, align 4, addrspace 1) ; GFX9-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>) - ; GFX9-HSA-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; GFX9-HSA-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; GFX9-HSA-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; GFX9-HSA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; GFX9-HSA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; GFX9-HSA-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) ; GFX9-MESA-LABEL: name: test_global_v2s96_align4 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -14881,10 +14881,10 @@ ; GFX9-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; GFX9-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<3 x s32>) from unknown-address + 12, align 4, addrspace 1) ; GFX9-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>) - ; GFX9-MESA-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; GFX9-MESA-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; GFX9-MESA-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; GFX9-MESA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; GFX9-MESA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; GFX9-MESA-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<2 x s96>) = G_LOAD %0 :: (load (<2 x s96>), align 4, addrspace 1) %2:_(s96) = G_EXTRACT %1, 0 @@ -14916,10 +14916,10 @@ ; SI-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<2 x s32>) ; SI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV4]](s32), [[UV5]](s32), [[LOAD2]](s32) ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; SI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; SI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; SI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) ; CI-HSA-LABEL: name: test_global_v2s96_align16 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} @@ -14930,10 +14930,10 @@ ; CI-HSA-NEXT: 
[[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; CI-HSA-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<3 x s32>) from unknown-address + 12, align 4, addrspace 1) ; CI-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>) - ; CI-HSA-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; CI-HSA-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; CI-HSA-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; CI-HSA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; CI-HSA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; CI-HSA-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) ; CI-MESA-LABEL: name: test_global_v2s96_align16 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} @@ -14944,10 +14944,10 @@ ; CI-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; CI-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<3 x s32>) from unknown-address + 12, align 4, addrspace 1) ; CI-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>) - ; CI-MESA-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; CI-MESA-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; CI-MESA-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; CI-MESA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; CI-MESA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; CI-MESA-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) ; VI-LABEL: name: test_global_v2s96_align16 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -14958,10 +14958,10 @@ ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<3 x s32>) from unknown-address + 12, align 4, addrspace 1) ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>) - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; VI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; VI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; VI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) ; GFX9-HSA-LABEL: name: test_global_v2s96_align16 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} @@ -14972,10 +14972,10 @@ ; GFX9-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; GFX9-HSA-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<3 x s32>) from unknown-address + 12, align 4, addrspace 1) ; GFX9-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>) - ; GFX9-HSA-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; GFX9-HSA-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; GFX9-HSA-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; GFX9-HSA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; GFX9-HSA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY 
[[PRED_COPY]](s96) + ; GFX9-HSA-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) ; GFX9-MESA-LABEL: name: test_global_v2s96_align16 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -14986,10 +14986,10 @@ ; GFX9-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; GFX9-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<3 x s32>) from unknown-address + 12, align 4, addrspace 1) ; GFX9-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>) - ; GFX9-MESA-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; GFX9-MESA-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; GFX9-MESA-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; GFX9-MESA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; GFX9-MESA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; GFX9-MESA-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<2 x s96>) = G_LOAD %0 :: (load (<2 x s96>), align 16, addrspace 1) %2:_(s96) = G_EXTRACT %1, 0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-local.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-local.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-local.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-local.mir @@ -6783,17 +6783,17 @@ ; SI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; SI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]] - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]] - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32) + ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[PRED_COPY]](s32) ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) ; SI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) ; SI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]] - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; SI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C3]] - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[PRED_COPY1]](s32) ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) ; SI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) @@ -6814,17 +6814,17 @@ ; CI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; CI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]] - ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]] - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32) + ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[PRED_COPY]](s32) ; CI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) ; CI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] ; CI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) ; 
CI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]] - ; CI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C3]] - ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32) + ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[PRED_COPY1]](s32) ; CI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) ; CI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] ; CI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) @@ -6845,17 +6845,17 @@ ; CI-DS128-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; CI-DS128-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI-DS128-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]] - ; CI-DS128-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CI-DS128-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) ; CI-DS128-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CI-DS128-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]] - ; CI-DS128-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32) + ; CI-DS128-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[PRED_COPY]](s32) ; CI-DS128-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) ; CI-DS128-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] ; CI-DS128-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) ; CI-DS128-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]] - ; CI-DS128-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CI-DS128-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) ; CI-DS128-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C3]] - ; CI-DS128-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32) + ; CI-DS128-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[PRED_COPY1]](s32) ; CI-DS128-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) ; CI-DS128-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] ; CI-DS128-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) @@ -7102,17 +7102,17 @@ ; SI-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR1]](s32) ; SI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C4]] - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C1]](s32) ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C5]] - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32) + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[PRED_COPY]](s32) ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) ; SI-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) ; SI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C4]] - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; SI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C1]](s32) ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C5]] - ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32) + ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[PRED_COPY1]](s32) ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) ; SI-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) @@ -7143,17 +7143,17 @@ ; CI-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; CI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR1]](s32) ; CI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C4]] - ; 
CI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; CI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C1]](s32) ; CI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C5]] - ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32) + ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[PRED_COPY]](s32) ; CI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) ; CI-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] ; CI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) ; CI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C4]] - ; CI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; CI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C1]](s32) ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C5]] - ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32) + ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[PRED_COPY1]](s32) ; CI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) ; CI-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] ; CI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) @@ -7184,17 +7184,17 @@ ; CI-DS128-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; CI-DS128-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR1]](s32) ; CI-DS128-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C4]] - ; CI-DS128-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; CI-DS128-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C1]](s32) ; CI-DS128-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CI-DS128-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C5]] - ; CI-DS128-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32) + ; CI-DS128-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[PRED_COPY]](s32) ; CI-DS128-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) ; CI-DS128-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] ; CI-DS128-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) ; CI-DS128-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C4]] - ; CI-DS128-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; CI-DS128-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C1]](s32) ; CI-DS128-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C5]] - ; CI-DS128-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32) + ; CI-DS128-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[PRED_COPY1]](s32) ; CI-DS128-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) ; CI-DS128-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] ; CI-DS128-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) @@ -13007,8 +13007,8 @@ ; SI-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[OR11]], [[C3]](s32) ; SI-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[SHL12]], [[OR10]] ; SI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[OR12]](s32) - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; SI-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[COPY1]](s32) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C5]](s32) + ; SI-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[PRED_COPY]](s32) ; SI-NEXT: [[OR13:%[0-9]+]]:_(s64) = G_OR [[SHL13]], [[ZEXT1]] ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) @@ -13083,8 +13083,8 @@ ; CI-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[OR11]], [[C3]](s32) ; CI-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[SHL12]], [[OR10]] ; CI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[OR12]](s32) - ; CI-NEXT: 
[[COPY1:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; CI-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[COPY1]](s32) + ; CI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C5]](s32) + ; CI-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[PRED_COPY]](s32) ; CI-NEXT: [[OR13:%[0-9]+]]:_(s64) = G_OR [[SHL13]], [[ZEXT1]] ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64) ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) @@ -13159,8 +13159,8 @@ ; CI-DS128-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[OR11]], [[C3]](s32) ; CI-DS128-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[SHL12]], [[OR10]] ; CI-DS128-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[OR12]](s32) - ; CI-DS128-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; CI-DS128-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[COPY1]](s32) + ; CI-DS128-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C5]](s32) + ; CI-DS128-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[PRED_COPY]](s32) ; CI-DS128-NEXT: [[OR13:%[0-9]+]]:_(s64) = G_OR [[SHL13]], [[ZEXT1]] ; CI-DS128-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64) ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) @@ -13235,8 +13235,8 @@ ; VI-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[OR11]], [[C3]](s32) ; VI-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[SHL12]], [[OR10]] ; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[OR12]](s32) - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; VI-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[COPY1]](s32) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C5]](s32) + ; VI-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[PRED_COPY]](s32) ; VI-NEXT: [[OR13:%[0-9]+]]:_(s64) = G_OR [[SHL13]], [[ZEXT1]] ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) @@ -13311,8 +13311,8 @@ ; GFX9-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[OR11]], [[C3]](s32) ; GFX9-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[SHL12]], [[OR10]] ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[OR12]](s32) - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; GFX9-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[COPY1]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C5]](s32) + ; GFX9-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[PRED_COPY]](s32) ; GFX9-NEXT: [[OR13:%[0-9]+]]:_(s64) = G_OR [[SHL13]], [[ZEXT1]] ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) @@ -13393,8 +13393,8 @@ ; GFX10-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[OR11]], [[C3]](s32) ; GFX10-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[SHL12]], [[OR10]] ; GFX10-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[OR12]](s32) - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; GFX10-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[COPY1]](s32) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C5]](s32) + ; GFX10-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[PRED_COPY]](s32) ; GFX10-NEXT: [[OR13:%[0-9]+]]:_(s64) = G_OR [[SHL13]], [[ZEXT1]] ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64) ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) @@ -13418,8 +13418,8 @@ ; GFX10-UNALIGNED-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = 
G_PTR_ADD [[PTR_ADD1]], [[C]](s32) ; GFX10-UNALIGNED-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 12, align 1, addrspace 3) ; GFX10-UNALIGNED-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD3]](s32) - ; GFX10-UNALIGNED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; GFX10-UNALIGNED-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[COPY1]](s32) + ; GFX10-UNALIGNED-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C1]](s32) + ; GFX10-UNALIGNED-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[PRED_COPY]](s32) ; GFX10-UNALIGNED-NEXT: [[OR1:%[0-9]+]]:_(s64) = G_OR [[SHL1]], [[ZEXT1]] ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[OR]](s64), [[OR1]](s64) ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) @@ -13494,8 +13494,8 @@ ; GFX11-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[OR11]], [[C3]](s32) ; GFX11-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[SHL12]], [[OR10]] ; GFX11-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[OR12]](s32) - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; GFX11-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[COPY1]](s32) + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C5]](s32) + ; GFX11-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[PRED_COPY]](s32) ; GFX11-NEXT: [[OR13:%[0-9]+]]:_(s64) = G_OR [[SHL13]], [[ZEXT1]] ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64) ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) @@ -15117,10 +15117,10 @@ ; SI-NEXT: [[OR17:%[0-9]+]]:_(s32) = G_OR [[SHL17]], [[OR15]] ; SI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR11]](s32), [[OR14]](s32), [[OR17]](s32) ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; SI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; SI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; SI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) ; CI-LABEL: name: test_load_local_v2s96_align1 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -15218,10 +15218,10 @@ ; CI-NEXT: [[OR17:%[0-9]+]]:_(s32) = G_OR [[SHL17]], [[OR15]] ; CI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR11]](s32), [[OR14]](s32), [[OR17]](s32) ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; CI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; CI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; CI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; CI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; CI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) ; CI-DS128-LABEL: name: test_load_local_v2s96_align1 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} @@ -15319,10 +15319,10 @@ ; CI-DS128-NEXT: [[OR17:%[0-9]+]]:_(s32) = G_OR [[SHL17]], [[OR15]] ; CI-DS128-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR11]](s32), [[OR14]](s32), [[OR17]](s32) ; CI-DS128-NEXT: 
[[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; CI-DS128-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; CI-DS128-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; CI-DS128-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; CI-DS128-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; CI-DS128-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; CI-DS128-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) ; VI-LABEL: name: test_load_local_v2s96_align1 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -15420,10 +15420,10 @@ ; VI-NEXT: [[OR17:%[0-9]+]]:_(s32) = G_OR [[SHL17]], [[OR15]] ; VI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR11]](s32), [[OR14]](s32), [[OR17]](s32) ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; VI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; VI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; VI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) ; GFX9-LABEL: name: test_load_local_v2s96_align1 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -15521,10 +15521,10 @@ ; GFX9-NEXT: [[OR17:%[0-9]+]]:_(s32) = G_OR [[SHL17]], [[OR15]] ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR11]](s32), [[OR14]](s32), [[OR17]](s32) ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; GFX9-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; GFX9-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) ; GFX9-UNALIGNED-LABEL: name: test_load_local_v2s96_align1 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} @@ -15535,10 +15535,10 @@ ; GFX9-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) ; GFX9-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<3 x s32>) from unknown-address + 12, align 1, addrspace 3) ; GFX9-UNALIGNED-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>) - ; GFX9-UNALIGNED-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; GFX9-UNALIGNED-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; GFX9-UNALIGNED-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; GFX9-UNALIGNED-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; GFX9-UNALIGNED-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; GFX9-UNALIGNED-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) ; GFX10-LABEL: name: test_load_local_v2s96_align1 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -15636,10 +15636,10 @@ ; GFX10-NEXT: 
[[OR17:%[0-9]+]]:_(s32) = G_OR [[SHL17]], [[OR15]] ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR11]](s32), [[OR14]](s32), [[OR17]](s32) ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; GFX10-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; GFX10-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) ; GFX10-UNALIGNED-LABEL: name: test_load_local_v2s96_align1 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} @@ -15662,10 +15662,10 @@ ; GFX10-UNALIGNED-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s32) from unknown-address + 20, align 1, addrspace 3) ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32) ; GFX10-UNALIGNED-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; GFX10-UNALIGNED-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; GFX10-UNALIGNED-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; GFX10-UNALIGNED-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; GFX10-UNALIGNED-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; GFX10-UNALIGNED-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; GFX10-UNALIGNED-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) ; GFX11-LABEL: name: test_load_local_v2s96_align1 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -15763,10 +15763,10 @@ ; GFX11-NEXT: [[OR17:%[0-9]+]]:_(s32) = G_OR [[SHL17]], [[OR15]] ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR11]](s32), [[OR14]](s32), [[OR17]](s32) ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; GFX11-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; GFX11-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) ; GFX11-UNALIGNED-LABEL: name: test_load_local_v2s96_align1 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} @@ -15777,10 +15777,10 @@ ; GFX11-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) ; GFX11-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<3 x s32>) from unknown-address + 12, align 1, addrspace 3) ; GFX11-UNALIGNED-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>) - ; GFX11-UNALIGNED-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; GFX11-UNALIGNED-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; GFX11-UNALIGNED-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; 
GFX11-UNALIGNED-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; GFX11-UNALIGNED-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; GFX11-UNALIGNED-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) %0:_(p3) = COPY $vgpr0 %1:_(<2 x s96>) = G_LOAD %0 :: (load (<2 x s96>), align 1, addrspace 3) %2:_(s96) = G_EXTRACT %1, 0 @@ -15843,10 +15843,10 @@ ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[ZEXTLOAD5]] ; SI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR3]](s32), [[OR4]](s32), [[OR5]](s32) ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; SI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; SI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; SI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) ; CI-LABEL: name: test_load_local_v2s96_align2 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -15895,10 +15895,10 @@ ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[ZEXTLOAD5]] ; CI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR3]](s32), [[OR4]](s32), [[OR5]](s32) ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; CI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; CI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; CI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; CI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; CI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) ; CI-DS128-LABEL: name: test_load_local_v2s96_align2 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} @@ -15947,10 +15947,10 @@ ; CI-DS128-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[ZEXTLOAD5]] ; CI-DS128-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR3]](s32), [[OR4]](s32), [[OR5]](s32) ; CI-DS128-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; CI-DS128-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; CI-DS128-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; CI-DS128-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; CI-DS128-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; CI-DS128-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; CI-DS128-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) ; VI-LABEL: name: test_load_local_v2s96_align2 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -15999,10 +15999,10 @@ ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[ZEXTLOAD5]] ; VI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR3]](s32), [[OR4]](s32), [[OR5]](s32) ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) 
- ; VI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; VI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; VI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) ; GFX9-LABEL: name: test_load_local_v2s96_align2 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -16051,10 +16051,10 @@ ; GFX9-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[ZEXTLOAD5]] ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR3]](s32), [[OR4]](s32), [[OR5]](s32) ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; GFX9-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; GFX9-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) ; GFX9-UNALIGNED-LABEL: name: test_load_local_v2s96_align2 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} @@ -16065,10 +16065,10 @@ ; GFX9-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) ; GFX9-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<3 x s32>) from unknown-address + 12, align 2, addrspace 3) ; GFX9-UNALIGNED-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>) - ; GFX9-UNALIGNED-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; GFX9-UNALIGNED-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; GFX9-UNALIGNED-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; GFX9-UNALIGNED-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; GFX9-UNALIGNED-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; GFX9-UNALIGNED-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) ; GFX10-LABEL: name: test_load_local_v2s96_align2 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -16117,10 +16117,10 @@ ; GFX10-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[ZEXTLOAD5]] ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR3]](s32), [[OR4]](s32), [[OR5]](s32) ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; GFX10-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; GFX10-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) ; GFX10-UNALIGNED-LABEL: name: test_load_local_v2s96_align2 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} @@ -16143,10 +16143,10 @@ ; GFX10-UNALIGNED-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s32) from unknown-address + 20, align 2, addrspace 3) ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD3]](s32), 
[[LOAD4]](s32), [[LOAD5]](s32) ; GFX10-UNALIGNED-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; GFX10-UNALIGNED-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; GFX10-UNALIGNED-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; GFX10-UNALIGNED-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; GFX10-UNALIGNED-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; GFX10-UNALIGNED-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; GFX10-UNALIGNED-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) ; GFX11-LABEL: name: test_load_local_v2s96_align2 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -16195,10 +16195,10 @@ ; GFX11-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[ZEXTLOAD5]] ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR3]](s32), [[OR4]](s32), [[OR5]](s32) ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; GFX11-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; GFX11-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) ; GFX11-UNALIGNED-LABEL: name: test_load_local_v2s96_align2 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} @@ -16209,10 +16209,10 @@ ; GFX11-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) ; GFX11-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<3 x s32>) from unknown-address + 12, align 2, addrspace 3) ; GFX11-UNALIGNED-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>) - ; GFX11-UNALIGNED-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; GFX11-UNALIGNED-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; GFX11-UNALIGNED-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; GFX11-UNALIGNED-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; GFX11-UNALIGNED-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; GFX11-UNALIGNED-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) %0:_(p3) = COPY $vgpr0 %1:_(<2 x s96>) = G_LOAD %0 :: (load (<2 x s96>), align 2, addrspace 3) %2:_(s96) = G_EXTRACT %1, 0 @@ -16246,10 +16246,10 @@ ; SI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD2]](<2 x s32>) ; SI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[UV3]](s32), [[LOAD3]](s32) ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; SI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; SI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY 
[[PRED_COPY]](s96) + ; SI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) ; CI-LABEL: name: test_load_local_v2s96_align4 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -16269,10 +16269,10 @@ ; CI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD2]](<2 x s32>) ; CI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[UV3]](s32), [[LOAD3]](s32) ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; CI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; CI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; CI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; CI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; CI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) ; CI-DS128-LABEL: name: test_load_local_v2s96_align4 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} @@ -16295,10 +16295,10 @@ ; CI-DS128-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s32) from unknown-address + 20, addrspace 3) ; CI-DS128-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32) ; CI-DS128-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; CI-DS128-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; CI-DS128-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; CI-DS128-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; CI-DS128-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; CI-DS128-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; CI-DS128-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) ; VI-LABEL: name: test_load_local_v2s96_align4 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -16321,10 +16321,10 @@ ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s32) from unknown-address + 20, addrspace 3) ; VI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32) ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; VI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; VI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; VI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) ; GFX9-LABEL: name: test_load_local_v2s96_align4 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -16347,10 +16347,10 @@ ; GFX9-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s32) from unknown-address + 20, addrspace 3) ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32) ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; GFX9-NEXT: $vgpr3_vgpr4_vgpr5 = COPY 
[[COPY2]](s96) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; GFX9-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) ; GFX9-UNALIGNED-LABEL: name: test_load_local_v2s96_align4 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} @@ -16361,10 +16361,10 @@ ; GFX9-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) ; GFX9-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<3 x s32>) from unknown-address + 12, align 4, addrspace 3) ; GFX9-UNALIGNED-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>) - ; GFX9-UNALIGNED-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; GFX9-UNALIGNED-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; GFX9-UNALIGNED-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; GFX9-UNALIGNED-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; GFX9-UNALIGNED-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; GFX9-UNALIGNED-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) ; GFX10-LABEL: name: test_load_local_v2s96_align4 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -16387,10 +16387,10 @@ ; GFX10-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s32) from unknown-address + 20, addrspace 3) ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32) ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; GFX10-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; GFX10-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) ; GFX10-UNALIGNED-LABEL: name: test_load_local_v2s96_align4 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} @@ -16413,10 +16413,10 @@ ; GFX10-UNALIGNED-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s32) from unknown-address + 20, addrspace 3) ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32) ; GFX10-UNALIGNED-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; GFX10-UNALIGNED-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; GFX10-UNALIGNED-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; GFX10-UNALIGNED-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; GFX10-UNALIGNED-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; GFX10-UNALIGNED-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; GFX10-UNALIGNED-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) ; GFX11-LABEL: name: test_load_local_v2s96_align4 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -16439,10 +16439,10 @@ ; GFX11-NEXT: 
[[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s32) from unknown-address + 20, addrspace 3) ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32) ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; GFX11-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; GFX11-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) ; GFX11-UNALIGNED-LABEL: name: test_load_local_v2s96_align4 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} @@ -16453,10 +16453,10 @@ ; GFX11-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) ; GFX11-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<3 x s32>) from unknown-address + 12, align 4, addrspace 3) ; GFX11-UNALIGNED-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>) - ; GFX11-UNALIGNED-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; GFX11-UNALIGNED-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; GFX11-UNALIGNED-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; GFX11-UNALIGNED-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; GFX11-UNALIGNED-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; GFX11-UNALIGNED-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) %0:_(p3) = COPY $vgpr0 %1:_(<2 x s96>) = G_LOAD %0 :: (load (<2 x s96>), align 4, addrspace 3) %2:_(s96) = G_EXTRACT %1, 0 @@ -16490,10 +16490,10 @@ ; SI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD2]](<2 x s32>) ; SI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[UV3]](s32), [[LOAD3]](s32) ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; SI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; SI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; SI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) ; CI-LABEL: name: test_load_local_v2s96_align16 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -16513,10 +16513,10 @@ ; CI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD2]](<2 x s32>) ; CI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[UV3]](s32), [[LOAD3]](s32) ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; CI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; CI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; CI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; CI-NEXT: 
[[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; CI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) ; CI-DS128-LABEL: name: test_load_local_v2s96_align16 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} @@ -16534,10 +16534,10 @@ ; CI-DS128-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 20, addrspace 3) ; CI-DS128-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; CI-DS128-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; CI-DS128-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; CI-DS128-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; CI-DS128-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; CI-DS128-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; CI-DS128-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; CI-DS128-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) ; VI-LABEL: name: test_load_local_v2s96_align16 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -16555,10 +16555,10 @@ ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 20, addrspace 3) ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; VI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; VI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; VI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) ; GFX9-LABEL: name: test_load_local_v2s96_align16 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -16576,10 +16576,10 @@ ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 20, addrspace 3) ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; GFX9-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; GFX9-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) ; GFX9-UNALIGNED-LABEL: name: test_load_local_v2s96_align16 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} @@ -16590,10 +16590,10 @@ ; GFX9-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) ; GFX9-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<3 x s32>) from unknown-address + 12, align 4, addrspace 3) ; GFX9-UNALIGNED-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>) - ; GFX9-UNALIGNED-NEXT: [[COPY1:%[0-9]+]]:_(s96) 
= COPY [[BITCAST]](s96) - ; GFX9-UNALIGNED-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; GFX9-UNALIGNED-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; GFX9-UNALIGNED-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; GFX9-UNALIGNED-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; GFX9-UNALIGNED-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) ; GFX10-LABEL: name: test_load_local_v2s96_align16 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -16611,10 +16611,10 @@ ; GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 20, addrspace 3) ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; GFX10-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; GFX10-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) ; GFX10-UNALIGNED-LABEL: name: test_load_local_v2s96_align16 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} @@ -16632,10 +16632,10 @@ ; GFX10-UNALIGNED-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 20, addrspace 3) ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; GFX10-UNALIGNED-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; GFX10-UNALIGNED-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; GFX10-UNALIGNED-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; GFX10-UNALIGNED-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; GFX10-UNALIGNED-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; GFX10-UNALIGNED-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; GFX10-UNALIGNED-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) ; GFX11-LABEL: name: test_load_local_v2s96_align16 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -16653,10 +16653,10 @@ ; GFX11-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 20, addrspace 3) ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; GFX11-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; GFX11-NEXT: $vgpr3_vgpr4_vgpr5 = COPY 
[[PRED_COPY1]](s96) ; GFX11-UNALIGNED-LABEL: name: test_load_local_v2s96_align16 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} @@ -16667,10 +16667,10 @@ ; GFX11-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) ; GFX11-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<3 x s32>) from unknown-address + 12, align 4, addrspace 3) ; GFX11-UNALIGNED-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>) - ; GFX11-UNALIGNED-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; GFX11-UNALIGNED-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; GFX11-UNALIGNED-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; GFX11-UNALIGNED-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; GFX11-UNALIGNED-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; GFX11-UNALIGNED-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) %0:_(p3) = COPY $vgpr0 %1:_(<2 x s96>) = G_LOAD %0 :: (load (<2 x s96>), align 16, addrspace 3) %2:_(s96) = G_EXTRACT %1, 0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-private.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-private.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-private.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-private.mir @@ -4676,17 +4676,17 @@ ; SI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; SI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]] - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]] - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32) + ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[PRED_COPY]](s32) ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) ; SI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) ; SI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]] - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; SI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C3]] - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[PRED_COPY1]](s32) ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) ; SI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) @@ -4707,17 +4707,17 @@ ; CI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; CI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]] - ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]] - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32) + ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[PRED_COPY]](s32) ; CI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) ; CI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] ; CI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC 
[[LSHR1]](s32) ; CI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]] - ; CI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C3]] - ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32) + ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[PRED_COPY1]](s32) ; CI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) ; CI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] ; CI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) @@ -4877,17 +4877,17 @@ ; SI-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR1]](s32) ; SI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C4]] - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C1]](s32) ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C5]] - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32) + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[PRED_COPY]](s32) ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) ; SI-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) ; SI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C4]] - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; SI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C1]](s32) ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C5]] - ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32) + ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[PRED_COPY1]](s32) ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) ; SI-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) @@ -4918,17 +4918,17 @@ ; CI-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; CI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR1]](s32) ; CI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C4]] - ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; CI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C1]](s32) ; CI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C5]] - ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32) + ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[PRED_COPY]](s32) ; CI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) ; CI-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] ; CI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) ; CI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C4]] - ; CI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; CI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C1]](s32) ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C5]] - ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32) + ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[PRED_COPY1]](s32) ; CI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) ; CI-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] ; CI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) @@ -11236,10 +11236,10 @@ ; SI-NEXT: [[OR17:%[0-9]+]]:_(s32) = G_OR [[SHL17]], [[OR15]] ; SI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR11]](s32), [[OR14]](s32), [[OR17]](s32) ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; SI-NEXT: 
[[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; SI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; SI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; SI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) ; CI-LABEL: name: test_load_private_v2s96_align1 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -11337,10 +11337,10 @@ ; CI-NEXT: [[OR17:%[0-9]+]]:_(s32) = G_OR [[SHL17]], [[OR15]] ; CI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR11]](s32), [[OR14]](s32), [[OR17]](s32) ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; CI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; CI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; CI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; CI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; CI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) ; VI-LABEL: name: test_load_private_v2s96_align1 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -11438,10 +11438,10 @@ ; VI-NEXT: [[OR17:%[0-9]+]]:_(s32) = G_OR [[SHL17]], [[OR15]] ; VI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR11]](s32), [[OR14]](s32), [[OR17]](s32) ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; VI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; VI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; VI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) ; GFX9-LABEL: name: test_load_private_v2s96_align1 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -11539,10 +11539,10 @@ ; GFX9-NEXT: [[OR17:%[0-9]+]]:_(s32) = G_OR [[SHL17]], [[OR15]] ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR11]](s32), [[OR14]](s32), [[OR17]](s32) ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; GFX9-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; GFX9-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) ; GFX10-LABEL: name: test_load_private_v2s96_align1 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -11640,10 +11640,10 @@ ; GFX10-NEXT: [[OR17:%[0-9]+]]:_(s32) = G_OR [[SHL17]], [[OR15]] ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR11]](s32), [[OR14]](s32), [[OR17]](s32) ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; 
GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; GFX10-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; GFX10-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) ; GFX11-LABEL: name: test_load_private_v2s96_align1 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -11654,10 +11654,10 @@ ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) ; GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p5) :: (load (<3 x s32>) from unknown-address + 12, align 1, addrspace 5) ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>) - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; GFX11-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; GFX11-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) %0:_(p5) = COPY $vgpr0 %1:_(<2 x s96>) = G_LOAD %0 :: (load (<2 x s96>), align 1, addrspace 5) %2:_(s96) = G_EXTRACT %1, 0 @@ -11720,10 +11720,10 @@ ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[ZEXTLOAD5]] ; SI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR3]](s32), [[OR4]](s32), [[OR5]](s32) ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; SI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; SI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; SI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) ; CI-LABEL: name: test_load_private_v2s96_align2 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -11772,10 +11772,10 @@ ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[ZEXTLOAD5]] ; CI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR3]](s32), [[OR4]](s32), [[OR5]](s32) ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; CI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; CI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; CI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; CI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; CI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) ; VI-LABEL: name: test_load_private_v2s96_align2 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -11824,10 +11824,10 @@ ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[ZEXTLOAD5]] ; VI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR3]](s32), [[OR4]](s32), [[OR5]](s32) ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY 
[[BITCAST]](s96) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; VI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; VI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; VI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) ; GFX9-LABEL: name: test_load_private_v2s96_align2 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -11876,10 +11876,10 @@ ; GFX9-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[ZEXTLOAD5]] ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR3]](s32), [[OR4]](s32), [[OR5]](s32) ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; GFX9-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; GFX9-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) ; GFX10-LABEL: name: test_load_private_v2s96_align2 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -11928,10 +11928,10 @@ ; GFX10-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[ZEXTLOAD5]] ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR3]](s32), [[OR4]](s32), [[OR5]](s32) ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; GFX10-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; GFX10-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) ; GFX11-LABEL: name: test_load_private_v2s96_align2 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -11942,10 +11942,10 @@ ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) ; GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p5) :: (load (<3 x s32>) from unknown-address + 12, align 2, addrspace 5) ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>) - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; GFX11-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; GFX11-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) %0:_(p5) = COPY $vgpr0 %1:_(<2 x s96>) = G_LOAD %0 :: (load (<2 x s96>), align 2, addrspace 5) %2:_(s96) = G_EXTRACT %1, 0 @@ -11982,10 +11982,10 @@ ; SI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5) ; SI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32) ; 
SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; SI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; SI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; SI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) ; CI-LABEL: name: test_load_private_v2s96_align4 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -12008,10 +12008,10 @@ ; CI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5) ; CI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32) ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; CI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; CI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; CI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; CI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; CI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) ; VI-LABEL: name: test_load_private_v2s96_align4 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -12034,10 +12034,10 @@ ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5) ; VI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32) ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; VI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; VI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; VI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) ; GFX9-LABEL: name: test_load_private_v2s96_align4 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -12060,10 +12060,10 @@ ; GFX9-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5) ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32) ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; GFX9-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; GFX9-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) ; GFX10-LABEL: name: test_load_private_v2s96_align4 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -12086,10 +12086,10 @@ ; GFX10-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: 
(load (s32) from unknown-address + 20, addrspace 5) ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32) ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; GFX10-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; GFX10-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) ; GFX11-LABEL: name: test_load_private_v2s96_align4 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -12100,10 +12100,10 @@ ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) ; GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p5) :: (load (<3 x s32>) from unknown-address + 12, align 4, addrspace 5) ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>) - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; GFX11-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; GFX11-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) %0:_(p5) = COPY $vgpr0 %1:_(<2 x s96>) = G_LOAD %0 :: (load (<2 x s96>), align 4, addrspace 5) %2:_(s96) = G_EXTRACT %1, 0 @@ -12140,10 +12140,10 @@ ; SI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5) ; SI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32) ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; SI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; SI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; SI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) ; CI-LABEL: name: test_load_private_v2s96_align16 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -12166,10 +12166,10 @@ ; CI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5) ; CI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32) ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; CI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; CI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; CI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; CI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; CI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY 
[[PRED_COPY1]](s96) ; VI-LABEL: name: test_load_private_v2s96_align16 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -12192,10 +12192,10 @@ ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5) ; VI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32) ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; VI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; VI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; VI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) ; GFX9-LABEL: name: test_load_private_v2s96_align16 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -12218,10 +12218,10 @@ ; GFX9-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5) ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32) ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; GFX9-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; GFX9-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) ; GFX10-LABEL: name: test_load_private_v2s96_align16 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -12244,10 +12244,10 @@ ; GFX10-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5) ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32) ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; GFX10-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; GFX10-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) ; GFX11-LABEL: name: test_load_private_v2s96_align16 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -12258,10 +12258,10 @@ ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) ; GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p5) :: (load (<3 x s32>) from unknown-address + 12, align 4, addrspace 5) ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>) - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; GFX11-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; GFX11-NEXT: 
[[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; GFX11-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) %0:_(p5) = COPY $vgpr0 %1:_(<2 x s96>) = G_LOAD %0 :: (load (<2 x s96>), align 16, addrspace 5) %2:_(s96) = G_EXTRACT %1, 0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-memset.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-memset.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-memset.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-memset.mir @@ -15,7 +15,7 @@ ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY2]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s8) = COPY [[TRUNC]](s8) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC]](s8) ; CHECK-NEXT: G_STORE [[COPY2]](s32), [[MV]](p0) :: (store (s8)) ; CHECK-NEXT: S_ENDPGM 0 %0:_(s32) = COPY $vgpr0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-merge-values.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-merge-values.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-merge-values.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-merge-values.mir @@ -27,24 +27,24 @@ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C1]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]] - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY8]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[PRED_COPY]](s32) ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C]] - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C1]](s32) ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]] - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY9]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[PRED_COPY1]](s32) ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY4]](s32) ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C]] - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[C1]](s32) ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C2]] - ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY10]](s32) + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[PRED_COPY2]](s32) ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY6]](s32) @@ -95,8 +95,8 @@ ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[C2]], [[C1]] ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - 
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C3]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY]], [[C3]](s32) ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC]] ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) @@ -119,10 +119,10 @@ ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[C2]], [[C1]] ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[C3]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C3]](s32) ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY1]], [[PRED_COPY]](s32) ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC]] ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 2 @@ -154,18 +154,18 @@ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C1]](s32) ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[C4]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY]], [[SHL]] - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY1]], [[C4]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY]], [[SHL]] + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY2]], [[C5]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY2]], [[C5]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C3]](s32) + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY [[C3]](s32) ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[COPY3]], [[C6]](s32) + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY3]], [[C6]](s32) ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] ; CHECK-NEXT: $vgpr0 = COPY [[OR2]](s32) %0:_(s8) = G_CONSTANT i8 0 @@ -246,29 +246,29 @@ ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[C6]], [[C5]] ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 4 - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[C3]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C3]](s32) ; CHECK-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL 
[[PRED_COPY1]], [[PRED_COPY]](s32) ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC]] ; CHECK-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C9]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY3]], [[COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[C9]](s32) + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY [[C1]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY3]], [[PRED_COPY2]](s32) ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[OR]], [[TRUNC1]] ; CHECK-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C10]](s32) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[COPY5]], [[COPY4]](s32) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY [[C10]](s32) + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY5]], [[PRED_COPY4]](s32) ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[OR1]], [[TRUNC2]] - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s16) = COPY [[C7]](s16) - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[COPY6]], [[C5]] - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C3]](s32) - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[COPY8]], [[COPY7]](s32) + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s16) = PRED_COPY [[C7]](s16) + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[PRED_COPY6]], [[C5]] + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY [[C3]](s32) + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY [[C4]](s32) + ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY8]], [[PRED_COPY7]](s32) ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND1]], [[TRUNC3]] ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C8]] @@ -312,33 +312,33 @@ ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[C7]], [[C6]] ; CHECK-NEXT: [[C8:%[0-9]+]]:_(s16) = G_CONSTANT i16 4 - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[C3]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C3]](s32) ; CHECK-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY1]], [[PRED_COPY]](s32) ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC]] ; CHECK-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C10]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY3]], [[COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[C10]](s32) + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY [[C1]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY3]], [[PRED_COPY2]](s32) ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC 
[[SHL1]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[OR]], [[TRUNC1]] ; CHECK-NEXT: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C11]](s32) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[COPY5]], [[COPY4]](s32) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY [[C11]](s32) + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY5]], [[PRED_COPY4]](s32) ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[OR1]], [[TRUNC2]] - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s16) = COPY [[C8]](s16) - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[COPY6]], [[C6]] - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C3]](s32) - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[COPY8]], [[COPY7]](s32) + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s16) = PRED_COPY [[C8]](s16) + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[PRED_COPY6]], [[C6]] + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY [[C3]](s32) + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY [[C4]](s32) + ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY8]], [[PRED_COPY7]](s32) ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND1]], [[TRUNC3]] - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[COPY9]], [[C10]](s32) + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY [[C5]](s32) + ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY9]], [[C10]](s32) ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s16) = G_OR [[OR3]], [[TRUNC4]] ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C9]] @@ -376,33 +376,33 @@ ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[C4]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY]], [[SHL]] - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C1]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY1]], [[C4]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY]], [[SHL]] + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) ; CHECK-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY2]], [[C8]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY2]], [[C8]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C3]](s32) + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY [[C3]](s32) ; CHECK-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[COPY3]], [[C9]](s32) + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY3]], [[C9]](s32) ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C4]](s32) + ; CHECK-NEXT: 
[[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY [[C4]](s32) ; CHECK-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[COPY4]], [[C10]](s32) + ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY4]], [[C10]](s32) ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[OR2]], [[SHL3]] - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C5]](s32) + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY [[C5]](s32) ; CHECK-NEXT: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[COPY5]], [[C11]](s32) + ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY5]], [[C11]](s32) ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]] - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C6]](s32) + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY [[C6]](s32) ; CHECK-NEXT: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[COPY6]], [[C12]](s32) + ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY6]], [[C12]](s32) ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C7]](s32) + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY [[C7]](s32) ; CHECK-NEXT: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 28 - ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[COPY7]], [[C13]](s32) + ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY7]], [[C13]](s32) ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[OR5]], [[SHL6]] ; CHECK-NEXT: S_NOP 0, implicit [[OR6]](s32) %0:_(s4) = G_CONSTANT i4 0 @@ -430,29 +430,29 @@ ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[C5]], [[C4]] ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[C6]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C6]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY1]], [[PRED_COPY]](s32) ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC]] ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 2 ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[C7]], [[C4]] - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C6]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY3]], [[COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[C6]](s32) + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY [[C1]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY3]], [[PRED_COPY2]](s32) ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND1]], [[TRUNC1]] ; CHECK-NEXT: [[C8:%[0-9]+]]:_(s16) = G_CONSTANT i16 4 ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[C8]], [[C4]] - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C6]](s32) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[COPY5]], [[COPY4]](s32) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY [[C6]](s32) + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY5]], [[PRED_COPY4]](s32) ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = 
G_TRUNC [[SHL2]](s32) ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC2]] ; CHECK-NEXT: [[C9:%[0-9]+]]:_(s16) = G_CONSTANT i16 6 ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[C9]], [[C4]] - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C3]](s32) - ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[COPY6]], [[C6]](s32) + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY [[C3]](s32) + ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY6]], [[C6]](s32) ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND3]], [[TRUNC3]] ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) @@ -494,44 +494,44 @@ ; CHECK-NEXT: [[C8:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[C8]], [[C7]] ; CHECK-NEXT: [[C9:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C4]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY1]], [[PRED_COPY]](s32) ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC]] ; CHECK-NEXT: [[C10:%[0-9]+]]:_(s16) = G_CONSTANT i16 2 ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[C10]], [[C7]] - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY3]], [[COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[C4]](s32) + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY [[C1]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY3]], [[PRED_COPY2]](s32) ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND1]], [[TRUNC1]] ; CHECK-NEXT: [[C11:%[0-9]+]]:_(s16) = G_CONSTANT i16 4 ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[C11]], [[C7]] - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[COPY5]], [[COPY4]](s32) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY [[C4]](s32) + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY5]], [[PRED_COPY4]](s32) ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC2]] ; CHECK-NEXT: [[C12:%[0-9]+]]:_(s16) = G_CONSTANT i16 6 ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[C12]], [[C7]] - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C3]](s32) - ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[COPY7]], [[COPY6]](s32) + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY [[C4]](s32) + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY [[C3]](s32) + ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY7]], [[PRED_COPY6]](s32) ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND3]], [[TRUNC3]] - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s16) = COPY [[C9]](s16) - ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[COPY8]], [[C7]] - ; CHECK-NEXT: 
[[COPY9:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[COPY10]], [[COPY9]](s32) + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s16) = PRED_COPY [[C9]](s16) + ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[PRED_COPY8]], [[C7]] + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY [[C4]](s32) + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY [[C5]](s32) + ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY10]], [[PRED_COPY9]](s32) ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC4]] ; CHECK-NEXT: [[C13:%[0-9]+]]:_(s16) = G_CONSTANT i16 10 ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[C13]], [[C7]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[C6]](s32) - ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[COPY12]], [[COPY11]](s32) + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY [[C4]](s32) + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY [[C6]](s32) + ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY12]], [[PRED_COPY11]](s32) ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL5]](s32) ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND5]], [[TRUNC5]] ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) @@ -609,24 +609,24 @@ ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[C4]], [[C3]] ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[C5]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C5]](s32) ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY1]], [[PRED_COPY]](s32) ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC]] ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 2 ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[C7]], [[C3]] - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY3]], [[COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[C5]](s32) + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY [[C1]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY3]], [[PRED_COPY2]](s32) ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND1]], [[TRUNC1]] ; CHECK-NEXT: [[C8:%[0-9]+]]:_(s16) = G_CONSTANT i16 4 ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[C8]], [[C3]] - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[COPY5]], [[COPY4]](s32) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY [[C5]](s32) + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY5]], [[PRED_COPY4]](s32) ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC2]] ; CHECK-NEXT: [[C9:%[0-9]+]]:_(s16) 
= G_CONSTANT i16 6 @@ -667,315 +667,315 @@ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY]], [[SHL]] - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY2]], [[C2]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY1]], [[C1]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY]], [[SHL]] + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY2]], [[C2]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[COPY3]], [[C3]](s32) + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY3]], [[C3]](s32) ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[COPY4]], [[C4]](s32) + ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY4]], [[C4]](s32) ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[OR2]], [[SHL3]] - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 - ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[COPY5]], [[C5]](s32) + ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY5]], [[C5]](s32) ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]] - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 - ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[COPY6]], [[C6]](s32) + ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY6]], [[C6]](s32) ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 - ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[COPY7]], [[C7]](s32) + ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY7]], [[C7]](s32) ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[OR5]], [[SHL6]] - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) ; CHECK-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[COPY8]], [[C8]](s32) + ; CHECK-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY8]], [[C8]](s32) ; CHECK-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]] - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) ; CHECK-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 9 
- ; CHECK-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[COPY9]], [[C9]](s32) + ; CHECK-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY9]], [[C9]](s32) ; CHECK-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]] - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) ; CHECK-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[COPY10]], [[C10]](s32) + ; CHECK-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY10]], [[C10]](s32) ; CHECK-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[OR8]], [[SHL9]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) ; CHECK-NEXT: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 11 - ; CHECK-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[COPY11]], [[C11]](s32) + ; CHECK-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY11]], [[C11]](s32) ; CHECK-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) ; CHECK-NEXT: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; CHECK-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[COPY12]], [[C12]](s32) + ; CHECK-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY12]], [[C12]](s32) ; CHECK-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]] - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) ; CHECK-NEXT: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 13 - ; CHECK-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[COPY13]], [[C13]](s32) + ; CHECK-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY13]], [[C13]](s32) ; CHECK-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[OR11]], [[SHL12]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) ; CHECK-NEXT: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 14 - ; CHECK-NEXT: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[COPY14]], [[C14]](s32) + ; CHECK-NEXT: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY14]], [[C14]](s32) ; CHECK-NEXT: [[OR13:%[0-9]+]]:_(s32) = G_OR [[OR12]], [[SHL13]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) ; CHECK-NEXT: [[C15:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 - ; CHECK-NEXT: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[COPY15]], [[C15]](s32) + ; CHECK-NEXT: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY15]], [[C15]](s32) ; CHECK-NEXT: [[OR14:%[0-9]+]]:_(s32) = G_OR [[OR13]], [[SHL14]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) ; CHECK-NEXT: [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C16]](s32) + ; CHECK-NEXT: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY16]], [[C16]](s32) ; CHECK-NEXT: [[OR15:%[0-9]+]]:_(s32) = G_OR [[OR14]], [[SHL15]] - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[C1]](s32) ; CHECK-NEXT: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 17 - ; CHECK-NEXT: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C17]](s32) + ; CHECK-NEXT: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY17]], [[C17]](s32) ; CHECK-NEXT: [[OR16:%[0-9]+]]:_(s32) = G_OR [[OR15]], [[SHL16]] - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = 
PRED_COPY [[C]](s32) ; CHECK-NEXT: [[C18:%[0-9]+]]:_(s32) = G_CONSTANT i32 18 - ; CHECK-NEXT: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C18]](s32) + ; CHECK-NEXT: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY18]], [[C18]](s32) ; CHECK-NEXT: [[OR17:%[0-9]+]]:_(s32) = G_OR [[OR16]], [[SHL17]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CHECK-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) ; CHECK-NEXT: [[C19:%[0-9]+]]:_(s32) = G_CONSTANT i32 19 - ; CHECK-NEXT: [[SHL18:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C19]](s32) + ; CHECK-NEXT: [[SHL18:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY19]], [[C19]](s32) ; CHECK-NEXT: [[OR18:%[0-9]+]]:_(s32) = G_OR [[OR17]], [[SHL18]] - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CHECK-NEXT: [[PRED_COPY20:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) ; CHECK-NEXT: [[C20:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL19:%[0-9]+]]:_(s32) = G_SHL [[COPY20]], [[C20]](s32) + ; CHECK-NEXT: [[SHL19:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY20]], [[C20]](s32) ; CHECK-NEXT: [[OR19:%[0-9]+]]:_(s32) = G_OR [[OR18]], [[SHL19]] - ; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CHECK-NEXT: [[PRED_COPY21:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) ; CHECK-NEXT: [[C21:%[0-9]+]]:_(s32) = G_CONSTANT i32 21 - ; CHECK-NEXT: [[SHL20:%[0-9]+]]:_(s32) = G_SHL [[COPY21]], [[C21]](s32) + ; CHECK-NEXT: [[SHL20:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY21]], [[C21]](s32) ; CHECK-NEXT: [[OR20:%[0-9]+]]:_(s32) = G_OR [[OR19]], [[SHL20]] - ; CHECK-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CHECK-NEXT: [[PRED_COPY22:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) ; CHECK-NEXT: [[C22:%[0-9]+]]:_(s32) = G_CONSTANT i32 22 - ; CHECK-NEXT: [[SHL21:%[0-9]+]]:_(s32) = G_SHL [[COPY22]], [[C22]](s32) + ; CHECK-NEXT: [[SHL21:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY22]], [[C22]](s32) ; CHECK-NEXT: [[OR21:%[0-9]+]]:_(s32) = G_OR [[OR20]], [[SHL21]] - ; CHECK-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CHECK-NEXT: [[PRED_COPY23:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) ; CHECK-NEXT: [[C23:%[0-9]+]]:_(s32) = G_CONSTANT i32 23 - ; CHECK-NEXT: [[SHL22:%[0-9]+]]:_(s32) = G_SHL [[COPY23]], [[C23]](s32) + ; CHECK-NEXT: [[SHL22:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY23]], [[C23]](s32) ; CHECK-NEXT: [[OR22:%[0-9]+]]:_(s32) = G_OR [[OR21]], [[SHL22]] - ; CHECK-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CHECK-NEXT: [[PRED_COPY24:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) ; CHECK-NEXT: [[C24:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; CHECK-NEXT: [[SHL23:%[0-9]+]]:_(s32) = G_SHL [[COPY24]], [[C24]](s32) + ; CHECK-NEXT: [[SHL23:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY24]], [[C24]](s32) ; CHECK-NEXT: [[OR23:%[0-9]+]]:_(s32) = G_OR [[OR22]], [[SHL23]] - ; CHECK-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CHECK-NEXT: [[PRED_COPY25:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) ; CHECK-NEXT: [[C25:%[0-9]+]]:_(s32) = G_CONSTANT i32 25 - ; CHECK-NEXT: [[SHL24:%[0-9]+]]:_(s32) = G_SHL [[COPY25]], [[C25]](s32) + ; CHECK-NEXT: [[SHL24:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY25]], [[C25]](s32) ; CHECK-NEXT: [[OR24:%[0-9]+]]:_(s32) = G_OR [[OR23]], [[SHL24]] - ; CHECK-NEXT: [[COPY26:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CHECK-NEXT: [[PRED_COPY26:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) ; CHECK-NEXT: [[C26:%[0-9]+]]:_(s32) = G_CONSTANT i32 26 - ; CHECK-NEXT: [[SHL25:%[0-9]+]]:_(s32) = G_SHL [[COPY26]], [[C26]](s32) + ; CHECK-NEXT: [[SHL25:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY26]], [[C26]](s32) ; CHECK-NEXT: [[OR25:%[0-9]+]]:_(s32) = G_OR [[OR24]], [[SHL25]] - ; CHECK-NEXT: 
[[COPY27:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CHECK-NEXT: [[PRED_COPY27:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) ; CHECK-NEXT: [[C27:%[0-9]+]]:_(s32) = G_CONSTANT i32 27 - ; CHECK-NEXT: [[SHL26:%[0-9]+]]:_(s32) = G_SHL [[COPY27]], [[C27]](s32) + ; CHECK-NEXT: [[SHL26:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY27]], [[C27]](s32) ; CHECK-NEXT: [[OR26:%[0-9]+]]:_(s32) = G_OR [[OR25]], [[SHL26]] - ; CHECK-NEXT: [[COPY28:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CHECK-NEXT: [[PRED_COPY28:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) ; CHECK-NEXT: [[C28:%[0-9]+]]:_(s32) = G_CONSTANT i32 28 - ; CHECK-NEXT: [[SHL27:%[0-9]+]]:_(s32) = G_SHL [[COPY28]], [[C28]](s32) + ; CHECK-NEXT: [[SHL27:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY28]], [[C28]](s32) ; CHECK-NEXT: [[OR27:%[0-9]+]]:_(s32) = G_OR [[OR26]], [[SHL27]] - ; CHECK-NEXT: [[COPY29:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CHECK-NEXT: [[PRED_COPY29:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) ; CHECK-NEXT: [[C29:%[0-9]+]]:_(s32) = G_CONSTANT i32 29 - ; CHECK-NEXT: [[SHL28:%[0-9]+]]:_(s32) = G_SHL [[COPY29]], [[C29]](s32) + ; CHECK-NEXT: [[SHL28:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY29]], [[C29]](s32) ; CHECK-NEXT: [[OR28:%[0-9]+]]:_(s32) = G_OR [[OR27]], [[SHL28]] - ; CHECK-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CHECK-NEXT: [[PRED_COPY30:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) ; CHECK-NEXT: [[C30:%[0-9]+]]:_(s32) = G_CONSTANT i32 30 - ; CHECK-NEXT: [[SHL29:%[0-9]+]]:_(s32) = G_SHL [[COPY30]], [[C30]](s32) + ; CHECK-NEXT: [[SHL29:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY30]], [[C30]](s32) ; CHECK-NEXT: [[OR29:%[0-9]+]]:_(s32) = G_OR [[OR28]], [[SHL29]] - ; CHECK-NEXT: [[COPY31:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CHECK-NEXT: [[PRED_COPY31:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) ; CHECK-NEXT: [[C31:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 - ; CHECK-NEXT: [[SHL30:%[0-9]+]]:_(s32) = G_SHL [[COPY31]], [[C31]](s32) + ; CHECK-NEXT: [[SHL30:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY31]], [[C31]](s32) ; CHECK-NEXT: [[OR30:%[0-9]+]]:_(s32) = G_OR [[OR29]], [[SHL30]] - ; CHECK-NEXT: [[COPY32:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[COPY33:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[SHL31:%[0-9]+]]:_(s32) = G_SHL [[COPY33]], [[C1]](s32) - ; CHECK-NEXT: [[OR31:%[0-9]+]]:_(s32) = G_OR [[COPY32]], [[SHL31]] - ; CHECK-NEXT: [[COPY34:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[SHL32:%[0-9]+]]:_(s32) = G_SHL [[COPY34]], [[C2]](s32) + ; CHECK-NEXT: [[PRED_COPY32:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[PRED_COPY33:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[SHL31:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY33]], [[C1]](s32) + ; CHECK-NEXT: [[OR31:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY32]], [[SHL31]] + ; CHECK-NEXT: [[PRED_COPY34:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[SHL32:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY34]], [[C2]](s32) ; CHECK-NEXT: [[OR32:%[0-9]+]]:_(s32) = G_OR [[OR31]], [[SHL32]] - ; CHECK-NEXT: [[COPY35:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; CHECK-NEXT: [[SHL33:%[0-9]+]]:_(s32) = G_SHL [[COPY35]], [[C3]](s32) + ; CHECK-NEXT: [[PRED_COPY35:%[0-9]+]]:_(s32) = PRED_COPY [[C1]](s32) + ; CHECK-NEXT: [[SHL33:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY35]], [[C3]](s32) ; CHECK-NEXT: [[OR33:%[0-9]+]]:_(s32) = G_OR [[OR32]], [[SHL33]] - ; CHECK-NEXT: [[COPY36:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[SHL34:%[0-9]+]]:_(s32) = G_SHL [[COPY36]], [[C4]](s32) + ; CHECK-NEXT: [[PRED_COPY36:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[SHL34:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY36]], [[C4]](s32) ; CHECK-NEXT: 
[[OR34:%[0-9]+]]:_(s32) = G_OR [[OR33]], [[SHL34]] - ; CHECK-NEXT: [[COPY37:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[SHL35:%[0-9]+]]:_(s32) = G_SHL [[COPY37]], [[C5]](s32) + ; CHECK-NEXT: [[PRED_COPY37:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[SHL35:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY37]], [[C5]](s32) ; CHECK-NEXT: [[OR35:%[0-9]+]]:_(s32) = G_OR [[OR34]], [[SHL35]] - ; CHECK-NEXT: [[COPY38:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[SHL36:%[0-9]+]]:_(s32) = G_SHL [[COPY38]], [[C6]](s32) + ; CHECK-NEXT: [[PRED_COPY38:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[SHL36:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY38]], [[C6]](s32) ; CHECK-NEXT: [[OR36:%[0-9]+]]:_(s32) = G_OR [[OR35]], [[SHL36]] - ; CHECK-NEXT: [[COPY39:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[SHL37:%[0-9]+]]:_(s32) = G_SHL [[COPY39]], [[C7]](s32) + ; CHECK-NEXT: [[PRED_COPY39:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[SHL37:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY39]], [[C7]](s32) ; CHECK-NEXT: [[OR37:%[0-9]+]]:_(s32) = G_OR [[OR36]], [[SHL37]] - ; CHECK-NEXT: [[COPY40:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[SHL38:%[0-9]+]]:_(s32) = G_SHL [[COPY40]], [[C8]](s32) + ; CHECK-NEXT: [[PRED_COPY40:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[SHL38:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY40]], [[C8]](s32) ; CHECK-NEXT: [[OR38:%[0-9]+]]:_(s32) = G_OR [[OR37]], [[SHL38]] - ; CHECK-NEXT: [[COPY41:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[SHL39:%[0-9]+]]:_(s32) = G_SHL [[COPY41]], [[C9]](s32) + ; CHECK-NEXT: [[PRED_COPY41:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[SHL39:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY41]], [[C9]](s32) ; CHECK-NEXT: [[OR39:%[0-9]+]]:_(s32) = G_OR [[OR38]], [[SHL39]] - ; CHECK-NEXT: [[COPY42:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[SHL40:%[0-9]+]]:_(s32) = G_SHL [[COPY42]], [[C10]](s32) + ; CHECK-NEXT: [[PRED_COPY42:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[SHL40:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY42]], [[C10]](s32) ; CHECK-NEXT: [[OR40:%[0-9]+]]:_(s32) = G_OR [[OR39]], [[SHL40]] - ; CHECK-NEXT: [[COPY43:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[SHL41:%[0-9]+]]:_(s32) = G_SHL [[COPY43]], [[C11]](s32) + ; CHECK-NEXT: [[PRED_COPY43:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[SHL41:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY43]], [[C11]](s32) ; CHECK-NEXT: [[OR41:%[0-9]+]]:_(s32) = G_OR [[OR40]], [[SHL41]] - ; CHECK-NEXT: [[COPY44:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[SHL42:%[0-9]+]]:_(s32) = G_SHL [[COPY44]], [[C12]](s32) + ; CHECK-NEXT: [[PRED_COPY44:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[SHL42:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY44]], [[C12]](s32) ; CHECK-NEXT: [[OR42:%[0-9]+]]:_(s32) = G_OR [[OR41]], [[SHL42]] - ; CHECK-NEXT: [[COPY45:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[SHL43:%[0-9]+]]:_(s32) = G_SHL [[COPY45]], [[C13]](s32) + ; CHECK-NEXT: [[PRED_COPY45:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[SHL43:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY45]], [[C13]](s32) ; CHECK-NEXT: [[OR43:%[0-9]+]]:_(s32) = G_OR [[OR42]], [[SHL43]] - ; CHECK-NEXT: [[COPY46:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[SHL44:%[0-9]+]]:_(s32) = G_SHL [[COPY46]], [[C14]](s32) + ; CHECK-NEXT: [[PRED_COPY46:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[SHL44:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY46]], [[C14]](s32) ; CHECK-NEXT: [[OR44:%[0-9]+]]:_(s32) = G_OR [[OR43]], [[SHL44]] - ; CHECK-NEXT: 
[[COPY47:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[SHL45:%[0-9]+]]:_(s32) = G_SHL [[COPY47]], [[C15]](s32) + ; CHECK-NEXT: [[PRED_COPY47:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[SHL45:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY47]], [[C15]](s32) ; CHECK-NEXT: [[OR45:%[0-9]+]]:_(s32) = G_OR [[OR44]], [[SHL45]] - ; CHECK-NEXT: [[COPY48:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[SHL46:%[0-9]+]]:_(s32) = G_SHL [[COPY48]], [[C16]](s32) + ; CHECK-NEXT: [[PRED_COPY48:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[SHL46:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY48]], [[C16]](s32) ; CHECK-NEXT: [[OR46:%[0-9]+]]:_(s32) = G_OR [[OR45]], [[SHL46]] - ; CHECK-NEXT: [[COPY49:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[SHL47:%[0-9]+]]:_(s32) = G_SHL [[COPY49]], [[C17]](s32) + ; CHECK-NEXT: [[PRED_COPY49:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[SHL47:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY49]], [[C17]](s32) ; CHECK-NEXT: [[OR47:%[0-9]+]]:_(s32) = G_OR [[OR46]], [[SHL47]] - ; CHECK-NEXT: [[COPY50:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[SHL48:%[0-9]+]]:_(s32) = G_SHL [[COPY50]], [[C18]](s32) + ; CHECK-NEXT: [[PRED_COPY50:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[SHL48:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY50]], [[C18]](s32) ; CHECK-NEXT: [[OR48:%[0-9]+]]:_(s32) = G_OR [[OR47]], [[SHL48]] - ; CHECK-NEXT: [[COPY51:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; CHECK-NEXT: [[SHL49:%[0-9]+]]:_(s32) = G_SHL [[COPY51]], [[C19]](s32) + ; CHECK-NEXT: [[PRED_COPY51:%[0-9]+]]:_(s32) = PRED_COPY [[C1]](s32) + ; CHECK-NEXT: [[SHL49:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY51]], [[C19]](s32) ; CHECK-NEXT: [[OR49:%[0-9]+]]:_(s32) = G_OR [[OR48]], [[SHL49]] - ; CHECK-NEXT: [[COPY52:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; CHECK-NEXT: [[SHL50:%[0-9]+]]:_(s32) = G_SHL [[COPY52]], [[C20]](s32) + ; CHECK-NEXT: [[PRED_COPY52:%[0-9]+]]:_(s32) = PRED_COPY [[C1]](s32) + ; CHECK-NEXT: [[SHL50:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY52]], [[C20]](s32) ; CHECK-NEXT: [[OR50:%[0-9]+]]:_(s32) = G_OR [[OR49]], [[SHL50]] - ; CHECK-NEXT: [[COPY53:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[SHL51:%[0-9]+]]:_(s32) = G_SHL [[COPY53]], [[C21]](s32) + ; CHECK-NEXT: [[PRED_COPY53:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[SHL51:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY53]], [[C21]](s32) ; CHECK-NEXT: [[OR51:%[0-9]+]]:_(s32) = G_OR [[OR50]], [[SHL51]] - ; CHECK-NEXT: [[COPY54:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[SHL52:%[0-9]+]]:_(s32) = G_SHL [[COPY54]], [[C22]](s32) + ; CHECK-NEXT: [[PRED_COPY54:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[SHL52:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY54]], [[C22]](s32) ; CHECK-NEXT: [[OR52:%[0-9]+]]:_(s32) = G_OR [[OR51]], [[SHL52]] - ; CHECK-NEXT: [[COPY55:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[SHL53:%[0-9]+]]:_(s32) = G_SHL [[COPY55]], [[C23]](s32) + ; CHECK-NEXT: [[PRED_COPY55:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[SHL53:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY55]], [[C23]](s32) ; CHECK-NEXT: [[OR53:%[0-9]+]]:_(s32) = G_OR [[OR52]], [[SHL53]] - ; CHECK-NEXT: [[COPY56:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[SHL54:%[0-9]+]]:_(s32) = G_SHL [[COPY56]], [[C24]](s32) + ; CHECK-NEXT: [[PRED_COPY56:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[SHL54:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY56]], [[C24]](s32) ; CHECK-NEXT: [[OR54:%[0-9]+]]:_(s32) = G_OR [[OR53]], [[SHL54]] - ; CHECK-NEXT: [[COPY57:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: 
[[SHL55:%[0-9]+]]:_(s32) = G_SHL [[COPY57]], [[C25]](s32) + ; CHECK-NEXT: [[PRED_COPY57:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[SHL55:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY57]], [[C25]](s32) ; CHECK-NEXT: [[OR55:%[0-9]+]]:_(s32) = G_OR [[OR54]], [[SHL55]] - ; CHECK-NEXT: [[COPY58:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[SHL56:%[0-9]+]]:_(s32) = G_SHL [[COPY58]], [[C26]](s32) + ; CHECK-NEXT: [[PRED_COPY58:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[SHL56:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY58]], [[C26]](s32) ; CHECK-NEXT: [[OR56:%[0-9]+]]:_(s32) = G_OR [[OR55]], [[SHL56]] - ; CHECK-NEXT: [[COPY59:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[SHL57:%[0-9]+]]:_(s32) = G_SHL [[COPY59]], [[C27]](s32) + ; CHECK-NEXT: [[PRED_COPY59:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[SHL57:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY59]], [[C27]](s32) ; CHECK-NEXT: [[OR57:%[0-9]+]]:_(s32) = G_OR [[OR56]], [[SHL57]] - ; CHECK-NEXT: [[COPY60:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[SHL58:%[0-9]+]]:_(s32) = G_SHL [[COPY60]], [[C28]](s32) + ; CHECK-NEXT: [[PRED_COPY60:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[SHL58:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY60]], [[C28]](s32) ; CHECK-NEXT: [[OR58:%[0-9]+]]:_(s32) = G_OR [[OR57]], [[SHL58]] - ; CHECK-NEXT: [[COPY61:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[SHL59:%[0-9]+]]:_(s32) = G_SHL [[COPY61]], [[C29]](s32) + ; CHECK-NEXT: [[PRED_COPY61:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[SHL59:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY61]], [[C29]](s32) ; CHECK-NEXT: [[OR59:%[0-9]+]]:_(s32) = G_OR [[OR58]], [[SHL59]] - ; CHECK-NEXT: [[COPY62:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[SHL60:%[0-9]+]]:_(s32) = G_SHL [[COPY62]], [[C30]](s32) + ; CHECK-NEXT: [[PRED_COPY62:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[SHL60:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY62]], [[C30]](s32) ; CHECK-NEXT: [[OR60:%[0-9]+]]:_(s32) = G_OR [[OR59]], [[SHL60]] - ; CHECK-NEXT: [[COPY63:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[SHL61:%[0-9]+]]:_(s32) = G_SHL [[COPY63]], [[C31]](s32) + ; CHECK-NEXT: [[PRED_COPY63:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[SHL61:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY63]], [[C31]](s32) ; CHECK-NEXT: [[OR61:%[0-9]+]]:_(s32) = G_OR [[OR60]], [[SHL61]] - ; CHECK-NEXT: [[COPY64:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[COPY65:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[SHL62:%[0-9]+]]:_(s32) = G_SHL [[COPY65]], [[C1]](s32) - ; CHECK-NEXT: [[OR62:%[0-9]+]]:_(s32) = G_OR [[COPY64]], [[SHL62]] - ; CHECK-NEXT: [[COPY66:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[SHL63:%[0-9]+]]:_(s32) = G_SHL [[COPY66]], [[C2]](s32) + ; CHECK-NEXT: [[PRED_COPY64:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[PRED_COPY65:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[SHL62:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY65]], [[C1]](s32) + ; CHECK-NEXT: [[OR62:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY64]], [[SHL62]] + ; CHECK-NEXT: [[PRED_COPY66:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[SHL63:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY66]], [[C2]](s32) ; CHECK-NEXT: [[OR63:%[0-9]+]]:_(s32) = G_OR [[OR62]], [[SHL63]] - ; CHECK-NEXT: [[COPY67:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[SHL64:%[0-9]+]]:_(s32) = G_SHL [[COPY67]], [[C3]](s32) + ; CHECK-NEXT: [[PRED_COPY67:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[SHL64:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY67]], [[C3]](s32) ; CHECK-NEXT: 
[[OR64:%[0-9]+]]:_(s32) = G_OR [[OR63]], [[SHL64]] - ; CHECK-NEXT: [[COPY68:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[SHL65:%[0-9]+]]:_(s32) = G_SHL [[COPY68]], [[C4]](s32) + ; CHECK-NEXT: [[PRED_COPY68:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[SHL65:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY68]], [[C4]](s32) ; CHECK-NEXT: [[OR65:%[0-9]+]]:_(s32) = G_OR [[OR64]], [[SHL65]] - ; CHECK-NEXT: [[COPY69:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[SHL66:%[0-9]+]]:_(s32) = G_SHL [[COPY69]], [[C5]](s32) + ; CHECK-NEXT: [[PRED_COPY69:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[SHL66:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY69]], [[C5]](s32) ; CHECK-NEXT: [[OR66:%[0-9]+]]:_(s32) = G_OR [[OR65]], [[SHL66]] - ; CHECK-NEXT: [[COPY70:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[SHL67:%[0-9]+]]:_(s32) = G_SHL [[COPY70]], [[C6]](s32) + ; CHECK-NEXT: [[PRED_COPY70:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[SHL67:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY70]], [[C6]](s32) ; CHECK-NEXT: [[OR67:%[0-9]+]]:_(s32) = G_OR [[OR66]], [[SHL67]] - ; CHECK-NEXT: [[COPY71:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[SHL68:%[0-9]+]]:_(s32) = G_SHL [[COPY71]], [[C7]](s32) + ; CHECK-NEXT: [[PRED_COPY71:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[SHL68:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY71]], [[C7]](s32) ; CHECK-NEXT: [[OR68:%[0-9]+]]:_(s32) = G_OR [[OR67]], [[SHL68]] - ; CHECK-NEXT: [[COPY72:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[SHL69:%[0-9]+]]:_(s32) = G_SHL [[COPY72]], [[C8]](s32) + ; CHECK-NEXT: [[PRED_COPY72:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[SHL69:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY72]], [[C8]](s32) ; CHECK-NEXT: [[OR69:%[0-9]+]]:_(s32) = G_OR [[OR68]], [[SHL69]] - ; CHECK-NEXT: [[COPY73:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[SHL70:%[0-9]+]]:_(s32) = G_SHL [[COPY73]], [[C9]](s32) + ; CHECK-NEXT: [[PRED_COPY73:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[SHL70:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY73]], [[C9]](s32) ; CHECK-NEXT: [[OR70:%[0-9]+]]:_(s32) = G_OR [[OR69]], [[SHL70]] - ; CHECK-NEXT: [[COPY74:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[SHL71:%[0-9]+]]:_(s32) = G_SHL [[COPY74]], [[C10]](s32) + ; CHECK-NEXT: [[PRED_COPY74:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[SHL71:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY74]], [[C10]](s32) ; CHECK-NEXT: [[OR71:%[0-9]+]]:_(s32) = G_OR [[OR70]], [[SHL71]] - ; CHECK-NEXT: [[COPY75:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[SHL72:%[0-9]+]]:_(s32) = G_SHL [[COPY75]], [[C11]](s32) + ; CHECK-NEXT: [[PRED_COPY75:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[SHL72:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY75]], [[C11]](s32) ; CHECK-NEXT: [[OR72:%[0-9]+]]:_(s32) = G_OR [[OR71]], [[SHL72]] - ; CHECK-NEXT: [[COPY76:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[SHL73:%[0-9]+]]:_(s32) = G_SHL [[COPY76]], [[C12]](s32) + ; CHECK-NEXT: [[PRED_COPY76:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[SHL73:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY76]], [[C12]](s32) ; CHECK-NEXT: [[OR73:%[0-9]+]]:_(s32) = G_OR [[OR72]], [[SHL73]] - ; CHECK-NEXT: [[COPY77:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[SHL74:%[0-9]+]]:_(s32) = G_SHL [[COPY77]], [[C13]](s32) + ; CHECK-NEXT: [[PRED_COPY77:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[SHL74:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY77]], [[C13]](s32) ; CHECK-NEXT: [[OR74:%[0-9]+]]:_(s32) = G_OR [[OR73]], [[SHL74]] - ; CHECK-NEXT: 
[[COPY78:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[SHL75:%[0-9]+]]:_(s32) = G_SHL [[COPY78]], [[C14]](s32) + ; CHECK-NEXT: [[PRED_COPY78:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[SHL75:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY78]], [[C14]](s32) ; CHECK-NEXT: [[OR75:%[0-9]+]]:_(s32) = G_OR [[OR74]], [[SHL75]] - ; CHECK-NEXT: [[COPY79:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[SHL76:%[0-9]+]]:_(s32) = G_SHL [[COPY79]], [[C15]](s32) + ; CHECK-NEXT: [[PRED_COPY79:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[SHL76:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY79]], [[C15]](s32) ; CHECK-NEXT: [[OR76:%[0-9]+]]:_(s32) = G_OR [[OR75]], [[SHL76]] - ; CHECK-NEXT: [[COPY80:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[SHL77:%[0-9]+]]:_(s32) = G_SHL [[COPY80]], [[C16]](s32) + ; CHECK-NEXT: [[PRED_COPY80:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[SHL77:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY80]], [[C16]](s32) ; CHECK-NEXT: [[OR77:%[0-9]+]]:_(s32) = G_OR [[OR76]], [[SHL77]] - ; CHECK-NEXT: [[COPY81:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[SHL78:%[0-9]+]]:_(s32) = G_SHL [[COPY81]], [[C17]](s32) + ; CHECK-NEXT: [[PRED_COPY81:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[SHL78:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY81]], [[C17]](s32) ; CHECK-NEXT: [[OR78:%[0-9]+]]:_(s32) = G_OR [[OR77]], [[SHL78]] - ; CHECK-NEXT: [[COPY82:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[SHL79:%[0-9]+]]:_(s32) = G_SHL [[COPY82]], [[C18]](s32) + ; CHECK-NEXT: [[PRED_COPY82:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[SHL79:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY82]], [[C18]](s32) ; CHECK-NEXT: [[OR79:%[0-9]+]]:_(s32) = G_OR [[OR78]], [[SHL79]] - ; CHECK-NEXT: [[COPY83:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[SHL80:%[0-9]+]]:_(s32) = G_SHL [[COPY83]], [[C19]](s32) + ; CHECK-NEXT: [[PRED_COPY83:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[SHL80:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY83]], [[C19]](s32) ; CHECK-NEXT: [[OR80:%[0-9]+]]:_(s32) = G_OR [[OR79]], [[SHL80]] - ; CHECK-NEXT: [[COPY84:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[SHL81:%[0-9]+]]:_(s32) = G_SHL [[COPY84]], [[C20]](s32) + ; CHECK-NEXT: [[PRED_COPY84:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[SHL81:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY84]], [[C20]](s32) ; CHECK-NEXT: [[OR81:%[0-9]+]]:_(s32) = G_OR [[OR80]], [[SHL81]] - ; CHECK-NEXT: [[COPY85:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[SHL82:%[0-9]+]]:_(s32) = G_SHL [[COPY85]], [[C21]](s32) + ; CHECK-NEXT: [[PRED_COPY85:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[SHL82:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY85]], [[C21]](s32) ; CHECK-NEXT: [[OR82:%[0-9]+]]:_(s32) = G_OR [[OR81]], [[SHL82]] - ; CHECK-NEXT: [[COPY86:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[SHL83:%[0-9]+]]:_(s32) = G_SHL [[COPY86]], [[C22]](s32) + ; CHECK-NEXT: [[PRED_COPY86:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[SHL83:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY86]], [[C22]](s32) ; CHECK-NEXT: [[OR83:%[0-9]+]]:_(s32) = G_OR [[OR82]], [[SHL83]] - ; CHECK-NEXT: [[COPY87:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[SHL84:%[0-9]+]]:_(s32) = G_SHL [[COPY87]], [[C23]](s32) + ; CHECK-NEXT: [[PRED_COPY87:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[SHL84:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY87]], [[C23]](s32) ; CHECK-NEXT: [[OR84:%[0-9]+]]:_(s32) = G_OR [[OR83]], [[SHL84]] - ; CHECK-NEXT: [[COPY88:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: 
[[SHL85:%[0-9]+]]:_(s32) = G_SHL [[COPY88]], [[C24]](s32) + ; CHECK-NEXT: [[PRED_COPY88:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[SHL85:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY88]], [[C24]](s32) ; CHECK-NEXT: [[OR85:%[0-9]+]]:_(s32) = G_OR [[OR84]], [[SHL85]] - ; CHECK-NEXT: [[COPY89:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[SHL86:%[0-9]+]]:_(s32) = G_SHL [[COPY89]], [[C25]](s32) + ; CHECK-NEXT: [[PRED_COPY89:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[SHL86:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY89]], [[C25]](s32) ; CHECK-NEXT: [[OR86:%[0-9]+]]:_(s32) = G_OR [[OR85]], [[SHL86]] - ; CHECK-NEXT: [[COPY90:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[SHL87:%[0-9]+]]:_(s32) = G_SHL [[COPY90]], [[C26]](s32) + ; CHECK-NEXT: [[PRED_COPY90:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[SHL87:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY90]], [[C26]](s32) ; CHECK-NEXT: [[OR87:%[0-9]+]]:_(s32) = G_OR [[OR86]], [[SHL87]] - ; CHECK-NEXT: [[COPY91:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[SHL88:%[0-9]+]]:_(s32) = G_SHL [[COPY91]], [[C27]](s32) + ; CHECK-NEXT: [[PRED_COPY91:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[SHL88:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY91]], [[C27]](s32) ; CHECK-NEXT: [[OR88:%[0-9]+]]:_(s32) = G_OR [[OR87]], [[SHL88]] - ; CHECK-NEXT: [[COPY92:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[SHL89:%[0-9]+]]:_(s32) = G_SHL [[COPY92]], [[C28]](s32) + ; CHECK-NEXT: [[PRED_COPY92:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[SHL89:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY92]], [[C28]](s32) ; CHECK-NEXT: [[OR89:%[0-9]+]]:_(s32) = G_OR [[OR88]], [[SHL89]] - ; CHECK-NEXT: [[COPY93:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[SHL90:%[0-9]+]]:_(s32) = G_SHL [[COPY93]], [[C29]](s32) + ; CHECK-NEXT: [[PRED_COPY93:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[SHL90:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY93]], [[C29]](s32) ; CHECK-NEXT: [[OR90:%[0-9]+]]:_(s32) = G_OR [[OR89]], [[SHL90]] - ; CHECK-NEXT: [[COPY94:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[SHL91:%[0-9]+]]:_(s32) = G_SHL [[COPY94]], [[C30]](s32) + ; CHECK-NEXT: [[PRED_COPY94:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[SHL91:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY94]], [[C30]](s32) ; CHECK-NEXT: [[OR91:%[0-9]+]]:_(s32) = G_OR [[OR90]], [[SHL91]] - ; CHECK-NEXT: [[COPY95:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[SHL92:%[0-9]+]]:_(s32) = G_SHL [[COPY95]], [[C31]](s32) + ; CHECK-NEXT: [[PRED_COPY95:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[SHL92:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY95]], [[C31]](s32) ; CHECK-NEXT: [[OR92:%[0-9]+]]:_(s32) = G_OR [[OR91]], [[SHL92]] ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[OR30]](s32), [[OR61]](s32), [[OR92]](s32) ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s68) = G_TRUNC [[MV]](s96) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-or.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-or.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-or.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-or.mir @@ -459,8 +459,8 @@ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY2]], [[C]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL 
[[PRED_COPY]], [[C]](s32) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[BITCAST1]](<2 x s16>) @@ -541,8 +541,8 @@ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY]], [[C]](s32) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[BITCAST4]](<2 x s16>) @@ -561,10 +561,10 @@ ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL2]] ; CHECK-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[COPY2]], [[C]](s32) - ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[COPY1]], [[SHL3]] + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY2]], [[C]](s32) + ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY1]], [[SHL3]] ; CHECK-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) ; CHECK-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST8]](<2 x s16>), [[BITCAST9]](<2 x s16>) ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] @@ -572,10 +572,10 @@ ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL4]] ; CHECK-NEXT: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[COPY4]], [[C]](s32) - ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[COPY3]], [[SHL5]] + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY4]], [[C]](s32) + ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY3]], [[SHL5]] ; CHECK-NEXT: [[BITCAST11:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32) ; CHECK-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST10]](<2 x s16>), [[BITCAST11]](<2 x s16>) ; CHECK-NEXT: [[OR7:%[0-9]+]]:_(<4 x s16>) = G_OR [[CONCAT_VECTORS2]], [[CONCAT_VECTORS3]] diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-phi.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-phi.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-phi.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-phi.mir @@ -134,8 +134,8 @@ ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C2]] - ; CHECK-NEXT: 
[[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY2]], [[C1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY]], [[C1]](s32) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[BITCAST2]](<2 x s16>) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-rotl-rotr.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-rotl-rotr.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-rotl-rotr.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-rotl-rotr.mir @@ -21,33 +21,33 @@ ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 32767 ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]] - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; GFX6-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[COPY2]](s32) + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C1]](s32) + ; GFX6-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[PRED_COPY]](s32) ; GFX6-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 ; GFX6-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C3]] ; GFX6-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) ; GFX6-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C4]], [[COPY2]] + ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C4]], [[PRED_COPY]] ; GFX6-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] ; GFX6-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] ; GFX6-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]] - ; GFX6-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[COPY2]] + ; GFX6-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[PRED_COPY]] ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] ; GFX6-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[COPY2]] - ; GFX6-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[COPY2]] + ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[PRED_COPY]] + ; GFX6-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[PRED_COPY]] ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] - ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[COPY2]] - ; GFX6-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[COPY2]] + ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[PRED_COPY]] + ; GFX6-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[PRED_COPY]] ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SELECT1]], [[C2]] ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[AND1]](s32) ; GFX6-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C]], [[SELECT1]] - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C5]](s32) + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C5]](s32) ; GFX6-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C2]] - ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY3]](s32) + ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR 
[[AND2]], [[PRED_COPY1]](s32) ; GFX6-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[SUB4]], [[C2]] ; GFX6-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[AND3]](s32) @@ -61,26 +61,26 @@ ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 32767 ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; GFX8-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[COPY2]](s32) + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; GFX8-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[PRED_COPY]](s32) ; GFX8-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 ; GFX8-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]] ; GFX8-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) ; GFX8-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[COPY2]] + ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[PRED_COPY]] ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]] - ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[COPY2]] + ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[PRED_COPY]] ; GFX8-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] ; GFX8-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[COPY2]] - ; GFX8-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[COPY2]] + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[PRED_COPY]] + ; GFX8-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[PRED_COPY]] ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] - ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[COPY2]] - ; GFX8-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[COPY2]] + ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[PRED_COPY]] + ; GFX8-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[PRED_COPY]] ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SELECT1]], [[C1]] ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) @@ -89,10 +89,10 @@ ; GFX8-NEXT: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 14 ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SELECT1]](s32) ; GFX8-NEXT: [[SUB4:%[0-9]+]]:_(s16) = G_SUB [[C5]], [[TRUNC2]] - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C4]](s32) + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C4]](s32) ; GFX8-NEXT: [[C6:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C6]] - ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32) + ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY1]](s32) ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[AND2]], [[TRUNC3]](s16) ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SUB4]](s16) ; GFX8-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C1]] @@ -215,33 +215,33 @@ ; GFX-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 ; GFX-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 
; GFX-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]] - ; GFX-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; GFX-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[COPY2]](s32) + ; GFX-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C1]](s32) + ; GFX-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[PRED_COPY]](s32) ; GFX-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) ; GFX-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 ; GFX-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C3]] ; GFX-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) ; GFX-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C4]], [[COPY2]] + ; GFX-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C4]], [[PRED_COPY]] ; GFX-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] ; GFX-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] ; GFX-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] ; GFX-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]] - ; GFX-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[COPY2]] + ; GFX-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[PRED_COPY]] ; GFX-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] ; GFX-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[COPY2]] - ; GFX-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[COPY2]] + ; GFX-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[PRED_COPY]] + ; GFX-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[PRED_COPY]] ; GFX-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] - ; GFX-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[COPY2]] - ; GFX-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[COPY2]] + ; GFX-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[PRED_COPY]] + ; GFX-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[PRED_COPY]] ; GFX-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] ; GFX-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SELECT1]], [[C2]] ; GFX-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[AND1]](s32) ; GFX-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C]], [[SELECT1]] - ; GFX-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C5]](s32) + ; GFX-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C5]](s32) ; GFX-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C2]] - ; GFX-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY3]](s32) + ; GFX-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[PRED_COPY1]](s32) ; GFX-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[SUB4]], [[C2]] ; GFX-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] ; GFX-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[AND3]](s32) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sadde.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sadde.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sadde.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sadde.mir @@ -10,24 +10,24 @@ ; CHECK-LABEL: name: test_sadde_s32 ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY 
$vgpr2 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] - ; CHECK-NEXT: [[SADDE:%[0-9]+]]:_(s32), [[SADDE1:%[0-9]+]]:_(s1) = G_SADDE [[COPY]], [[COPY1]], [[ICMP]] + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[PRED_COPY2]](s32), [[C]] + ; CHECK-NEXT: [[SADDE:%[0-9]+]]:_(s32), [[SADDE1:%[0-9]+]]:_(s1) = G_SADDE [[PRED_COPY]], [[PRED_COPY1]], [[ICMP]] ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[SADDE1]](s1) - ; CHECK-NEXT: $vgpr0 = COPY [[SADDE]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[ZEXT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = COPY $vgpr2 + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[SADDE]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[ZEXT]](s32) + %0:_(s32) = PRED_COPY $vgpr0 + %1:_(s32) = PRED_COPY $vgpr1 + %2:_(s32) = PRED_COPY $vgpr2 %3:_(s32) = G_CONSTANT i32 0 %4:_(s1) = G_ICMP intpred(eq), %2, %3 %5:_(s32), %6:_(s1) = G_SADDE %0, %1, %4 %7:_(s32) = G_ZEXT %6 - $vgpr0 = COPY %5 - $vgpr1 = COPY %7 + $vgpr0 = PRED_COPY %5 + $vgpr1 = PRED_COPY %7 ... --- @@ -39,15 +39,15 @@ ; CHECK-LABEL: name: test_sadde_v2s32 ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(<2 x s32>) = PRED_COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(<2 x s32>) = PRED_COPY $vgpr2_vgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(<2 x s32>) = PRED_COPY $vgpr4_vgpr5 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<2 x s32>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[PRED_COPY2]](<2 x s32>) ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UV]](s32), [[C]] ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UV1]](s32), [[C]] - ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[PRED_COPY]](<2 x s32>) + ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[PRED_COPY1]](<2 x s32>) ; CHECK-NEXT: [[SADDE:%[0-9]+]]:_(s32), [[SADDE1:%[0-9]+]]:_(s1) = G_SADDE [[UV2]], [[UV4]], [[ICMP]] ; CHECK-NEXT: [[SADDE2:%[0-9]+]]:_(s32), [[SADDE3:%[0-9]+]]:_(s1) = G_SADDE [[UV3]], [[UV5]], [[ICMP1]] ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SADDE]](s32), [[SADDE2]](s32) @@ -57,18 +57,18 @@ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C1]] ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C1]] ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) - ; CHECK-NEXT: $vgpr2_vgpr3 = COPY [[BUILD_VECTOR1]](<2 x s32>) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 - %2:_(<2 x s32>) = COPY $vgpr4_vgpr5 + ; CHECK-NEXT: $vgpr0_vgpr1 = PRED_COPY [[BUILD_VECTOR]](<2 x s32>) + ; CHECK-NEXT: $vgpr2_vgpr3 = PRED_COPY [[BUILD_VECTOR1]](<2 x s32>) + %0:_(<2 x s32>) = PRED_COPY $vgpr0_vgpr1 + %1:_(<2 x s32>) = PRED_COPY $vgpr2_vgpr3 + %2:_(<2 x s32>) = 
PRED_COPY $vgpr4_vgpr5 %3:_(s32) = G_CONSTANT i32 0 %4:_(<2 x s32>) = G_BUILD_VECTOR %3, %3 %5:_(<2 x s1>) = G_ICMP intpred(eq), %2, %4 %6:_(<2 x s32>), %7:_(<2 x s1>) = G_SADDE %0, %1, %5 %8:_(<2 x s32>) = G_ZEXT %7 - $vgpr0_vgpr1 = COPY %6 - $vgpr2_vgpr3 = COPY %8 + $vgpr0_vgpr1 = PRED_COPY %6 + $vgpr2_vgpr3 = PRED_COPY %8 ... --- @@ -80,21 +80,21 @@ ; CHECK-LABEL: name: test_sadde_s16 ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] - ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 16 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[PRED_COPY1]](s32), [[C]] + ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[PRED_COPY]], 16 ; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG %13, 16 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[SEXT_INREG1]](s32) - ; CHECK-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[SEXT_INREG]], [[COPY2]], [[ICMP]] + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[SEXT_INREG1]](s32) + ; CHECK-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[SEXT_INREG]], [[PRED_COPY2]], [[ICMP]] ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UADDE]](s32), [[SEXT_INREG1]] ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP1]](s1) - ; CHECK-NEXT: $vgpr0 = COPY [[UADDE]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[ZEXT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = COPY $vgpr2 + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UADDE]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[ZEXT]](s32) + %0:_(s32) = PRED_COPY $vgpr0 + %1:_(s32) = PRED_COPY $vgpr1 + %2:_(s32) = PRED_COPY $vgpr2 %3:_(s32) = G_CONSTANT i32 0 %4:_(s1) = G_ICMP intpred(eq), %2, %3 %5:_(s16) = G_TRUNC %0 @@ -102,8 +102,8 @@ %7:_(s16), %8:_(s1) = G_SADDE %6, %7, %4 %9:_(s32) = G_ANYEXT %7 %10:_(s32) = G_ZEXT %8 - $vgpr0 = COPY %9 - $vgpr1 = COPY %10 + $vgpr0 = PRED_COPY %9 + $vgpr1 = PRED_COPY %10 ... 
--- @@ -115,26 +115,26 @@ ; CHECK-LABEL: name: test_sadde_s64 ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s64) = PRED_COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s64) = PRED_COPY $vgpr2_vgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[PRED_COPY2]](s32), [[C]] + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[PRED_COPY]](s64) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[PRED_COPY1]](s64) ; CHECK-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV]], [[UV2]], [[ICMP]] ; CHECK-NEXT: [[SADDE:%[0-9]+]]:_(s32), [[SADDE1:%[0-9]+]]:_(s1) = G_SADDE [[UV1]], [[UV3]], [[UADDE1]] ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDE]](s32), [[SADDE]](s32) ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[SADDE1]](s1) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) - ; CHECK-NEXT: $vgpr2 = COPY [[ZEXT]](s32) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s64) = COPY $vgpr2_vgpr3 - %2:_(s32) = COPY $vgpr4 + ; CHECK-NEXT: $vgpr0_vgpr1 = PRED_COPY [[MV]](s64) + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[ZEXT]](s32) + %0:_(s64) = PRED_COPY $vgpr0_vgpr1 + %1:_(s64) = PRED_COPY $vgpr2_vgpr3 + %2:_(s32) = PRED_COPY $vgpr4 %3:_(s32) = G_CONSTANT i32 0 %4:_(s1) = G_ICMP intpred(eq), %2, %3 %5:_(s64), %6:_(s1) = G_SADDE %0, %1, %4 %7:_(s32) = G_ZEXT %6 - $vgpr0_vgpr1 = COPY %5 - $vgpr2 = COPY %7 + $vgpr0_vgpr1 = PRED_COPY %5 + $vgpr2 = PRED_COPY %7 ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-saddo.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-saddo.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-saddo.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-saddo.mir @@ -18,8 +18,8 @@ ; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 7 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG]](s32), [[SEXT_INREG1]] ; CHECK-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 7 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG2]](s32), [[COPY2]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG2]](s32), [[PRED_COPY]] ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP1]], [[ICMP]] ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 127 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ADD]], [[C1]] @@ -164,8 +164,8 @@ ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) ; CHECK-NEXT: [[SEXT_INREG4:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST4]], 16 ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG4]](s32), [[COPY2]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG4]](s32), [[PRED_COPY]] ; CHECK-NEXT: [[SEXT_INREG5:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR3]], 16 ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG5]](s32), [[C2]] ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP2]], [[ICMP]] @@ -227,11 +227,11 @@ ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>) ; CHECK-NEXT: [[SEXT_INREG6:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST6]], 16 ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG6]](s32), [[COPY2]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C1]](s32) + ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG6]](s32), [[PRED_COPY]] ; CHECK-NEXT: [[SEXT_INREG7:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR3]], 16 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; CHECK-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG7]](s32), [[COPY3]] + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C1]](s32) + ; CHECK-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG7]](s32), [[PRED_COPY1]] ; CHECK-NEXT: [[SEXT_INREG8:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST7]], 16 ; CHECK-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG8]](s32), [[C1]] ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP3]], [[ICMP]] @@ -343,14 +343,14 @@ ; CHECK-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST9]], [[C]](s32) ; CHECK-NEXT: [[SEXT_INREG8:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST8]], 16 ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; CHECK-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG8]](s32), [[COPY2]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; CHECK-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG8]](s32), [[PRED_COPY]] ; CHECK-NEXT: 
[[SEXT_INREG9:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR6]], 16 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; CHECK-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG9]](s32), [[COPY3]] + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; CHECK-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG9]](s32), [[PRED_COPY1]] ; CHECK-NEXT: [[SEXT_INREG10:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST9]], 16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; CHECK-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG10]](s32), [[COPY4]] + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; CHECK-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG10]](s32), [[PRED_COPY2]] ; CHECK-NEXT: [[SEXT_INREG11:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR7]], 16 ; CHECK-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG11]](s32), [[C2]] ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP4]], [[ICMP]] diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-saddsat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-saddsat.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-saddsat.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-saddsat.mir @@ -190,10 +190,10 @@ ; GFX6-NEXT: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX6-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[ASHR]](s32) ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C5]] - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) ; GFX6-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ASHR1]], [[C6]] - ; GFX6-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY2]](s32) + ; GFX6-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[PRED_COPY]](s32) ; GFX6-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] ; GFX6-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-select.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-select.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-select.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-select.mir @@ -403,16 +403,16 @@ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C2]] - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY3]], [[C1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY]], [[C1]](s32) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[BITCAST1]](<2 x s16>) ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C2]] - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY4]], [[C1]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = 
G_SHL [[PRED_COPY1]], [[C1]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL1]] ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; CHECK-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[UV3]](<2 x s16>), [[BITCAST3]](<2 x s16>) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sext-inreg.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sext-inreg.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sext-inreg.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sext-inreg.mir @@ -1236,8 +1236,8 @@ ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SEXT_INREG]](s64), [[ASHR]](s64) ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s64) = G_TRUNC [[UV1]](s128) ; GFX9-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s64) = G_SEXT_INREG [[TRUNC1]], 1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; GFX9-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[SEXT_INREG1]], [[COPY1]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; GFX9-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[SEXT_INREG1]], [[PRED_COPY]](s32) ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SEXT_INREG1]](s64), [[ASHR1]](s64) ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s128>) = G_BUILD_VECTOR [[MV]](s128), [[MV1]](s128) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<2 x s128>) @@ -1253,8 +1253,8 @@ ; GFX8-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SEXT_INREG]](s64), [[ASHR]](s64) ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s64) = G_TRUNC [[UV1]](s128) ; GFX8-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s64) = G_SEXT_INREG [[TRUNC1]], 1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; GFX8-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[SEXT_INREG1]], [[COPY1]](s32) + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; GFX8-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[SEXT_INREG1]], [[PRED_COPY]](s32) ; GFX8-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SEXT_INREG1]](s64), [[ASHR1]](s64) ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s128>) = G_BUILD_VECTOR [[MV]](s128), [[MV1]](s128) ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<2 x s128>) @@ -1270,8 +1270,8 @@ ; GFX6-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SEXT_INREG]](s64), [[ASHR]](s64) ; GFX6-NEXT: [[TRUNC1:%[0-9]+]]:_(s64) = G_TRUNC [[UV1]](s128) ; GFX6-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s64) = G_SEXT_INREG [[TRUNC1]], 1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; GFX6-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[SEXT_INREG1]], [[COPY1]](s32) + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; GFX6-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[SEXT_INREG1]], [[PRED_COPY]](s32) ; GFX6-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SEXT_INREG1]](s64), [[ASHR1]](s64) ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s128>) = G_BUILD_VECTOR [[MV]](s128), [[MV1]](s128) ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<2 x s128>) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shuffle-vector.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shuffle-vector.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shuffle-vector.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shuffle-vector.mir @@ -36,10 +36,10 @@ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) 
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV]](s32) ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[UV3]](s32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32) ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 @@ -62,10 +62,10 @@ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV1]](s32) ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[UV2]](s32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32) ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 @@ -88,10 +88,10 @@ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV]](s32) ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[UV2]](s32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32) ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 @@ -138,8 +138,8 @@ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[DEF]](s32), [[COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[DEF]](s32), [[PRED_COPY]](s32) ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 @@ -162,9 +162,9 @@ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES 
[[COPY]](<2 x s32>) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV]](s32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[DEF]](s32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[DEF]](s32) ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 @@ -187,12 +187,12 @@ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV]](s32) ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV5]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[UV5]](s32) ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV7]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[UV7]](s32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32) ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 @@ -215,10 +215,10 @@ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV2]](s32) ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV4]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[UV4]](s32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32) ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 @@ -313,18 +313,18 @@ ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[BITCAST2]](s32) + ; CHECK-NEXT: 
[[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[LSHR]](s32) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[BITCAST1]](s32) + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY [[BITCAST]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY]], [[C1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY1]], [[C1]] ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]] - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY2]], [[C1]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY3]], [[C1]] ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shuffle-vector.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shuffle-vector.s16.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shuffle-vector.s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shuffle-vector.s16.mir @@ -56,9 +56,9 @@ ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY2]], [[C2]](s32) + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY]], [[C2]](s32) ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) @@ -97,12 +97,12 @@ ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C2]](s32) - ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY2]], [[SHL]] + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY]], [[SHL]] ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) ; GFX9-LABEL: name: shufflevector_v2s16_v2s16_undef_0 @@ -378,9 +378,9 @@ ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY2]], 
[[C2]](s32) + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY]], [[C2]](s32) ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) @@ -419,12 +419,12 @@ ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C2]](s32) - ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY2]], [[SHL]] + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY]], [[SHL]] ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) ; GFX9-LABEL: name: shufflevector_v2s16_v2s16_undef_2 @@ -978,18 +978,18 @@ ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32) - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) - ; GFX8-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[BITCAST2]](s32) + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[LSHR]](s32) + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[BITCAST1]](s32) + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY [[BITCAST]](s32) ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] - ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY]], [[C1]] + ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY1]], [[C1]] ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]] - ; GFX8-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] + ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY2]], [[C1]] + ; GFX8-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY3]], [[C1]] ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) @@ -1007,14 +1007,14 @@ ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32) - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY 
[[BITCAST1]](s32) - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY4]](s32) - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY5]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[BITCAST2]](s32) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY]](s32) + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[LSHR]](s32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY1]](s32) + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[BITCAST1]](s32) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY2]](s32) + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY [[BITCAST]](s32) + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY3]](s32) ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) @@ -1044,13 +1044,13 @@ ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY]](<4 x s16>) ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<2 x s32>) - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV]](s32) ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C1]](s32) + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C1]](s32) ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY]](<4 x s16>) ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST1]](<2 x s32>) - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32) + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[UV2]](s32) + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY1]], [[C]](s32) ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C2]] @@ -1066,14 +1066,14 @@ ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY]](<4 x s16>) ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<2 x s32>) - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV]](s32) ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C1]](s32) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C1]](s32) ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY]](<4 x s16>) ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST1]](<2 x s32>) - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32) + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[UV2]](s32) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY1]], [[C]](s32) ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) ; GFX9-NEXT: 
[[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) @@ -1099,13 +1099,13 @@ ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY]](<4 x s16>) ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<2 x s32>) - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV]](s32) ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY]](<4 x s16>) ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST1]](<2 x s32>) - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32) + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[UV3]](s32) + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY1]], [[C]](s32) ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] @@ -1120,14 +1120,14 @@ ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY]](<4 x s16>) ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<2 x s32>) - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV]](s32) ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY]](<4 x s16>) ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST1]](<2 x s32>) - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32) + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[UV3]](s32) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY1]], [[C]](s32) ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sshlsat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sshlsat.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sshlsat.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sshlsat.mir @@ -207,9 +207,9 @@ ; GFX6-NEXT: [[C6:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX6-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[ASHR1]](s32) ; GFX6-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C6]] - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) ; GFX6-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ASHR3]], [[C2]] - ; GFX6-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32) + ; GFX6-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[PRED_COPY]](s32) ; GFX6-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC 
[[SHL4]](s32) ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC1]] ; GFX6-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ssube.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ssube.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ssube.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ssube.mir @@ -10,24 +10,24 @@ ; CHECK-LABEL: name: test_ssube_s32 ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] - ; CHECK-NEXT: [[SSUBE:%[0-9]+]]:_(s32), [[SSUBE1:%[0-9]+]]:_(s1) = G_SSUBE [[COPY]], [[COPY1]], [[ICMP]] + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[PRED_COPY2]](s32), [[C]] + ; CHECK-NEXT: [[SSUBE:%[0-9]+]]:_(s32), [[SSUBE1:%[0-9]+]]:_(s1) = G_SSUBE [[PRED_COPY]], [[PRED_COPY1]], [[ICMP]] ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[SSUBE1]](s1) - ; CHECK-NEXT: $vgpr0 = COPY [[SSUBE]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[ZEXT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = COPY $vgpr2 + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[SSUBE]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[ZEXT]](s32) + %0:_(s32) = PRED_COPY $vgpr0 + %1:_(s32) = PRED_COPY $vgpr1 + %2:_(s32) = PRED_COPY $vgpr2 %3:_(s32) = G_CONSTANT i32 0 %4:_(s1) = G_ICMP intpred(eq), %2, %3 %5:_(s32), %6:_(s1) = G_SSUBE %0, %1, %4 %7:_(s32) = G_ZEXT %6 - $vgpr0 = COPY %5 - $vgpr1 = COPY %7 + $vgpr0 = PRED_COPY %5 + $vgpr1 = PRED_COPY %7 ... 
--- @@ -39,15 +39,15 @@ ; CHECK-LABEL: name: test_ssube_v2s32 ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(<2 x s32>) = PRED_COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(<2 x s32>) = PRED_COPY $vgpr2_vgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(<2 x s32>) = PRED_COPY $vgpr4_vgpr5 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<2 x s32>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[PRED_COPY2]](<2 x s32>) ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UV]](s32), [[C]] ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UV1]](s32), [[C]] - ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[PRED_COPY]](<2 x s32>) + ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[PRED_COPY1]](<2 x s32>) ; CHECK-NEXT: [[SSUBE:%[0-9]+]]:_(s32), [[SSUBE1:%[0-9]+]]:_(s1) = G_SSUBE [[UV2]], [[UV4]], [[ICMP]] ; CHECK-NEXT: [[SSUBE2:%[0-9]+]]:_(s32), [[SSUBE3:%[0-9]+]]:_(s1) = G_SSUBE [[UV3]], [[UV5]], [[ICMP1]] ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SSUBE]](s32), [[SSUBE2]](s32) @@ -57,18 +57,18 @@ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C1]] ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C1]] ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) - ; CHECK-NEXT: $vgpr2_vgpr3 = COPY [[BUILD_VECTOR1]](<2 x s32>) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 - %2:_(<2 x s32>) = COPY $vgpr4_vgpr5 + ; CHECK-NEXT: $vgpr0_vgpr1 = PRED_COPY [[BUILD_VECTOR]](<2 x s32>) + ; CHECK-NEXT: $vgpr2_vgpr3 = PRED_COPY [[BUILD_VECTOR1]](<2 x s32>) + %0:_(<2 x s32>) = PRED_COPY $vgpr0_vgpr1 + %1:_(<2 x s32>) = PRED_COPY $vgpr2_vgpr3 + %2:_(<2 x s32>) = PRED_COPY $vgpr4_vgpr5 %3:_(s32) = G_CONSTANT i32 0 %4:_(<2 x s32>) = G_BUILD_VECTOR %3, %3 %5:_(<2 x s1>) = G_ICMP intpred(eq), %2, %4 %6:_(<2 x s32>), %7:_(<2 x s1>) = G_SSUBE %0, %1, %5 %8:_(<2 x s32>) = G_ZEXT %7 - $vgpr0_vgpr1 = COPY %6 - $vgpr2_vgpr3 = COPY %8 + $vgpr0_vgpr1 = PRED_COPY %6 + $vgpr2_vgpr3 = PRED_COPY %8 ... 
--- name: test_ssube_s16 @@ -79,21 +79,21 @@ ; CHECK-LABEL: name: test_ssube_s16 ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] - ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 16 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[PRED_COPY1]](s32), [[C]] + ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[PRED_COPY]], 16 ; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG %13, 16 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[SEXT_INREG1]](s32) - ; CHECK-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[SEXT_INREG]], [[COPY2]], [[ICMP]] + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[SEXT_INREG1]](s32) + ; CHECK-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[SEXT_INREG]], [[PRED_COPY2]], [[ICMP]] ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[USUBE]](s32), [[SEXT_INREG1]] ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP1]](s1) - ; CHECK-NEXT: $vgpr0 = COPY [[USUBE]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[ZEXT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = COPY $vgpr2 + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[USUBE]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[ZEXT]](s32) + %0:_(s32) = PRED_COPY $vgpr0 + %1:_(s32) = PRED_COPY $vgpr1 + %2:_(s32) = PRED_COPY $vgpr2 %3:_(s32) = G_CONSTANT i32 0 %4:_(s1) = G_ICMP intpred(eq), %2, %3 %5:_(s16) = G_TRUNC %0 @@ -101,8 +101,8 @@ %7:_(s16), %8:_(s1) = G_SSUBE %6, %7, %4 %9:_(s32) = G_ANYEXT %7 %10:_(s32) = G_ZEXT %8 - $vgpr0 = COPY %9 - $vgpr1 = COPY %10 + $vgpr0 = PRED_COPY %9 + $vgpr1 = PRED_COPY %10 ... 
--- @@ -114,26 +114,26 @@ ; CHECK-LABEL: name: test_ssube_s64 ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s64) = PRED_COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s64) = PRED_COPY $vgpr2_vgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[PRED_COPY2]](s32), [[C]] + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[PRED_COPY]](s64) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[PRED_COPY1]](s64) ; CHECK-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV]], [[UV2]], [[ICMP]] ; CHECK-NEXT: [[SSUBE:%[0-9]+]]:_(s32), [[SSUBE1:%[0-9]+]]:_(s1) = G_SSUBE [[UV1]], [[UV3]], [[USUBE1]] ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBE]](s32), [[SSUBE]](s32) ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[SSUBE1]](s1) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) - ; CHECK-NEXT: $vgpr2 = COPY [[ZEXT]](s32) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s64) = COPY $vgpr2_vgpr3 - %2:_(s32) = COPY $vgpr4 + ; CHECK-NEXT: $vgpr0_vgpr1 = PRED_COPY [[MV]](s64) + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[ZEXT]](s32) + %0:_(s64) = PRED_COPY $vgpr0_vgpr1 + %1:_(s64) = PRED_COPY $vgpr2_vgpr3 + %2:_(s32) = PRED_COPY $vgpr4 %3:_(s32) = G_CONSTANT i32 0 %4:_(s1) = G_ICMP intpred(eq), %2, %3 %5:_(s64), %6:_(s1) = G_SSUBE %0, %1, %4 %7:_(s32) = G_ZEXT %6 - $vgpr0_vgpr1 = COPY %5 - $vgpr2 = COPY %7 + $vgpr0_vgpr1 = PRED_COPY %5 + $vgpr2 = PRED_COPY %7 ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ssubo.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ssubo.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ssubo.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ssubo.mir @@ -18,8 +18,8 @@ ; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 7 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG]](s32), [[SEXT_INREG1]] ; CHECK-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 7 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[SEXT_INREG2]](s32), [[COPY2]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[SEXT_INREG2]](s32), [[PRED_COPY]] ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP1]], [[ICMP]] ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 127 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[SUB]], [[C1]] @@ -164,8 +164,8 @@ ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) ; CHECK-NEXT: [[SEXT_INREG4:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST4]], 16 ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[SEXT_INREG4]](s32), [[COPY2]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[SEXT_INREG4]](s32), [[PRED_COPY]] ; CHECK-NEXT: [[SEXT_INREG5:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR3]], 16 ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[SEXT_INREG5]](s32), [[C2]] ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP2]], [[ICMP]] @@ -227,11 +227,11 @@ ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>) ; CHECK-NEXT: [[SEXT_INREG6:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST6]], 16 ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[SEXT_INREG6]](s32), [[COPY2]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C1]](s32) + ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[SEXT_INREG6]](s32), [[PRED_COPY]] ; CHECK-NEXT: [[SEXT_INREG7:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR3]], 16 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; CHECK-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[SEXT_INREG7]](s32), [[COPY3]] + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C1]](s32) + ; CHECK-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[SEXT_INREG7]](s32), [[PRED_COPY1]] ; CHECK-NEXT: [[SEXT_INREG8:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST7]], 16 ; CHECK-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[SEXT_INREG8]](s32), [[C1]] ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP3]], [[ICMP]] @@ -343,14 +343,14 @@ ; CHECK-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST9]], [[C]](s32) ; CHECK-NEXT: [[SEXT_INREG8:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST8]], 16 ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; CHECK-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[SEXT_INREG8]](s32), [[COPY2]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; CHECK-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[SEXT_INREG8]](s32), [[PRED_COPY]] ; CHECK-NEXT: 
[[SEXT_INREG9:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR6]], 16 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; CHECK-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[SEXT_INREG9]](s32), [[COPY3]] + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; CHECK-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[SEXT_INREG9]](s32), [[PRED_COPY1]] ; CHECK-NEXT: [[SEXT_INREG10:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST9]], 16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; CHECK-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[SEXT_INREG10]](s32), [[COPY4]] + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; CHECK-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[SEXT_INREG10]](s32), [[PRED_COPY2]] ; CHECK-NEXT: [[SEXT_INREG11:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR7]], 16 ; CHECK-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[SEXT_INREG11]](s32), [[C2]] ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP4]], [[ICMP]] diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ssubsat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ssubsat.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ssubsat.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ssubsat.mir @@ -190,10 +190,10 @@ ; GFX6-NEXT: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX6-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[ASHR]](s32) ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C5]] - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) ; GFX6-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ASHR1]], [[C6]] - ; GFX6-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY2]](s32) + ; GFX6-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[PRED_COPY]](s32) ; GFX6-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] ; GFX6-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store-global.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store-global.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store-global.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store-global.mir @@ -267,48 +267,48 @@ ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[COPY1]](s32) ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), align 4, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s16), align 4, addrspace 1) ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, align 2, addrspace 1) ; CI-LABEL: name: test_store_global_s24_align4 ; CI: liveins: $vgpr0_vgpr1, $vgpr2 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[COPY1]](s32) ; 
CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) + ; CI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; CI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), align 4, addrspace 1) + ; CI-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s16), align 4, addrspace 1) ; CI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, align 2, addrspace 1) ; VI-LABEL: name: test_store_global_s24_align4 ; VI: liveins: $vgpr0_vgpr1, $vgpr2 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[COPY1]](s32) ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), align 4, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s16), align 4, addrspace 1) ; VI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, align 2, addrspace 1) ; GFX9-LABEL: name: test_store_global_s24_align4 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[COPY1]](s32) ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), align 4, addrspace 1) + ; GFX9-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s16), align 4, addrspace 1) ; GFX9-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, align 2, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(s32) = COPY $vgpr2 @@ -327,48 +327,48 @@ ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[COPY1]](s32) ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, align 2, addrspace 1) ; CI-LABEL: name: test_store_global_s24_align2 ; CI: liveins: $vgpr0_vgpr1, $vgpr2 ; CI-NEXT: {{ $}} ; 
CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[COPY1]](s32) ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) + ; CI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; CI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) + ; CI-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; CI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, align 2, addrspace 1) ; VI-LABEL: name: test_store_global_s24_align2 ; VI: liveins: $vgpr0_vgpr1, $vgpr2 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[COPY1]](s32) ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; VI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, align 2, addrspace 1) ; GFX9-LABEL: name: test_store_global_s24_align2 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[COPY1]](s32) ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) + ; GFX9-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; GFX9-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, align 2, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(s32) = COPY $vgpr2 @@ -387,18 +387,18 @@ ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[COPY1]](s32) ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] + ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = 
G_AND [[PRED_COPY]], [[C3]] ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[C2]](s32) ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) ; CI-LABEL: name: test_store_global_s24_align1 @@ -406,29 +406,29 @@ ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[COPY1]](s32) ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) + ; CI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; CI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), align 1, addrspace 1) + ; CI-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s16), align 1, addrspace 1) ; CI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) ; VI-LABEL: name: test_store_global_s24_align1 ; VI: liveins: $vgpr0_vgpr1, $vgpr2 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[COPY1]](s32) ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY]](s32) ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[C2]](s16) ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR1]](s16) ; VI-NEXT: G_STORE [[ANYEXT]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) ; VI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) @@ -437,12 +437,12 @@ ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[COPY1]](s32) ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = 
G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), align 1, addrspace 1) + ; GFX9-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s16), align 1, addrspace 1) ; GFX9-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(s32) = COPY $vgpr2 @@ -529,22 +529,22 @@ ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[COPY1]](s32) ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] + ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY]], [[C3]] ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[C2]](s32) ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) - ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; SI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]] - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY3]](s32) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[PRED_COPY1]](s32) ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) @@ -559,17 +559,17 @@ ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[COPY1]](s32) ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY]](s32) ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[C2]](s16) ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR1]](s16) ; VI-NEXT: G_STORE [[ANYEXT]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 
1, addrspace 1) ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) @@ -600,12 +600,12 @@ ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[COPY1]](s32) ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) ; CI-LABEL: name: test_store_global_s32_align2 ; CI: liveins: $vgpr0_vgpr1, $vgpr2 @@ -618,12 +618,12 @@ ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[COPY1]](s32) ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; VI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) ; GFX9-LABEL: name: test_store_global_s32_align2 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 @@ -683,22 +683,22 @@ ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(p3) = COPY $vgpr2 ; SI-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY1]](p3) - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[PTRTOINT]](s32) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[PTRTOINT]](s32) ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] + ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY]], [[C3]] ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[C2]](s32) ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) - ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; SI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]] - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY3]](s32) + 
; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[PRED_COPY1]](s32) ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) @@ -714,17 +714,17 @@ ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(p3) = COPY $vgpr2 ; VI-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY1]](p3) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[PTRTOINT]](s32) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[PTRTOINT]](s32) ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY]](s32) ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[C2]](s16) ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR1]](s16) ; VI-NEXT: G_STORE [[ANYEXT]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) @@ -756,12 +756,12 @@ ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(p3) = COPY $vgpr2 ; SI-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY1]](p3) - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[PTRTOINT]](s32) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[PTRTOINT]](s32) ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) ; CI-LABEL: name: test_store_global_p3_align2 ; CI: liveins: $vgpr0_vgpr1, $vgpr2 @@ -775,12 +775,12 @@ ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(p3) = COPY $vgpr2 ; VI-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY1]](p3) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[PTRTOINT]](s32) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[PTRTOINT]](s32) ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s16), 
addrspace 1) ; VI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) ; GFX9-LABEL: name: test_store_global_p3_align2 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 @@ -844,29 +844,29 @@ ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[TRUNC]](s32) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[TRUNC]](s32) ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C2]](s32) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C2]](s32) ; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C5]] + ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY]], [[C5]] ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[C4]](s32) ; SI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; SI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) - ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C4]](s32) + ; SI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C4]](s32) ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C5]] - ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY3]](s32) + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[PRED_COPY1]](s32) ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C6]](s64) ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) ; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64) - ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C4]](s32) + ; SI-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[C4]](s32) ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[TRUNC1]], [[C5]] - ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY4]](s32) + ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[PRED_COPY2]](s32) ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C6]](s64) ; SI-NEXT: G_STORE [[TRUNC1]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) ; SI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) @@ -893,9 +893,9 @@ ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[TRUNC]](s32) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[TRUNC]](s32) ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C2]](s32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C2]](s32) ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = 
G_TRUNC [[COPY1]](s64) @@ -903,7 +903,7 @@ ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[C4]](s16) ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; VI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR2]](s16) ; VI-NEXT: G_STORE [[ANYEXT]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) @@ -954,12 +954,12 @@ ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[TRUNC]](s32) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[TRUNC]](s32) ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C2]](s32) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C2]](s32) ; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; SI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64) ; SI-NEXT: G_STORE [[TRUNC1]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) @@ -986,12 +986,12 @@ ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[TRUNC]](s32) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[TRUNC]](s32) ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C2]](s32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C2]](s32) ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; VI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64) ; VI-NEXT: G_STORE [[TRUNC1]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) @@ -1050,44 +1050,44 @@ ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY [[COPY1]](s64) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s64) = PRED_COPY [[COPY1]](s64) ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY2]], [[C]](s32) + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[PRED_COPY]], [[C]](s32) ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY2]](s64) - ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[TRUNC]](s32) + ; SI-NEXT: 
[[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[PRED_COPY]](s64) + ; SI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[TRUNC]](s32) ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C2]](s32) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY1]], [[C2]](s32) ; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C5]] + ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY1]], [[C5]] ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[C4]](s32) ; SI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; SI-NEXT: G_STORE [[COPY3]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY1]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) - ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C4]](s32) + ; SI-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[C4]](s32) ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C5]] - ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY4]](s32) + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[PRED_COPY2]](s32) ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C6]](s64) ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) ; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64) - ; SI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[TRUNC1]](s32) - ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[COPY5]], [[C2]](s32) + ; SI-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY [[TRUNC1]](s32) + ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY3]], [[C2]](s32) ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C3]](s64) - ; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C5]] - ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY6]](s32) + ; SI-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY [[C4]](s32) + ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY3]], [[C5]] + ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[PRED_COPY4]](s32) ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C6]](s64) - ; SI-NEXT: G_STORE [[COPY5]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY3]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) ; SI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) - ; SI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C4]](s32) + ; SI-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY [[C4]](s32) ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C5]] - ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[COPY7]](s32) + ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[PRED_COPY5]](s32) ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD4]], [[C6]](s64) ; SI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 6, addrspace 1) ; SI-NEXT: G_STORE [[LSHR6]](s32), [[PTR_ADD6]](p1) :: 
(store (s8) into unknown-address + 7, addrspace 1) @@ -1102,23 +1102,23 @@ ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY [[COPY1]](s64) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s64) = PRED_COPY [[COPY1]](s64) ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY2]], [[C]](s32) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[PRED_COPY]], [[C]](s32) ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY2]](s64) - ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[TRUNC]](s32) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[PRED_COPY]](s64) + ; VI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[TRUNC]](s32) ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C2]](s32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY1]], [[C2]](s32) ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s64) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY]](s64) ; VI-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[C4]](s16) ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; VI-NEXT: G_STORE [[COPY3]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY1]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR2]](s16) ; VI-NEXT: G_STORE [[ANYEXT]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) @@ -1128,13 +1128,13 @@ ; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR3]](s16) ; VI-NEXT: G_STORE [[ANYEXT1]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64) - ; VI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[TRUNC3]](s32) - ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C2]](s32) + ; VI-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[TRUNC3]](s32) + ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY2]], [[C2]](s32) ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C3]](s64) ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s64) ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC4]], [[C4]](s16) ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C5]](s64) - ; VI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY2]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) ; VI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR5]](s16) ; VI-NEXT: G_STORE [[ANYEXT2]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) @@ -1165,24 +1165,24 @@ ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY [[COPY1]](s64) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s64) = PRED_COPY [[COPY1]](s64) ; 
SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY2]], [[C]](s32) + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[PRED_COPY]], [[C]](s32) ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY2]](s64) - ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[TRUNC]](s32) + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[PRED_COPY]](s64) + ; SI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[TRUNC]](s32) ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C2]](s32) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY1]], [[C2]](s32) ; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; SI-NEXT: G_STORE [[COPY3]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY1]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64) - ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[TRUNC1]](s32) - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C2]](s32) + ; SI-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[TRUNC1]](s32) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY2]], [[C2]](s32) ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C3]](s64) - ; SI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY2]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) ; CI-LABEL: name: test_store_global_s64_align2 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 @@ -1195,24 +1195,24 @@ ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY [[COPY1]](s64) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s64) = PRED_COPY [[COPY1]](s64) ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY2]], [[C]](s32) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[PRED_COPY]], [[C]](s32) ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY2]](s64) - ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[TRUNC]](s32) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[PRED_COPY]](s64) + ; VI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[TRUNC]](s32) ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C2]](s32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY1]], [[C2]](s32) ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY3]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY1]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; VI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64) - ; VI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY 
[[TRUNC1]](s32) - ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C2]](s32) + ; VI-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[TRUNC1]](s32) + ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY2]], [[C2]](s32) ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY2]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) ; VI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) ; GFX9-LABEL: name: test_store_global_s64_align2 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 @@ -1342,44 +1342,44 @@ ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $vgpr2_vgpr3 ; SI-NEXT: [[PTRTOINT:%[0-9]+]]:_(s64) = G_PTRTOINT [[COPY1]](p0) - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY [[PTRTOINT]](s64) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s64) = PRED_COPY [[PTRTOINT]](s64) ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY2]], [[C]](s32) + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[PRED_COPY]], [[C]](s32) ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY2]](s64) - ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[TRUNC]](s32) + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[PRED_COPY]](s64) + ; SI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[TRUNC]](s32) ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C2]](s32) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY1]], [[C2]](s32) ; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C5]] + ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY1]], [[C5]] ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[C4]](s32) ; SI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; SI-NEXT: G_STORE [[COPY3]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY1]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) - ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C4]](s32) + ; SI-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[C4]](s32) ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C5]] - ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY4]](s32) + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[PRED_COPY2]](s32) ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C6]](s64) ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) ; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64) - ; SI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[TRUNC1]](s32) - ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[COPY5]], [[C2]](s32) + ; SI-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY [[TRUNC1]](s32) + ; SI-NEXT: 
[[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY3]], [[C2]](s32) ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C3]](s64) - ; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C5]] - ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY6]](s32) + ; SI-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY [[C4]](s32) + ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY3]], [[C5]] + ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[PRED_COPY4]](s32) ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C6]](s64) - ; SI-NEXT: G_STORE [[COPY5]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY3]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) ; SI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) - ; SI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C4]](s32) + ; SI-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY [[C4]](s32) ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C5]] - ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[COPY7]](s32) + ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[PRED_COPY5]](s32) ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD4]], [[C6]](s64) ; SI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 6, addrspace 1) ; SI-NEXT: G_STORE [[LSHR6]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) @@ -1395,23 +1395,23 @@ ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $vgpr2_vgpr3 ; VI-NEXT: [[PTRTOINT:%[0-9]+]]:_(s64) = G_PTRTOINT [[COPY1]](p0) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY [[PTRTOINT]](s64) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s64) = PRED_COPY [[PTRTOINT]](s64) ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY2]], [[C]](s32) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[PRED_COPY]], [[C]](s32) ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY2]](s64) - ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[TRUNC]](s32) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[PRED_COPY]](s64) + ; VI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[TRUNC]](s32) ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C2]](s32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY1]], [[C2]](s32) ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s64) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY]](s64) ; VI-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[C4]](s16) ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; VI-NEXT: G_STORE [[COPY3]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY1]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR2]](s16) ; VI-NEXT: G_STORE [[ANYEXT]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC 
[[LSHR1]](s32) @@ -1421,13 +1421,13 @@ ; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR3]](s16) ; VI-NEXT: G_STORE [[ANYEXT1]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64) - ; VI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[TRUNC3]](s32) - ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C2]](s32) + ; VI-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[TRUNC3]](s32) + ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY2]], [[C2]](s32) ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C3]](s64) ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s64) ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC4]], [[C4]](s16) ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C5]](s64) - ; VI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY2]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) ; VI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR5]](s16) ; VI-NEXT: G_STORE [[ANYEXT2]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) @@ -1459,24 +1459,24 @@ ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $vgpr2_vgpr3 ; SI-NEXT: [[PTRTOINT:%[0-9]+]]:_(s64) = G_PTRTOINT [[COPY1]](p0) - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY [[PTRTOINT]](s64) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s64) = PRED_COPY [[PTRTOINT]](s64) ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY2]], [[C]](s32) + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[PRED_COPY]], [[C]](s32) ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY2]](s64) - ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[TRUNC]](s32) + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[PRED_COPY]](s64) + ; SI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[TRUNC]](s32) ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C2]](s32) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY1]], [[C2]](s32) ; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; SI-NEXT: G_STORE [[COPY3]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY1]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64) - ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[TRUNC1]](s32) - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C2]](s32) + ; SI-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[TRUNC1]](s32) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY2]], [[C2]](s32) ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C3]](s64) - ; SI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY2]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) ; CI-LABEL: name: 
test_store_global_p0_align2 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 @@ -1490,24 +1490,24 @@ ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $vgpr2_vgpr3 ; VI-NEXT: [[PTRTOINT:%[0-9]+]]:_(s64) = G_PTRTOINT [[COPY1]](p0) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY [[PTRTOINT]](s64) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s64) = PRED_COPY [[PTRTOINT]](s64) ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY2]], [[C]](s32) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[PRED_COPY]], [[C]](s32) ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY2]](s64) - ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[TRUNC]](s32) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[PRED_COPY]](s64) + ; VI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[TRUNC]](s32) ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C2]](s32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY1]], [[C2]](s32) ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY3]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY1]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; VI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64) - ; VI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[TRUNC1]](s32) - ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C2]](s32) + ; VI-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[TRUNC1]](s32) + ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY2]], [[C2]](s32) ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY2]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) ; VI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) ; GFX9-LABEL: name: test_store_global_p0_align2 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 @@ -1637,44 +1637,44 @@ ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(p999) = COPY $vgpr2_vgpr3 ; SI-NEXT: [[PTRTOINT:%[0-9]+]]:_(s64) = G_PTRTOINT [[COPY1]](p999) - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY [[PTRTOINT]](s64) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s64) = PRED_COPY [[PTRTOINT]](s64) ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY2]], [[C]](s32) + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[PRED_COPY]], [[C]](s32) ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY2]](s64) - ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[TRUNC]](s32) + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[PRED_COPY]](s64) + ; SI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[TRUNC]](s32) ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C2]](s32) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY1]], [[C2]](s32) ; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = 
G_CONSTANT i64 2 ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C5]] + ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY1]], [[C5]] ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[C4]](s32) ; SI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; SI-NEXT: G_STORE [[COPY3]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY1]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) - ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C4]](s32) + ; SI-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[C4]](s32) ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C5]] - ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY4]](s32) + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[PRED_COPY2]](s32) ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C6]](s64) ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) ; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64) - ; SI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[TRUNC1]](s32) - ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[COPY5]], [[C2]](s32) + ; SI-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY [[TRUNC1]](s32) + ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY3]], [[C2]](s32) ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C3]](s64) - ; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C5]] - ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY6]](s32) + ; SI-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY [[C4]](s32) + ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY3]], [[C5]] + ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[PRED_COPY4]](s32) ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C6]](s64) - ; SI-NEXT: G_STORE [[COPY5]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY3]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) ; SI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) - ; SI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C4]](s32) + ; SI-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY [[C4]](s32) ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C5]] - ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[COPY7]](s32) + ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[PRED_COPY5]](s32) ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD4]], [[C6]](s64) ; SI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 6, addrspace 1) ; SI-NEXT: G_STORE [[LSHR6]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) @@ -1690,23 +1690,23 @@ ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(p999) = COPY $vgpr2_vgpr3 ; VI-NEXT: [[PTRTOINT:%[0-9]+]]:_(s64) = G_PTRTOINT [[COPY1]](p999) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY [[PTRTOINT]](s64) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s64) 
= PRED_COPY [[PTRTOINT]](s64) ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY2]], [[C]](s32) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[PRED_COPY]], [[C]](s32) ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY2]](s64) - ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[TRUNC]](s32) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[PRED_COPY]](s64) + ; VI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[TRUNC]](s32) ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C2]](s32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY1]], [[C2]](s32) ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s64) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY]](s64) ; VI-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[C4]](s16) ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; VI-NEXT: G_STORE [[COPY3]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY1]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR2]](s16) ; VI-NEXT: G_STORE [[ANYEXT]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) @@ -1716,13 +1716,13 @@ ; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR3]](s16) ; VI-NEXT: G_STORE [[ANYEXT1]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64) - ; VI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[TRUNC3]](s32) - ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C2]](s32) + ; VI-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[TRUNC3]](s32) + ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY2]], [[C2]](s32) ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C3]](s64) ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s64) ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC4]], [[C4]](s16) ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C5]](s64) - ; VI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY2]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) ; VI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR5]](s16) ; VI-NEXT: G_STORE [[ANYEXT2]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) @@ -1754,24 +1754,24 @@ ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(p999) = COPY $vgpr2_vgpr3 ; SI-NEXT: [[PTRTOINT:%[0-9]+]]:_(s64) = G_PTRTOINT [[COPY1]](p999) - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY [[PTRTOINT]](s64) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s64) = PRED_COPY [[PTRTOINT]](s64) ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY2]], [[C]](s32) + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[PRED_COPY]], [[C]](s32) ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; 
SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY2]](s64) - ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[TRUNC]](s32) + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[PRED_COPY]](s64) + ; SI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[TRUNC]](s32) ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C2]](s32) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY1]], [[C2]](s32) ; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; SI-NEXT: G_STORE [[COPY3]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY1]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64) - ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[TRUNC1]](s32) - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C2]](s32) + ; SI-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[TRUNC1]](s32) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY2]], [[C2]](s32) ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C3]](s64) - ; SI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY2]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) ; CI-LABEL: name: test_store_global_p999_align2 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 @@ -1785,24 +1785,24 @@ ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(p999) = COPY $vgpr2_vgpr3 ; VI-NEXT: [[PTRTOINT:%[0-9]+]]:_(s64) = G_PTRTOINT [[COPY1]](p999) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY [[PTRTOINT]](s64) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s64) = PRED_COPY [[PTRTOINT]](s64) ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY2]], [[C]](s32) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[PRED_COPY]], [[C]](s32) ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY2]](s64) - ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[TRUNC]](s32) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[PRED_COPY]](s64) + ; VI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[TRUNC]](s32) ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C2]](s32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY1]], [[C2]](s32) ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY3]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY1]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; VI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64) - ; VI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[TRUNC1]](s32) - ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C2]](s32) + ; VI-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[TRUNC1]](s32) + ; VI-NEXT: 
[[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY2]], [[C2]](s32) ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY2]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) ; VI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) ; GFX9-LABEL: name: test_store_global_p999_align2 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 @@ -1932,39 +1932,39 @@ ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV]](s32) ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] + ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY]], [[C3]] ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[C2]](s32) ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) - ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; SI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]] - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY3]](s32) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[PRED_COPY1]](s32) ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) ; SI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[UV1]](s32) + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY2]], [[C]](s32) ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; SI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] - ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY5]](s32) + ; SI-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY2]], [[C3]] + ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[PRED_COPY3]](s32) ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD3]](p1) :: 
(store (s8) into unknown-address + 4, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY2]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) ; SI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) - ; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; SI-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C3]] - ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[COPY6]](s32) + ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[PRED_COPY4]](s32) ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD4]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 6, addrspace 1) ; SI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) @@ -1980,17 +1980,17 @@ ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV]](s32) ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY]](s32) ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[C2]](s16) ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR1]](s16) ; VI-NEXT: G_STORE [[ANYEXT]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) @@ -2001,13 +2001,13 @@ ; VI-NEXT: G_STORE [[ANYEXT1]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[UV1]](s32) + ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY1]], [[C]](s32) ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY1]](s32) ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC2]], [[C2]](s16) ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY3]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY1]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) ; VI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR4]](s16) ; VI-NEXT: G_STORE [[ANYEXT2]](s32), [[PTR_ADD5]](p1) :: 
(store (s8) into unknown-address + 5, addrspace 1) ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) @@ -2039,19 +2039,19 @@ ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV]](s32) ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[UV1]](s32) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY1]], [[C]](s32) ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY3]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY1]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) ; CI-LABEL: name: test_store_global_v2s32_align2 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 @@ -2065,19 +2065,19 @@ ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV]](s32) ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; VI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[UV1]](s32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY1]], [[C]](s32) ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY3]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY1]](s32), [[PTR_ADD1]](p1) :: (store (s16) into 
unknown-address + 4, addrspace 1) ; VI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) ; GFX9-LABEL: name: test_store_global_v2s32_align2 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 @@ -2208,40 +2208,40 @@ ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr2_vgpr3 ; SI-NEXT: [[UV:%[0-9]+]]:_(p3), [[UV1:%[0-9]+]]:_(p3) = G_UNMERGE_VALUES [[COPY1]](<2 x p3>) ; SI-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[UV]](p3) - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[PTRTOINT]](s32) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[PTRTOINT]](s32) ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] + ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY]], [[C3]] ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[C2]](s32) ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) - ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; SI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]] - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY3]](s32) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[PRED_COPY1]](s32) ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) ; SI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) ; SI-NEXT: [[PTRTOINT1:%[0-9]+]]:_(s32) = G_PTRTOINT [[UV1]](p3) - ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[PTRTOINT1]](s32) - ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[PTRTOINT1]](s32) + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY2]], [[C]](s32) ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; SI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] - ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY5]](s32) + ; SI-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY2]], [[C3]] + ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[PRED_COPY3]](s32) ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY2]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) ; SI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD5]](p1) :: (store (s8) 
into unknown-address + 5, addrspace 1) - ; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; SI-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C3]] - ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[COPY6]](s32) + ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[PRED_COPY4]](s32) ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD4]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 6, addrspace 1) ; SI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) @@ -2258,17 +2258,17 @@ ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr2_vgpr3 ; VI-NEXT: [[UV:%[0-9]+]]:_(p3), [[UV1:%[0-9]+]]:_(p3) = G_UNMERGE_VALUES [[COPY1]](<2 x p3>) ; VI-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[UV]](p3) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[PTRTOINT]](s32) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[PTRTOINT]](s32) ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY]](s32) ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[C2]](s16) ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR1]](s16) ; VI-NEXT: G_STORE [[ANYEXT]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) @@ -2280,13 +2280,13 @@ ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) ; VI-NEXT: [[PTRTOINT1:%[0-9]+]]:_(s32) = G_PTRTOINT [[UV1]](p3) - ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[PTRTOINT1]](s32) - ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[PTRTOINT1]](s32) + ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY1]], [[C]](s32) ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY1]](s32) ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC2]], [[C2]](s16) ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY3]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY1]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) ; VI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR4]](s16) ; VI-NEXT: G_STORE [[ANYEXT2]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) @@ -2319,20 +2319,20 @@ ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr2_vgpr3 ; SI-NEXT: [[UV:%[0-9]+]]:_(p3), 
[[UV1:%[0-9]+]]:_(p3) = G_UNMERGE_VALUES [[COPY1]](<2 x p3>) ; SI-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[UV]](p3) - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[PTRTOINT]](s32) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[PTRTOINT]](s32) ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) ; SI-NEXT: [[PTRTOINT1:%[0-9]+]]:_(s32) = G_PTRTOINT [[UV1]](p3) - ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[PTRTOINT1]](s32) - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[PTRTOINT1]](s32) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY1]], [[C]](s32) ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY3]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY1]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) ; CI-LABEL: name: test_store_global_v2p3_align2 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 @@ -2347,20 +2347,20 @@ ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr2_vgpr3 ; VI-NEXT: [[UV:%[0-9]+]]:_(p3), [[UV1:%[0-9]+]]:_(p3) = G_UNMERGE_VALUES [[COPY1]](<2 x p3>) ; VI-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[UV]](p3) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[PTRTOINT]](s32) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[PTRTOINT]](s32) ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; VI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) ; VI-NEXT: [[PTRTOINT1:%[0-9]+]]:_(s32) = G_PTRTOINT [[UV1]](p3) - ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[PTRTOINT1]](s32) - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[PTRTOINT1]](s32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY1]], [[C]](s32) ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY3]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY1]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) ; VI-NEXT: 
G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) ; GFX9-LABEL: name: test_store_global_v2p3_align2 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 @@ -2505,25 +2505,25 @@ ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C1]](s32) ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] - ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY2]](s32) + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[PRED_COPY]](s32) ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C3]](s64) ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) ; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) ; SI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; SI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C1]](s32) ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C2]] - ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY3]](s32) + ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[PRED_COPY1]](s32) ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64) ; SI-NEXT: G_STORE [[BITCAST1]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) ; SI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) ; SI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; SI-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[C1]](s32) ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C2]] - ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[COPY4]](s32) + ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[PRED_COPY2]](s32) ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C3]](s64) ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 6, addrspace 1) ; SI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) @@ -2769,56 +2769,56 @@ ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV]](s32) ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] + ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY]], [[C3]] ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND]], 
[[C2]](s32) ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) - ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; SI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]] - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY3]](s32) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[PRED_COPY1]](s32) ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) ; SI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[UV1]](s32) + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY2]], [[C]](s32) ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; SI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] - ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY5]](s32) + ; SI-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY2]], [[C3]] + ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[PRED_COPY3]](s32) ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY2]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) ; SI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) - ; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; SI-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C3]] - ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[COPY6]](s32) + ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[PRED_COPY4]](s32) ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD4]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 6, addrspace 1) ; SI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) ; SI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; SI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[COPY7]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY [[UV2]](s32) + ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY5]], [[C]](s32) ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; SI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]] - ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = 
G_LSHR [[AND4]], [[COPY8]](s32) + ; SI-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY5]], [[C3]] + ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[PRED_COPY6]](s32) ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY7]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY5]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1) ; SI-NEXT: G_STORE [[LSHR7]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 9, addrspace 1) - ; SI-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; SI-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C3]] - ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[AND5]], [[COPY9]](s32) + ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[AND5]], [[PRED_COPY7]](s32) ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD8]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR6]](s32), [[PTR_ADD8]](p1) :: (store (s8) into unknown-address + 10, addrspace 1) ; SI-NEXT: G_STORE [[LSHR8]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1) @@ -2834,17 +2834,17 @@ ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV]](s32) ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY]](s32) ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[C2]](s16) ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR1]](s16) ; VI-NEXT: G_STORE [[ANYEXT]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) @@ -2855,13 +2855,13 @@ ; VI-NEXT: G_STORE [[ANYEXT1]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[UV1]](s32) + ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY1]], [[C]](s32) ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY1]](s32) ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC2]], [[C2]](s16) ; 
VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY3]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY1]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) ; VI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR4]](s16) ; VI-NEXT: G_STORE [[ANYEXT2]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) @@ -2872,13 +2872,13 @@ ; VI-NEXT: G_STORE [[ANYEXT3]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; VI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[UV2]](s32) + ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY2]], [[C]](s32) ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY4]](s32) + ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY2]](s32) ; VI-NEXT: [[LSHR7:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC4]], [[C2]](s16) ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY2]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1) ; VI-NEXT: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR7]](s16) ; VI-NEXT: G_STORE [[ANYEXT4]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 9, addrspace 1) ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR6]](s32) @@ -2910,26 +2910,26 @@ ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV]](s32) ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[UV1]](s32) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY1]], [[C]](s32) ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY3]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY1]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) ; SI-NEXT: G_STORE 
[[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) ; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[UV2]](s32) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY2]], [[C]](s32) ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY2]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 10, addrspace 1) ; CI-LABEL: name: test_store_global_v3s32_align2 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 @@ -2943,26 +2943,26 @@ ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV]](s32) ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; VI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[UV1]](s32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY1]], [[C]](s32) ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY3]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY1]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) ; VI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[UV2]](s32) + ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY2]], [[C]](s32) ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY2]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) ; VI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 10, addrspace 
1) ; GFX9-LABEL: name: test_store_global_v3s32_align2 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 @@ -3107,73 +3107,73 @@ ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<4 x s32>) - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV]](s32) ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] + ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY]], [[C3]] ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[C2]](s32) ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) - ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; SI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]] - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY3]](s32) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[PRED_COPY1]](s32) ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) ; SI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[UV1]](s32) + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY2]], [[C]](s32) ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; SI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] - ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY5]](s32) + ; SI-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY2]], [[C3]] + ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[PRED_COPY3]](s32) ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY2]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) ; SI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) - ; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; SI-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) ; 
SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C3]] - ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[COPY6]](s32) + ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[PRED_COPY4]](s32) ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD4]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 6, addrspace 1) ; SI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) ; SI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; SI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[COPY7]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY [[UV2]](s32) + ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY5]], [[C]](s32) ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; SI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]] - ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[COPY8]](s32) + ; SI-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY5]], [[C3]] + ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[PRED_COPY6]](s32) ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY7]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY5]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1) ; SI-NEXT: G_STORE [[LSHR7]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 9, addrspace 1) - ; SI-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; SI-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C3]] - ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[AND5]], [[COPY9]](s32) + ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[AND5]], [[PRED_COPY7]](s32) ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD8]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR6]](s32), [[PTR_ADD8]](p1) :: (store (s8) into unknown-address + 10, addrspace 1) ; SI-NEXT: G_STORE [[LSHR8]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1) ; SI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; SI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; SI-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; SI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[COPY10]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY [[UV3]](s32) + ; SI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY8]], [[C]](s32) ; SI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64) - ; SI-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C3]] - ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[AND6]], [[COPY11]](s32) + ; SI-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY8]], [[C3]] + ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[AND6]], [[PRED_COPY9]](s32) ; SI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY10]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY8]](s32), [[PTR_ADD11]](p1) :: (store (s8) into 
unknown-address + 12, addrspace 1) ; SI-NEXT: G_STORE [[LSHR10]](s32), [[PTR_ADD13]](p1) :: (store (s8) into unknown-address + 13, addrspace 1) - ; SI-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; SI-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR9]], [[C3]] - ; SI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[COPY12]](s32) + ; SI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[PRED_COPY10]](s32) ; SI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD12]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR9]](s32), [[PTR_ADD12]](p1) :: (store (s8) into unknown-address + 14, addrspace 1) ; SI-NEXT: G_STORE [[LSHR11]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1) @@ -3189,17 +3189,17 @@ ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<4 x s32>) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV]](s32) ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY]](s32) ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[C2]](s16) ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR1]](s16) ; VI-NEXT: G_STORE [[ANYEXT]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) @@ -3210,13 +3210,13 @@ ; VI-NEXT: G_STORE [[ANYEXT1]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[UV1]](s32) + ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY1]], [[C]](s32) ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY1]](s32) ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC2]], [[C2]](s16) ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY3]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY1]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) ; VI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR4]](s16) ; VI-NEXT: G_STORE [[ANYEXT2]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) ; 
VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) @@ -3227,13 +3227,13 @@ ; VI-NEXT: G_STORE [[ANYEXT3]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; VI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[UV2]](s32) + ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY2]], [[C]](s32) ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY4]](s32) + ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY2]](s32) ; VI-NEXT: [[LSHR7:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC4]], [[C2]](s16) ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY2]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1) ; VI-NEXT: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR7]](s16) ; VI-NEXT: G_STORE [[ANYEXT4]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 9, addrspace 1) ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR6]](s32) @@ -3244,13 +3244,13 @@ ; VI-NEXT: G_STORE [[ANYEXT5]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1) ; VI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; VI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; VI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[COPY5]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY [[UV3]](s32) + ; VI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY3]], [[C]](s32) ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64) - ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY5]](s32) + ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY3]](s32) ; VI-NEXT: [[LSHR10:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC6]], [[C2]](s16) ; VI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY5]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY3]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1) ; VI-NEXT: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR10]](s16) ; VI-NEXT: G_STORE [[ANYEXT6]](s32), [[PTR_ADD13]](p1) :: (store (s8) into unknown-address + 13, addrspace 1) ; VI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR9]](s32) @@ -3282,33 +3282,33 @@ ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<4 x s32>) - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV]](s32) ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), 
addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[UV1]](s32) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY1]], [[C]](s32) ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY3]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY1]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) ; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[UV2]](s32) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY2]], [[C]](s32) ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY2]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 10, addrspace 1) ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; SI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY5]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY [[UV3]](s32) + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY3]], [[C]](s32) ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY5]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY3]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1) ; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1) ; CI-LABEL: name: test_store_global_v4s32_align2 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 @@ -3322,33 +3322,33 @@ ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<4 x s32>) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV]](s32) ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) + ; VI-NEXT: G_STORE 
[[PRED_COPY]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; VI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[UV1]](s32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY1]], [[C]](s32) ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY3]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY1]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) ; VI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[UV2]](s32) + ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY2]], [[C]](s32) ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY2]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) ; VI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 10, addrspace 1) ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY5]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY [[UV3]](s32) + ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY3]], [[C]](s32) ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY5]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY3]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1) ; VI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1) ; GFX9-LABEL: name: test_store_global_v4s32_align2 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 @@ -3478,82 +3478,82 @@ ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; SI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY [[UV]](s64) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s64) = PRED_COPY [[UV]](s64) ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY2]], [[C]](s32) + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[PRED_COPY]], [[C]](s32) ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY2]](s64) - ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[TRUNC]](s32) + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[PRED_COPY]](s64) 
+ ; SI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[TRUNC]](s32) ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C2]](s32) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY1]], [[C2]](s32) ; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C5]] + ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY1]], [[C5]] ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[C4]](s32) ; SI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; SI-NEXT: G_STORE [[COPY3]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY1]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) - ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C4]](s32) + ; SI-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[C4]](s32) ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C5]] - ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY4]](s32) + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[PRED_COPY2]](s32) ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C6]](s64) ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) ; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64) - ; SI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[TRUNC1]](s32) - ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[COPY5]], [[C2]](s32) + ; SI-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY [[TRUNC1]](s32) + ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY3]], [[C2]](s32) ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C3]](s64) - ; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C5]] - ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY6]](s32) + ; SI-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY [[C4]](s32) + ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY3]], [[C5]] + ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[PRED_COPY4]](s32) ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C6]](s64) - ; SI-NEXT: G_STORE [[COPY5]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY3]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) ; SI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) - ; SI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C4]](s32) + ; SI-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY [[C4]](s32) ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C5]] - ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[COPY7]](s32) + ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[PRED_COPY5]](s32) ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD4]], [[C6]](s64) ; SI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 6, addrspace 1) ; SI-NEXT: G_STORE [[LSHR6]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) ; 
SI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; SI-NEXT: [[COPY8:%[0-9]+]]:_(s64) = COPY [[UV1]](s64) - ; SI-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s64) = G_LSHR [[COPY8]], [[COPY9]](s32) + ; SI-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s64) = PRED_COPY [[UV1]](s64) + ; SI-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s64) = G_LSHR [[PRED_COPY6]], [[PRED_COPY7]](s32) ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s32) = G_TRUNC [[COPY8]](s64) - ; SI-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY [[TRUNC2]](s32) - ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[COPY10]], [[C2]](s32) + ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s32) = G_TRUNC [[PRED_COPY6]](s64) + ; SI-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY [[TRUNC2]](s32) + ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY8]], [[C2]](s32) ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64) - ; SI-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C5]] - ; SI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[COPY11]](s32) + ; SI-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY [[C4]](s32) + ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY8]], [[C5]] + ; SI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[PRED_COPY9]](s32) ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C6]](s64) - ; SI-NEXT: G_STORE [[COPY10]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY8]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1) ; SI-NEXT: G_STORE [[LSHR9]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 9, addrspace 1) - ; SI-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[C4]](s32) + ; SI-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY [[C4]](s32) ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR8]], [[C5]] - ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[AND5]], [[COPY12]](s32) + ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[AND5]], [[PRED_COPY10]](s32) ; SI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD9]], [[C6]](s64) ; SI-NEXT: G_STORE [[LSHR8]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 10, addrspace 1) ; SI-NEXT: G_STORE [[LSHR10]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 11, addrspace 1) ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR7]](s64) - ; SI-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[TRUNC3]](s32) - ; SI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[COPY13]], [[C2]](s32) + ; SI-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY [[TRUNC3]](s32) + ; SI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY11]], [[C2]](s32) ; SI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD8]], [[C3]](s64) - ; SI-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C5]] - ; SI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[AND6]], [[COPY14]](s32) + ; SI-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY [[C4]](s32) + ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY11]], [[C5]] + ; SI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[AND6]], [[PRED_COPY12]](s32) ; SI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD8]], [[C6]](s64) - ; SI-NEXT: G_STORE [[COPY13]](s32), [[PTR_ADD8]](p1) :: (store (s8) into unknown-address + 12, addrspace 
1) + ; SI-NEXT: G_STORE [[PRED_COPY11]](s32), [[PTR_ADD8]](p1) :: (store (s8) into unknown-address + 12, addrspace 1) ; SI-NEXT: G_STORE [[LSHR12]](s32), [[PTR_ADD13]](p1) :: (store (s8) into unknown-address + 13, addrspace 1) - ; SI-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[C4]](s32) + ; SI-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[C4]](s32) ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR11]], [[C5]] - ; SI-NEXT: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[COPY15]](s32) + ; SI-NEXT: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[PRED_COPY13]](s32) ; SI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD12]], [[C6]](s64) ; SI-NEXT: G_STORE [[LSHR11]](s32), [[PTR_ADD12]](p1) :: (store (s8) into unknown-address + 14, addrspace 1) ; SI-NEXT: G_STORE [[LSHR13]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1) @@ -3569,23 +3569,23 @@ ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY [[UV]](s64) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s64) = PRED_COPY [[UV]](s64) ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY2]], [[C]](s32) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[PRED_COPY]], [[C]](s32) ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY2]](s64) - ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[TRUNC]](s32) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[PRED_COPY]](s64) + ; VI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[TRUNC]](s32) ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C2]](s32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY1]], [[C2]](s32) ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s64) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY]](s64) ; VI-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[C4]](s16) ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; VI-NEXT: G_STORE [[COPY3]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY1]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR2]](s16) ; VI-NEXT: G_STORE [[ANYEXT]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) @@ -3595,13 +3595,13 @@ ; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR3]](s16) ; VI-NEXT: G_STORE [[ANYEXT1]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64) - ; VI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[TRUNC3]](s32) - ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C2]](s32) + ; VI-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[TRUNC3]](s32) + ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY2]], [[C2]](s32) ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C3]](s64) ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = 
G_TRUNC [[LSHR]](s64) ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC4]], [[C4]](s16) ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C5]](s64) - ; VI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY2]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) ; VI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR5]](s16) ; VI-NEXT: G_STORE [[ANYEXT2]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) @@ -3612,18 +3612,18 @@ ; VI-NEXT: G_STORE [[ANYEXT3]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) ; VI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; VI-NEXT: [[COPY5:%[0-9]+]]:_(s64) = COPY [[UV1]](s64) - ; VI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; VI-NEXT: [[LSHR7:%[0-9]+]]:_(s64) = G_LSHR [[COPY5]], [[COPY6]](s32) + ; VI-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s64) = PRED_COPY [[UV1]](s64) + ; VI-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; VI-NEXT: [[LSHR7:%[0-9]+]]:_(s64) = G_LSHR [[PRED_COPY3]], [[PRED_COPY4]](s32) ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(s32) = G_TRUNC [[COPY5]](s64) - ; VI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[TRUNC6]](s32) - ; VI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[COPY7]], [[C2]](s32) + ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(s32) = G_TRUNC [[PRED_COPY3]](s64) + ; VI-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY [[TRUNC6]](s32) + ; VI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY5]], [[C2]](s32) ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64) - ; VI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[COPY5]](s64) + ; VI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY3]](s64) ; VI-NEXT: [[LSHR9:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC7]], [[C4]](s16) ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64) - ; VI-NEXT: G_STORE [[COPY7]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY5]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1) ; VI-NEXT: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR9]](s16) ; VI-NEXT: G_STORE [[ANYEXT4]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 9, addrspace 1) ; VI-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR8]](s32) @@ -3633,13 +3633,13 @@ ; VI-NEXT: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR10]](s16) ; VI-NEXT: G_STORE [[ANYEXT5]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 11, addrspace 1) ; VI-NEXT: [[TRUNC9:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR7]](s64) - ; VI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[TRUNC9]](s32) - ; VI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[COPY8]], [[C2]](s32) + ; VI-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY [[TRUNC9]](s32) + ; VI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY6]], [[C2]](s32) ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD8]], [[C3]](s64) ; VI-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR7]](s64) ; VI-NEXT: [[LSHR12:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC10]], [[C4]](s16) ; VI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD8]], [[C5]](s64) - ; VI-NEXT: G_STORE [[COPY8]](s32), [[PTR_ADD8]](p1) :: (store (s8) into unknown-address + 12, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY6]](s32), 
[[PTR_ADD8]](p1) :: (store (s8) into unknown-address + 12, addrspace 1) ; VI-NEXT: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR12]](s16) ; VI-NEXT: G_STORE [[ANYEXT6]](s32), [[PTR_ADD13]](p1) :: (store (s8) into unknown-address + 13, addrspace 1) ; VI-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR11]](s32) @@ -3671,42 +3671,42 @@ ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; SI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY [[UV]](s64) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s64) = PRED_COPY [[UV]](s64) ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY2]], [[C]](s32) + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[PRED_COPY]], [[C]](s32) ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY2]](s64) - ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[TRUNC]](s32) + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[PRED_COPY]](s64) + ; SI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[TRUNC]](s32) ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C2]](s32) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY1]], [[C2]](s32) ; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; SI-NEXT: G_STORE [[COPY3]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY1]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64) - ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[TRUNC1]](s32) - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C2]](s32) + ; SI-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[TRUNC1]](s32) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY2]], [[C2]](s32) ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C3]](s64) - ; SI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY2]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; SI-NEXT: [[COPY5:%[0-9]+]]:_(s64) = COPY [[UV1]](s64) - ; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s64) = G_LSHR [[COPY5]], [[COPY6]](s32) + ; SI-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s64) = PRED_COPY [[UV1]](s64) + ; SI-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s64) = G_LSHR [[PRED_COPY3]], [[PRED_COPY4]](s32) ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s32) = G_TRUNC [[COPY5]](s64) - ; SI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[TRUNC2]](s32) - ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[COPY7]], [[C2]](s32) + ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s32) = G_TRUNC [[PRED_COPY3]](s64) + ; SI-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY [[TRUNC2]](s32) + ; SI-NEXT: 
[[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY5]], [[C2]](s32) ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64) - ; SI-NEXT: G_STORE [[COPY7]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY5]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) ; SI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 10, addrspace 1) ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR3]](s64) - ; SI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[TRUNC3]](s32) - ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[COPY8]], [[C2]](s32) + ; SI-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY [[TRUNC3]](s32) + ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY6]], [[C2]](s32) ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD4]], [[C3]](s64) - ; SI-NEXT: G_STORE [[COPY8]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 12, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY6]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 12, addrspace 1) ; SI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1) ; CI-LABEL: name: test_store_global_v2s64_align2 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 @@ -3720,42 +3720,42 @@ ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY [[UV]](s64) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s64) = PRED_COPY [[UV]](s64) ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY2]], [[C]](s32) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[PRED_COPY]], [[C]](s32) ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY2]](s64) - ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[TRUNC]](s32) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[PRED_COPY]](s64) + ; VI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[TRUNC]](s32) ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C2]](s32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY1]], [[C2]](s32) ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY3]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY1]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; VI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64) - ; VI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[TRUNC1]](s32) - ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C2]](s32) + ; VI-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[TRUNC1]](s32) + ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY2]], [[C2]](s32) ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY2]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) ; VI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: 
(store (s16) into unknown-address + 6, addrspace 1) ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI-NEXT: [[COPY5:%[0-9]+]]:_(s64) = COPY [[UV1]](s64) - ; VI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s64) = G_LSHR [[COPY5]], [[COPY6]](s32) + ; VI-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s64) = PRED_COPY [[UV1]](s64) + ; VI-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s64) = G_LSHR [[PRED_COPY3]], [[PRED_COPY4]](s32) ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s32) = G_TRUNC [[COPY5]](s64) - ; VI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[TRUNC2]](s32) - ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[COPY7]], [[C2]](s32) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s32) = G_TRUNC [[PRED_COPY3]](s64) + ; VI-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY [[TRUNC2]](s32) + ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY5]], [[C2]](s32) ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY7]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY5]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) ; VI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 10, addrspace 1) ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR3]](s64) - ; VI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[TRUNC3]](s32) - ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[COPY8]], [[C2]](s32) + ; VI-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY [[TRUNC3]](s32) + ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY6]], [[C2]](s32) ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD4]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY8]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 12, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY6]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 12, addrspace 1) ; VI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1) ; GFX9-LABEL: name: test_store_global_v2s64_align2 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 @@ -3886,73 +3886,73 @@ ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<8 x s16>) ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<4 x s32>) - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV]](s32) ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] + ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY]], [[C3]] ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[C2]](s32) ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY2]](s32), 
[[COPY]](p1) :: (store (s8), addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) - ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; SI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]] - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY3]](s32) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[PRED_COPY1]](s32) ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) ; SI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[UV1]](s32) + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY2]], [[C]](s32) ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; SI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] - ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY5]](s32) + ; SI-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY2]], [[C3]] + ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[PRED_COPY3]](s32) ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY2]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) ; SI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) - ; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; SI-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C3]] - ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[COPY6]](s32) + ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[PRED_COPY4]](s32) ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD4]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 6, addrspace 1) ; SI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) ; SI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; SI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[COPY7]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY [[UV2]](s32) + ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY5]], [[C]](s32) ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; SI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]] - ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[COPY8]](s32) + ; SI-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY5]], [[C3]] + ; 
SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[PRED_COPY6]](s32) ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY7]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY5]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1) ; SI-NEXT: G_STORE [[LSHR7]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 9, addrspace 1) - ; SI-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; SI-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C3]] - ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[AND5]], [[COPY9]](s32) + ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[AND5]], [[PRED_COPY7]](s32) ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD8]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR6]](s32), [[PTR_ADD8]](p1) :: (store (s8) into unknown-address + 10, addrspace 1) ; SI-NEXT: G_STORE [[LSHR8]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1) ; SI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; SI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; SI-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; SI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[COPY10]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY [[UV3]](s32) + ; SI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY8]], [[C]](s32) ; SI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64) - ; SI-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C3]] - ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[AND6]], [[COPY11]](s32) + ; SI-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY8]], [[C3]] + ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[AND6]], [[PRED_COPY9]](s32) ; SI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY10]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY8]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1) ; SI-NEXT: G_STORE [[LSHR10]](s32), [[PTR_ADD13]](p1) :: (store (s8) into unknown-address + 13, addrspace 1) - ; SI-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; SI-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR9]], [[C3]] - ; SI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[COPY12]](s32) + ; SI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[PRED_COPY10]](s32) ; SI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD12]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR9]](s32), [[PTR_ADD12]](p1) :: (store (s8) into unknown-address + 14, addrspace 1) ; SI-NEXT: G_STORE [[LSHR11]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1) @@ -3970,17 +3970,17 @@ ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<8 x s16>) ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<4 x s32>) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV]](s32) ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: 
[[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY]](s32) ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[C2]](s16) ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR1]](s16) ; VI-NEXT: G_STORE [[ANYEXT]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) @@ -3991,13 +3991,13 @@ ; VI-NEXT: G_STORE [[ANYEXT1]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[UV1]](s32) + ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY1]], [[C]](s32) ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY1]](s32) ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC2]], [[C2]](s16) ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY3]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY1]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) ; VI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR4]](s16) ; VI-NEXT: G_STORE [[ANYEXT2]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) @@ -4008,13 +4008,13 @@ ; VI-NEXT: G_STORE [[ANYEXT3]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; VI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[UV2]](s32) + ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY2]], [[C]](s32) ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY4]](s32) + ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY2]](s32) ; VI-NEXT: [[LSHR7:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC4]], [[C2]](s16) ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY2]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1) ; VI-NEXT: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR7]](s16) ; VI-NEXT: G_STORE [[ANYEXT4]](s32), 
[[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 9, addrspace 1) ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR6]](s32) @@ -4025,13 +4025,13 @@ ; VI-NEXT: G_STORE [[ANYEXT5]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1) ; VI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; VI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; VI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[COPY5]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY [[UV3]](s32) + ; VI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY3]], [[C]](s32) ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64) - ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY5]](s32) + ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY3]](s32) ; VI-NEXT: [[LSHR10:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC6]], [[C2]](s16) ; VI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY5]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY3]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1) ; VI-NEXT: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR10]](s16) ; VI-NEXT: G_STORE [[ANYEXT6]](s32), [[PTR_ADD13]](p1) :: (store (s8) into unknown-address + 13, addrspace 1) ; VI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR9]](s32) @@ -4065,33 +4065,33 @@ ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<8 x s16>) ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<4 x s32>) - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV]](s32) ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[UV1]](s32) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY1]], [[C]](s32) ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY3]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY1]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) ; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], 
[[C]](s32) + ; SI-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[UV2]](s32) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY2]], [[C]](s32) ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY2]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 10, addrspace 1) ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; SI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY5]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY [[UV3]](s32) + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY3]], [[C]](s32) ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY5]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY3]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1) ; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1) ; CI-LABEL: name: test_store_global_v8s16_align2 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 @@ -4107,33 +4107,33 @@ ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<8 x s16>) ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<4 x s32>) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV]](s32) ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; VI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[UV1]](s32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY1]], [[C]](s32) ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY3]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY1]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) ; VI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) + ; 
VI-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[UV2]](s32) + ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY2]], [[C]](s32) ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY2]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) ; VI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 10, addrspace 1) ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY5]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY [[UV3]](s32) + ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY3]], [[C]](s32) ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY5]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY3]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1) ; VI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1) ; GFX9-LABEL: name: test_store_global_v8s16_align2 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 @@ -4277,73 +4277,73 @@ ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p0>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<2 x p0>) ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<4 x s32>) - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV]](s32) ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] + ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY]], [[C3]] ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[C2]](s32) ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) - ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; SI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]] - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY3]](s32) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[PRED_COPY1]](s32) ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) ; SI-NEXT: 
[[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[UV1]](s32) + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY2]], [[C]](s32) ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; SI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] - ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY5]](s32) + ; SI-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY2]], [[C3]] + ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[PRED_COPY3]](s32) ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY2]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) ; SI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) - ; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; SI-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C3]] - ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[COPY6]](s32) + ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[PRED_COPY4]](s32) ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD4]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 6, addrspace 1) ; SI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) ; SI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; SI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[COPY7]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY [[UV2]](s32) + ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY5]], [[C]](s32) ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; SI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]] - ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[COPY8]](s32) + ; SI-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY5]], [[C3]] + ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[PRED_COPY6]](s32) ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY7]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY5]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1) ; SI-NEXT: G_STORE [[LSHR7]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 9, addrspace 1) - ; SI-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; SI-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C3]] - ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[AND5]], [[COPY9]](s32) + ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[AND5]], [[PRED_COPY7]](s32) ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD 
[[PTR_ADD8]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR6]](s32), [[PTR_ADD8]](p1) :: (store (s8) into unknown-address + 10, addrspace 1) ; SI-NEXT: G_STORE [[LSHR8]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1) ; SI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; SI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; SI-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; SI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[COPY10]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY [[UV3]](s32) + ; SI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY8]], [[C]](s32) ; SI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64) - ; SI-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C3]] - ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[AND6]], [[COPY11]](s32) + ; SI-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY8]], [[C3]] + ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[AND6]], [[PRED_COPY9]](s32) ; SI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY10]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY8]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1) ; SI-NEXT: G_STORE [[LSHR10]](s32), [[PTR_ADD13]](p1) :: (store (s8) into unknown-address + 13, addrspace 1) - ; SI-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; SI-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR9]], [[C3]] - ; SI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[COPY12]](s32) + ; SI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[PRED_COPY10]](s32) ; SI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD12]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR9]](s32), [[PTR_ADD12]](p1) :: (store (s8) into unknown-address + 14, addrspace 1) ; SI-NEXT: G_STORE [[LSHR11]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1) @@ -4361,17 +4361,17 @@ ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p0>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<2 x p0>) ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<4 x s32>) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV]](s32) ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY]](s32) ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[C2]](s16) ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR1]](s16) ; VI-NEXT: G_STORE 
[[ANYEXT]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) @@ -4382,13 +4382,13 @@ ; VI-NEXT: G_STORE [[ANYEXT1]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[UV1]](s32) + ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY1]], [[C]](s32) ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY1]](s32) ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC2]], [[C2]](s16) ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY3]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY1]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) ; VI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR4]](s16) ; VI-NEXT: G_STORE [[ANYEXT2]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) @@ -4399,13 +4399,13 @@ ; VI-NEXT: G_STORE [[ANYEXT3]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; VI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[UV2]](s32) + ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY2]], [[C]](s32) ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY4]](s32) + ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY2]](s32) ; VI-NEXT: [[LSHR7:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC4]], [[C2]](s16) ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY2]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1) ; VI-NEXT: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR7]](s16) ; VI-NEXT: G_STORE [[ANYEXT4]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 9, addrspace 1) ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR6]](s32) @@ -4416,13 +4416,13 @@ ; VI-NEXT: G_STORE [[ANYEXT5]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1) ; VI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; VI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; VI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[COPY5]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY [[UV3]](s32) + ; VI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY3]], [[C]](s32) ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64) - ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY5]](s32) + ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC 
[[PRED_COPY3]](s32) ; VI-NEXT: [[LSHR10:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC6]], [[C2]](s16) ; VI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY5]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY3]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1) ; VI-NEXT: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR10]](s16) ; VI-NEXT: G_STORE [[ANYEXT6]](s32), [[PTR_ADD13]](p1) :: (store (s8) into unknown-address + 13, addrspace 1) ; VI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR9]](s32) @@ -4456,33 +4456,33 @@ ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p0>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<2 x p0>) ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<4 x s32>) - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV]](s32) ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[UV1]](s32) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY1]], [[C]](s32) ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY3]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY1]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) ; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[UV2]](s32) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY2]], [[C]](s32) ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY2]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 10, addrspace 1) ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; SI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY5]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = 
PRED_COPY [[UV3]](s32) + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY3]], [[C]](s32) ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY5]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY3]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1) ; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1) ; CI-LABEL: name: test_store_global_v2p0_align2 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 @@ -4498,33 +4498,33 @@ ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p0>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<2 x p0>) ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<4 x s32>) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV]](s32) ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; VI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[UV1]](s32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY1]], [[C]](s32) ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY3]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY1]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) ; VI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[UV2]](s32) + ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY2]], [[C]](s32) ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY2]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) ; VI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 10, addrspace 1) ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY5]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY [[UV3]](s32) 
+ ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY3]], [[C]](s32) ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY5]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY3]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1) ; VI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1) ; GFX9-LABEL: name: test_store_global_v2p0_align2 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 @@ -4668,56 +4668,56 @@ ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr2_vgpr3_vgpr4 ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x s32>) = G_BITCAST [[COPY1]](s96) ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<3 x s32>) - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV]](s32) ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] + ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY]], [[C3]] ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[C2]](s32) ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) - ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; SI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]] - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY3]](s32) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[PRED_COPY1]](s32) ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) ; SI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[UV1]](s32) + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY2]], [[C]](s32) ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; SI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] - ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY5]](s32) + ; SI-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY2]], [[C3]] + ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[PRED_COPY3]](s32) ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD 
[[PTR_ADD3]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY2]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) ; SI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) - ; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; SI-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C3]] - ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[COPY6]](s32) + ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[PRED_COPY4]](s32) ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD4]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 6, addrspace 1) ; SI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) ; SI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; SI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[COPY7]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY [[UV2]](s32) + ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY5]], [[C]](s32) ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; SI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]] - ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[COPY8]](s32) + ; SI-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY5]], [[C3]] + ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[PRED_COPY6]](s32) ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY7]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY5]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1) ; SI-NEXT: G_STORE [[LSHR7]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 9, addrspace 1) - ; SI-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; SI-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C3]] - ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[AND5]], [[COPY9]](s32) + ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[AND5]], [[PRED_COPY7]](s32) ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD8]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR6]](s32), [[PTR_ADD8]](p1) :: (store (s8) into unknown-address + 10, addrspace 1) ; SI-NEXT: G_STORE [[LSHR8]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1) @@ -4735,17 +4735,17 @@ ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr2_vgpr3_vgpr4 ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x s32>) = G_BITCAST [[COPY1]](s96) ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<3 x s32>) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV]](s32) ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI-NEXT: 
[[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY]](s32) ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[C2]](s16) ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR1]](s16) ; VI-NEXT: G_STORE [[ANYEXT]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) @@ -4756,13 +4756,13 @@ ; VI-NEXT: G_STORE [[ANYEXT1]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[UV1]](s32) + ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY1]], [[C]](s32) ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY1]](s32) ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC2]], [[C2]](s16) ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY3]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY1]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) ; VI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR4]](s16) ; VI-NEXT: G_STORE [[ANYEXT2]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) @@ -4773,13 +4773,13 @@ ; VI-NEXT: G_STORE [[ANYEXT3]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; VI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[UV2]](s32) + ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY2]], [[C]](s32) ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY4]](s32) + ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY2]](s32) ; VI-NEXT: [[LSHR7:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC4]], [[C2]](s16) ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY2]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1) ; VI-NEXT: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR7]](s16) ; VI-NEXT: G_STORE [[ANYEXT4]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 9, addrspace 1) ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR6]](s32) @@ -4813,26 +4813,26 @@ ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s96) 
= COPY $vgpr2_vgpr3_vgpr4 ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x s32>) = G_BITCAST [[COPY1]](s96) ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<3 x s32>) - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV]](s32) ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[UV1]](s32) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY1]], [[C]](s32) ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY3]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY1]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) ; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[UV2]](s32) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY2]], [[C]](s32) ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY2]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 10, addrspace 1) ; CI-LABEL: name: test_store_global_s96_align2 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 @@ -4848,26 +4848,26 @@ ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr2_vgpr3_vgpr4 ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x s32>) = G_BITCAST [[COPY1]](s96) ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<3 x s32>) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV]](s32) ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; VI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) 
:: (store (s16) into unknown-address + 2, addrspace 1) ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[UV1]](s32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY1]], [[C]](s32) ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY3]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY1]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) ; VI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[UV2]](s32) + ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY2]], [[C]](s32) ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY2]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) ; VI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 10, addrspace 1) ; GFX9-LABEL: name: test_store_global_s96_align2 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 @@ -5026,73 +5026,73 @@ ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](s128) ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<4 x s32>) - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV]](s32) ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] + ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY]], [[C3]] ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[C2]](s32) ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) - ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; SI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]] - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY3]](s32) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[PRED_COPY1]](s32) ; SI-NEXT: 
[[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) ; SI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[UV1]](s32) + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY2]], [[C]](s32) ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; SI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] - ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY5]](s32) + ; SI-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY2]], [[C3]] + ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[PRED_COPY3]](s32) ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY2]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) ; SI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) - ; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; SI-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C3]] - ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[COPY6]](s32) + ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[PRED_COPY4]](s32) ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD4]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 6, addrspace 1) ; SI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) ; SI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; SI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[COPY7]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY [[UV2]](s32) + ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY5]], [[C]](s32) ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; SI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]] - ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[COPY8]](s32) + ; SI-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY5]], [[C3]] + ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[PRED_COPY6]](s32) ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY7]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY5]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1) ; SI-NEXT: G_STORE [[LSHR7]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 9, addrspace 1) - ; SI-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; SI-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = 
PRED_COPY [[C2]](s32) ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C3]] - ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[AND5]], [[COPY9]](s32) + ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[AND5]], [[PRED_COPY7]](s32) ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD8]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR6]](s32), [[PTR_ADD8]](p1) :: (store (s8) into unknown-address + 10, addrspace 1) ; SI-NEXT: G_STORE [[LSHR8]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1) ; SI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; SI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; SI-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; SI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[COPY10]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY [[UV3]](s32) + ; SI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY8]], [[C]](s32) ; SI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64) - ; SI-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C3]] - ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[AND6]], [[COPY11]](s32) + ; SI-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY8]], [[C3]] + ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[AND6]], [[PRED_COPY9]](s32) ; SI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY10]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY8]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1) ; SI-NEXT: G_STORE [[LSHR10]](s32), [[PTR_ADD13]](p1) :: (store (s8) into unknown-address + 13, addrspace 1) - ; SI-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; SI-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR9]], [[C3]] - ; SI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[COPY12]](s32) + ; SI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[PRED_COPY10]](s32) ; SI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD12]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR9]](s32), [[PTR_ADD12]](p1) :: (store (s8) into unknown-address + 14, addrspace 1) ; SI-NEXT: G_STORE [[LSHR11]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1) @@ -5110,17 +5110,17 @@ ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](s128) ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<4 x s32>) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV]](s32) ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY]](s32) ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[C2]](s16) ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = 
G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR1]](s16) ; VI-NEXT: G_STORE [[ANYEXT]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) @@ -5131,13 +5131,13 @@ ; VI-NEXT: G_STORE [[ANYEXT1]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[UV1]](s32) + ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY1]], [[C]](s32) ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY1]](s32) ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC2]], [[C2]](s16) ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY3]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY1]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) ; VI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR4]](s16) ; VI-NEXT: G_STORE [[ANYEXT2]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) @@ -5148,13 +5148,13 @@ ; VI-NEXT: G_STORE [[ANYEXT3]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; VI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[UV2]](s32) + ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY2]], [[C]](s32) ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY4]](s32) + ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY2]](s32) ; VI-NEXT: [[LSHR7:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC4]], [[C2]](s16) ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY2]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1) ; VI-NEXT: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR7]](s16) ; VI-NEXT: G_STORE [[ANYEXT4]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 9, addrspace 1) ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR6]](s32) @@ -5165,13 +5165,13 @@ ; VI-NEXT: G_STORE [[ANYEXT5]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1) ; VI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; VI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; VI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[COPY5]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY [[UV3]](s32) + ; 
VI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY3]], [[C]](s32) ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64) - ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY5]](s32) + ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY3]](s32) ; VI-NEXT: [[LSHR10:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC6]], [[C2]](s16) ; VI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY5]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY3]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1) ; VI-NEXT: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR10]](s16) ; VI-NEXT: G_STORE [[ANYEXT6]](s32), [[PTR_ADD13]](p1) :: (store (s8) into unknown-address + 13, addrspace 1) ; VI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR9]](s32) @@ -5205,33 +5205,33 @@ ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](s128) ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<4 x s32>) - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV]](s32) ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[UV1]](s32) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY1]], [[C]](s32) ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY3]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY1]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) ; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[UV2]](s32) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY2]], [[C]](s32) ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY2]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 10, addrspace 1) ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT 
i64 12 ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; SI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY5]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY [[UV3]](s32) + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY3]], [[C]](s32) ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY5]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY3]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1) ; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1) ; CI-LABEL: name: test_store_global_s128_align2 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 @@ -5247,33 +5247,33 @@ ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](s128) ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<4 x s32>) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV]](s32) ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; VI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[UV1]](s32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY1]], [[C]](s32) ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY3]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY1]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) ; VI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[UV2]](s32) + ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY2]], [[C]](s32) ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY2]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) ; VI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 10, addrspace 1) ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; VI-NEXT: 
[[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY5]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY [[UV3]](s32) + ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY3]], [[C]](s32) ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY5]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY3]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1) ; VI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1) ; GFX9-LABEL: name: test_store_global_s128_align2 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 @@ -5416,90 +5416,90 @@ ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<5 x s32>) - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV]](s32) ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] + ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY]], [[C3]] ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[C2]](s32) ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) - ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; SI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]] - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY3]](s32) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[PRED_COPY1]](s32) ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) ; SI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[UV1]](s32) + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY2]], [[C]](s32) ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; SI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] - ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = 
G_LSHR [[AND2]], [[COPY5]](s32) + ; SI-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY2]], [[C3]] + ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[PRED_COPY3]](s32) ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY2]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) ; SI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) - ; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; SI-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C3]] - ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[COPY6]](s32) + ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[PRED_COPY4]](s32) ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD4]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 6, addrspace 1) ; SI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) ; SI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; SI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[COPY7]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY [[UV2]](s32) + ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY5]], [[C]](s32) ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; SI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]] - ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[COPY8]](s32) + ; SI-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY5]], [[C3]] + ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[PRED_COPY6]](s32) ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY7]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY5]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1) ; SI-NEXT: G_STORE [[LSHR7]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 9, addrspace 1) - ; SI-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; SI-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C3]] - ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[AND5]], [[COPY9]](s32) + ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[AND5]], [[PRED_COPY7]](s32) ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD8]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR6]](s32), [[PTR_ADD8]](p1) :: (store (s8) into unknown-address + 10, addrspace 1) ; SI-NEXT: G_STORE [[LSHR8]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1) ; SI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; SI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; SI-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; SI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[COPY10]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY [[UV3]](s32) + ; SI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR 
[[PRED_COPY8]], [[C]](s32) ; SI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64) - ; SI-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C3]] - ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[AND6]], [[COPY11]](s32) + ; SI-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY8]], [[C3]] + ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[AND6]], [[PRED_COPY9]](s32) ; SI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY10]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY8]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1) ; SI-NEXT: G_STORE [[LSHR10]](s32), [[PTR_ADD13]](p1) :: (store (s8) into unknown-address + 13, addrspace 1) - ; SI-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; SI-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR9]], [[C3]] - ; SI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[COPY12]](s32) + ; SI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[PRED_COPY10]](s32) ; SI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD12]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR9]](s32), [[PTR_ADD12]](p1) :: (store (s8) into unknown-address + 14, addrspace 1) ; SI-NEXT: G_STORE [[LSHR11]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1) ; SI-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; SI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64) - ; SI-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[UV4]](s32) - ; SI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[COPY13]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY [[UV4]](s32) + ; SI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY11]], [[C]](s32) ; SI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64) - ; SI-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C3]] - ; SI-NEXT: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[AND8]], [[COPY14]](s32) + ; SI-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; SI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY11]], [[C3]] + ; SI-NEXT: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[AND8]], [[PRED_COPY12]](s32) ; SI-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY13]](s32), [[PTR_ADD15]](p1) :: (store (s8) into unknown-address + 16, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY11]](s32), [[PTR_ADD15]](p1) :: (store (s8) into unknown-address + 16, addrspace 1) ; SI-NEXT: G_STORE [[LSHR13]](s32), [[PTR_ADD17]](p1) :: (store (s8) into unknown-address + 17, addrspace 1) - ; SI-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; SI-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) ; SI-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LSHR12]], [[C3]] - ; SI-NEXT: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[AND9]], [[COPY15]](s32) + ; SI-NEXT: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[AND9]], [[PRED_COPY13]](s32) ; SI-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD16]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR12]](s32), [[PTR_ADD16]](p1) :: (store (s8) into unknown-address + 18, addrspace 1) ; SI-NEXT: G_STORE [[LSHR14]](s32), [[PTR_ADD18]](p1) :: (store (s8) into unknown-address + 19, addrspace 1) @@ -5520,17 +5520,17 @@ ; VI-NEXT: 
[[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<5 x s32>) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV]](s32) ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY]](s32) ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[C2]](s16) ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR1]](s16) ; VI-NEXT: G_STORE [[ANYEXT]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) @@ -5541,13 +5541,13 @@ ; VI-NEXT: G_STORE [[ANYEXT1]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[UV1]](s32) + ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY1]], [[C]](s32) ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY1]](s32) ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC2]], [[C2]](s16) ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY3]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY1]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) ; VI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR4]](s16) ; VI-NEXT: G_STORE [[ANYEXT2]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) @@ -5558,13 +5558,13 @@ ; VI-NEXT: G_STORE [[ANYEXT3]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; VI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[UV2]](s32) + ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY2]], [[C]](s32) ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY4]](s32) + ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC 
[[PRED_COPY2]](s32) ; VI-NEXT: [[LSHR7:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC4]], [[C2]](s16) ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY2]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1) ; VI-NEXT: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR7]](s16) ; VI-NEXT: G_STORE [[ANYEXT4]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 9, addrspace 1) ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR6]](s32) @@ -5575,13 +5575,13 @@ ; VI-NEXT: G_STORE [[ANYEXT5]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1) ; VI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; VI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; VI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[COPY5]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY [[UV3]](s32) + ; VI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY3]], [[C]](s32) ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64) - ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY5]](s32) + ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY3]](s32) ; VI-NEXT: [[LSHR10:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC6]], [[C2]](s16) ; VI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY5]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY3]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1) ; VI-NEXT: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR10]](s16) ; VI-NEXT: G_STORE [[ANYEXT6]](s32), [[PTR_ADD13]](p1) :: (store (s8) into unknown-address + 13, addrspace 1) ; VI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR9]](s32) @@ -5592,13 +5592,13 @@ ; VI-NEXT: G_STORE [[ANYEXT7]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1) ; VI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; VI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV4]](s32) - ; VI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[COPY6]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY [[UV4]](s32) + ; VI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY4]], [[C]](s32) ; VI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64) - ; VI-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[COPY6]](s32) + ; VI-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY4]](s32) ; VI-NEXT: [[LSHR13:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC8]], [[C2]](s16) ; VI-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY6]](s32), [[PTR_ADD15]](p1) :: (store (s8) into unknown-address + 16, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY4]](s32), [[PTR_ADD15]](p1) :: (store (s8) into unknown-address + 16, addrspace 1) ; VI-NEXT: [[ANYEXT8:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR13]](s16) ; VI-NEXT: G_STORE [[ANYEXT8]](s32), [[PTR_ADD17]](p1) :: (store (s8) into unknown-address + 17, addrspace 1) ; VI-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR12]](s32) @@ -5635,40 +5635,40 @@ ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), 
[[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<5 x s32>) - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV]](s32) ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[UV1]](s32) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY1]], [[C]](s32) ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY3]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY1]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) ; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[UV2]](s32) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY2]], [[C]](s32) ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY2]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 10, addrspace 1) ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; SI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY5]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY [[UV3]](s32) + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY3]], [[C]](s32) ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY5]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY3]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1) ; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1) ; SI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV4]](s32) - ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[COPY6]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY [[UV4]](s32) + ; SI-NEXT: 
[[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY4]], [[C]](s32) ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY6]](s32), [[PTR_ADD7]](p1) :: (store (s16) into unknown-address + 16, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY4]](s32), [[PTR_ADD7]](p1) :: (store (s16) into unknown-address + 16, addrspace 1) ; SI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD8]](p1) :: (store (s16) into unknown-address + 18, addrspace 1) ; CI-LABEL: name: test_store_global_v5s32_align2 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 @@ -5687,40 +5687,40 @@ ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<5 x s32>) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV]](s32) ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; VI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[UV1]](s32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY1]], [[C]](s32) ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY3]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY1]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) ; VI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[UV2]](s32) + ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY2]], [[C]](s32) ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY2]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) ; VI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 10, addrspace 1) ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY5]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY [[UV3]](s32) + ; VI-NEXT: 
[[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY3]], [[C]](s32) ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY5]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY3]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1) ; VI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1) ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; VI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV4]](s32) - ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[COPY6]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY [[UV4]](s32) + ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY4]], [[C]](s32) ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY6]](s32), [[PTR_ADD7]](p1) :: (store (s16) into unknown-address + 16, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY4]](s32), [[PTR_ADD7]](p1) :: (store (s16) into unknown-address + 16, addrspace 1) ; VI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD8]](p1) :: (store (s16) into unknown-address + 18, addrspace 1) ; GFX9-LABEL: name: test_store_global_v5s32_align2 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 @@ -5915,90 +5915,90 @@ ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](<5 x p3>) ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>) - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV]](s32) ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] + ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY]], [[C3]] ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[C2]](s32) ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) - ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; SI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]] - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY3]](s32) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[PRED_COPY1]](s32) ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) ; SI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; SI-NEXT: 
[[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[UV1]](s32) + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY2]], [[C]](s32) ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; SI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] - ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY5]](s32) + ; SI-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY2]], [[C3]] + ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[PRED_COPY3]](s32) ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY2]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) ; SI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) - ; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; SI-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C3]] - ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[COPY6]](s32) + ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[PRED_COPY4]](s32) ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD4]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 6, addrspace 1) ; SI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) ; SI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; SI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[COPY7]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY [[UV2]](s32) + ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY5]], [[C]](s32) ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; SI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]] - ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[COPY8]](s32) + ; SI-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY5]], [[C3]] + ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[PRED_COPY6]](s32) ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY7]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY5]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1) ; SI-NEXT: G_STORE [[LSHR7]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 9, addrspace 1) - ; SI-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; SI-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C3]] - ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[AND5]], [[COPY9]](s32) + ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[AND5]], [[PRED_COPY7]](s32) ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD8]], [[C4]](s64) ; SI-NEXT: G_STORE 
[[LSHR6]](s32), [[PTR_ADD8]](p1) :: (store (s8) into unknown-address + 10, addrspace 1) ; SI-NEXT: G_STORE [[LSHR8]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1) ; SI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; SI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; SI-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; SI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[COPY10]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY [[UV3]](s32) + ; SI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY8]], [[C]](s32) ; SI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64) - ; SI-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C3]] - ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[AND6]], [[COPY11]](s32) + ; SI-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY8]], [[C3]] + ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[AND6]], [[PRED_COPY9]](s32) ; SI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY10]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY8]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1) ; SI-NEXT: G_STORE [[LSHR10]](s32), [[PTR_ADD13]](p1) :: (store (s8) into unknown-address + 13, addrspace 1) - ; SI-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; SI-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR9]], [[C3]] - ; SI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[COPY12]](s32) + ; SI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[PRED_COPY10]](s32) ; SI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD12]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR9]](s32), [[PTR_ADD12]](p1) :: (store (s8) into unknown-address + 14, addrspace 1) ; SI-NEXT: G_STORE [[LSHR11]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1) ; SI-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; SI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64) - ; SI-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[UV4]](s32) - ; SI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[COPY13]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY [[UV4]](s32) + ; SI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY11]], [[C]](s32) ; SI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64) - ; SI-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C3]] - ; SI-NEXT: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[AND8]], [[COPY14]](s32) + ; SI-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; SI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY11]], [[C3]] + ; SI-NEXT: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[AND8]], [[PRED_COPY12]](s32) ; SI-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY13]](s32), [[PTR_ADD15]](p1) :: (store (s8) into unknown-address + 16, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY11]](s32), [[PTR_ADD15]](p1) :: (store (s8) into unknown-address + 16, addrspace 1) ; SI-NEXT: G_STORE [[LSHR13]](s32), [[PTR_ADD17]](p1) :: (store (s8) into unknown-address + 17, addrspace 1) - ; SI-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; SI-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY 
[[C2]](s32) ; SI-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LSHR12]], [[C3]] - ; SI-NEXT: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[AND9]], [[COPY15]](s32) + ; SI-NEXT: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[AND9]], [[PRED_COPY13]](s32) ; SI-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD16]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR12]](s32), [[PTR_ADD16]](p1) :: (store (s8) into unknown-address + 18, addrspace 1) ; SI-NEXT: G_STORE [[LSHR14]](s32), [[PTR_ADD18]](p1) :: (store (s8) into unknown-address + 19, addrspace 1) @@ -6021,17 +6021,17 @@ ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](<5 x p3>) ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV]](s32) ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY]](s32) ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[C2]](s16) ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR1]](s16) ; VI-NEXT: G_STORE [[ANYEXT]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) @@ -6042,13 +6042,13 @@ ; VI-NEXT: G_STORE [[ANYEXT1]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[UV1]](s32) + ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY1]], [[C]](s32) ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY1]](s32) ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC2]], [[C2]](s16) ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY3]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY1]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) ; VI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR4]](s16) ; VI-NEXT: G_STORE [[ANYEXT2]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) @@ -6059,13 +6059,13 @@ ; VI-NEXT: G_STORE [[ANYEXT3]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) ; VI-NEXT: 
[[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; VI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[UV2]](s32) + ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY2]], [[C]](s32) ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY4]](s32) + ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY2]](s32) ; VI-NEXT: [[LSHR7:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC4]], [[C2]](s16) ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY2]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1) ; VI-NEXT: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR7]](s16) ; VI-NEXT: G_STORE [[ANYEXT4]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 9, addrspace 1) ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR6]](s32) @@ -6076,13 +6076,13 @@ ; VI-NEXT: G_STORE [[ANYEXT5]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1) ; VI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; VI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; VI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[COPY5]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY [[UV3]](s32) + ; VI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY3]], [[C]](s32) ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64) - ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY5]](s32) + ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY3]](s32) ; VI-NEXT: [[LSHR10:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC6]], [[C2]](s16) ; VI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY5]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY3]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1) ; VI-NEXT: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR10]](s16) ; VI-NEXT: G_STORE [[ANYEXT6]](s32), [[PTR_ADD13]](p1) :: (store (s8) into unknown-address + 13, addrspace 1) ; VI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR9]](s32) @@ -6093,13 +6093,13 @@ ; VI-NEXT: G_STORE [[ANYEXT7]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1) ; VI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; VI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV4]](s32) - ; VI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[COPY6]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY [[UV4]](s32) + ; VI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY4]], [[C]](s32) ; VI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64) - ; VI-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[COPY6]](s32) + ; VI-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY4]](s32) ; VI-NEXT: [[LSHR13:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC8]], [[C2]](s16) ; VI-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY6]](s32), [[PTR_ADD15]](p1) :: (store (s8) into unknown-address + 16, addrspace 1) + ; VI-NEXT: 
G_STORE [[PRED_COPY4]](s32), [[PTR_ADD15]](p1) :: (store (s8) into unknown-address + 16, addrspace 1) ; VI-NEXT: [[ANYEXT8:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR13]](s16) ; VI-NEXT: G_STORE [[ANYEXT8]](s32), [[PTR_ADD17]](p1) :: (store (s8) into unknown-address + 17, addrspace 1) ; VI-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR12]](s32) @@ -6138,40 +6138,40 @@ ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](<5 x p3>) ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>) - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV]](s32) ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[UV1]](s32) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY1]], [[C]](s32) ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY3]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY1]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) ; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[UV2]](s32) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY2]], [[C]](s32) ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY2]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 10, addrspace 1) ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; SI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY5]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY [[UV3]](s32) + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY3]], [[C]](s32) ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY5]](s32), [[PTR_ADD5]](p1) :: (store (s16) into 
unknown-address + 12, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY3]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1) ; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1) ; SI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV4]](s32) - ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[COPY6]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY [[UV4]](s32) + ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY4]], [[C]](s32) ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY6]](s32), [[PTR_ADD7]](p1) :: (store (s16) into unknown-address + 16, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY4]](s32), [[PTR_ADD7]](p1) :: (store (s16) into unknown-address + 16, addrspace 1) ; SI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD8]](p1) :: (store (s16) into unknown-address + 18, addrspace 1) ; CI-LABEL: name: test_store_global_v5p3_align2 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 @@ -6192,40 +6192,40 @@ ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](<5 x p3>) ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV]](s32) ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; VI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[UV1]](s32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY1]], [[C]](s32) ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY3]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY1]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) ; VI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[UV2]](s32) + ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY2]], [[C]](s32) ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD3]](p1) :: 
(store (s16) into unknown-address + 8, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY2]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) ; VI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 10, addrspace 1) ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY5]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY [[UV3]](s32) + ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY3]], [[C]](s32) ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY5]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY3]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1) ; VI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1) ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; VI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV4]](s32) - ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[COPY6]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY [[UV4]](s32) + ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY4]], [[C]](s32) ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY6]](s32), [[PTR_ADD7]](p1) :: (store (s16) into unknown-address + 16, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY4]](s32), [[PTR_ADD7]](p1) :: (store (s16) into unknown-address + 16, addrspace 1) ; VI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD8]](p1) :: (store (s16) into unknown-address + 18, addrspace 1) ; GFX9-LABEL: name: test_store_global_v5p3_align2 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 @@ -6655,90 +6655,90 @@ ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](s160) ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>) - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV]](s32) ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] + ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY]], [[C3]] ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[C2]](s32) ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) - ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; SI-NEXT: 
[[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]] - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY3]](s32) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[PRED_COPY1]](s32) ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) ; SI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[UV1]](s32) + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY2]], [[C]](s32) ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; SI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] - ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY5]](s32) + ; SI-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY2]], [[C3]] + ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[PRED_COPY3]](s32) ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY2]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) ; SI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) - ; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; SI-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C3]] - ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[COPY6]](s32) + ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[PRED_COPY4]](s32) ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD4]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 6, addrspace 1) ; SI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) ; SI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; SI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[COPY7]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY [[UV2]](s32) + ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY5]], [[C]](s32) ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; SI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]] - ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[COPY8]](s32) + ; SI-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY5]], [[C3]] + ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[PRED_COPY6]](s32) ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY7]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY5]](s32), 
[[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1) ; SI-NEXT: G_STORE [[LSHR7]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 9, addrspace 1) - ; SI-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; SI-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C3]] - ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[AND5]], [[COPY9]](s32) + ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[AND5]], [[PRED_COPY7]](s32) ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD8]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR6]](s32), [[PTR_ADD8]](p1) :: (store (s8) into unknown-address + 10, addrspace 1) ; SI-NEXT: G_STORE [[LSHR8]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1) ; SI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; SI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; SI-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; SI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[COPY10]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY [[UV3]](s32) + ; SI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY8]], [[C]](s32) ; SI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64) - ; SI-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C3]] - ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[AND6]], [[COPY11]](s32) + ; SI-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY8]], [[C3]] + ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[AND6]], [[PRED_COPY9]](s32) ; SI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY10]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY8]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1) ; SI-NEXT: G_STORE [[LSHR10]](s32), [[PTR_ADD13]](p1) :: (store (s8) into unknown-address + 13, addrspace 1) - ; SI-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; SI-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR9]], [[C3]] - ; SI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[COPY12]](s32) + ; SI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[PRED_COPY10]](s32) ; SI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD12]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR9]](s32), [[PTR_ADD12]](p1) :: (store (s8) into unknown-address + 14, addrspace 1) ; SI-NEXT: G_STORE [[LSHR11]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1) ; SI-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; SI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64) - ; SI-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[UV4]](s32) - ; SI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[COPY13]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY [[UV4]](s32) + ; SI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY11]], [[C]](s32) ; SI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64) - ; SI-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C3]] - ; SI-NEXT: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[AND8]], [[COPY14]](s32) + ; SI-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; SI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY11]], [[C3]] + ; 
SI-NEXT: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[AND8]], [[PRED_COPY12]](s32) ; SI-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY13]](s32), [[PTR_ADD15]](p1) :: (store (s8) into unknown-address + 16, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY11]](s32), [[PTR_ADD15]](p1) :: (store (s8) into unknown-address + 16, addrspace 1) ; SI-NEXT: G_STORE [[LSHR13]](s32), [[PTR_ADD17]](p1) :: (store (s8) into unknown-address + 17, addrspace 1) - ; SI-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; SI-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) ; SI-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LSHR12]], [[C3]] - ; SI-NEXT: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[AND9]], [[COPY15]](s32) + ; SI-NEXT: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[AND9]], [[PRED_COPY13]](s32) ; SI-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD16]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR12]](s32), [[PTR_ADD16]](p1) :: (store (s8) into unknown-address + 18, addrspace 1) ; SI-NEXT: G_STORE [[LSHR14]](s32), [[PTR_ADD18]](p1) :: (store (s8) into unknown-address + 19, addrspace 1) @@ -6761,17 +6761,17 @@ ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](s160) ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV]](s32) ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY]](s32) ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[C2]](s16) ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR1]](s16) ; VI-NEXT: G_STORE [[ANYEXT]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) @@ -6782,13 +6782,13 @@ ; VI-NEXT: G_STORE [[ANYEXT1]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[UV1]](s32) + ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY1]], [[C]](s32) ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY1]](s32) ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC2]], [[C2]](s16) ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64) - 
; VI-NEXT: G_STORE [[COPY3]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY1]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) ; VI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR4]](s16) ; VI-NEXT: G_STORE [[ANYEXT2]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) @@ -6799,13 +6799,13 @@ ; VI-NEXT: G_STORE [[ANYEXT3]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; VI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[UV2]](s32) + ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY2]], [[C]](s32) ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY4]](s32) + ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY2]](s32) ; VI-NEXT: [[LSHR7:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC4]], [[C2]](s16) ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY2]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1) ; VI-NEXT: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR7]](s16) ; VI-NEXT: G_STORE [[ANYEXT4]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 9, addrspace 1) ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR6]](s32) @@ -6816,13 +6816,13 @@ ; VI-NEXT: G_STORE [[ANYEXT5]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1) ; VI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; VI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; VI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[COPY5]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY [[UV3]](s32) + ; VI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY3]], [[C]](s32) ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64) - ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY5]](s32) + ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY3]](s32) ; VI-NEXT: [[LSHR10:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC6]], [[C2]](s16) ; VI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY5]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY3]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1) ; VI-NEXT: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR10]](s16) ; VI-NEXT: G_STORE [[ANYEXT6]](s32), [[PTR_ADD13]](p1) :: (store (s8) into unknown-address + 13, addrspace 1) ; VI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR9]](s32) @@ -6833,13 +6833,13 @@ ; VI-NEXT: G_STORE [[ANYEXT7]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1) ; VI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; VI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV4]](s32) - ; VI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[COPY6]], [[C]](s32) + ; VI-NEXT: 
[[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY [[UV4]](s32) + ; VI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY4]], [[C]](s32) ; VI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64) - ; VI-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[COPY6]](s32) + ; VI-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY4]](s32) ; VI-NEXT: [[LSHR13:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC8]], [[C2]](s16) ; VI-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY6]](s32), [[PTR_ADD15]](p1) :: (store (s8) into unknown-address + 16, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY4]](s32), [[PTR_ADD15]](p1) :: (store (s8) into unknown-address + 16, addrspace 1) ; VI-NEXT: [[ANYEXT8:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR13]](s16) ; VI-NEXT: G_STORE [[ANYEXT8]](s32), [[PTR_ADD17]](p1) :: (store (s8) into unknown-address + 17, addrspace 1) ; VI-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR12]](s32) @@ -6878,40 +6878,40 @@ ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](s160) ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>) - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV]](s32) ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[UV1]](s32) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY1]], [[C]](s32) ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY3]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY1]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) ; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[UV2]](s32) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY2]], [[C]](s32) ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY2]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD4]](p1) :: (store 
(s16) into unknown-address + 10, addrspace 1) ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; SI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY5]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY [[UV3]](s32) + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY3]], [[C]](s32) ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY5]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY3]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1) ; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1) ; SI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV4]](s32) - ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[COPY6]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY [[UV4]](s32) + ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY4]], [[C]](s32) ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY6]](s32), [[PTR_ADD7]](p1) :: (store (s16) into unknown-address + 16, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY4]](s32), [[PTR_ADD7]](p1) :: (store (s16) into unknown-address + 16, addrspace 1) ; SI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD8]](p1) :: (store (s16) into unknown-address + 18, addrspace 1) ; CI-LABEL: name: test_store_global_s160_align2 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 @@ -6932,40 +6932,40 @@ ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](s160) ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV]](s32) ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; VI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[UV1]](s32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY1]], [[C]](s32) ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY3]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY1]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) ; VI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) 
:: (store (s16) into unknown-address + 6, addrspace 1) ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[UV2]](s32) + ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY2]], [[C]](s32) ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY2]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) ; VI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 10, addrspace 1) ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY5]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY [[UV3]](s32) + ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY3]], [[C]](s32) ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY5]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY3]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1) ; VI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1) ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; VI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV4]](s32) - ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[COPY6]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY [[UV4]](s32) + ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY4]], [[C]](s32) ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY6]](s32), [[PTR_ADD7]](p1) :: (store (s16) into unknown-address + 16, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY4]](s32), [[PTR_ADD7]](p1) :: (store (s16) into unknown-address + 16, addrspace 1) ; VI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD8]](p1) :: (store (s16) into unknown-address + 18, addrspace 1) ; GFX9-LABEL: name: test_store_global_s160_align2 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 @@ -7174,139 +7174,139 @@ ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; SI-NEXT: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[COPY1]](<8 x s32>) ; SI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](<4 x s32>) - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV2]](s32) ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], 
[[C3]] + ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY]], [[C3]] ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[C2]](s32) ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) - ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; SI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]] - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY3]](s32) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[PRED_COPY1]](s32) ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) ; SI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[UV3]](s32) + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY2]], [[C]](s32) ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; SI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] - ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY5]](s32) + ; SI-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY2]], [[C3]] + ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[PRED_COPY3]](s32) ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY2]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) ; SI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) - ; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; SI-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C3]] - ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[COPY6]](s32) + ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[PRED_COPY4]](s32) ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD4]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 6, addrspace 1) ; SI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) ; SI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; SI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[UV4]](s32) - ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[COPY7]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY [[UV4]](s32) + ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY5]], [[C]](s32) ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; SI-NEXT: [[COPY8:%[0-9]+]]:_(s32) 
= COPY [[C2]](s32) - ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]] - ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[COPY8]](s32) + ; SI-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY5]], [[C3]] + ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[PRED_COPY6]](s32) ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY7]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY5]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1) ; SI-NEXT: G_STORE [[LSHR7]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 9, addrspace 1) - ; SI-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; SI-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C3]] - ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[AND5]], [[COPY9]](s32) + ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[AND5]], [[PRED_COPY7]](s32) ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD8]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR6]](s32), [[PTR_ADD8]](p1) :: (store (s8) into unknown-address + 10, addrspace 1) ; SI-NEXT: G_STORE [[LSHR8]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1) ; SI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; SI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; SI-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV5]](s32) - ; SI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[COPY10]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY [[UV5]](s32) + ; SI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY8]], [[C]](s32) ; SI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64) - ; SI-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C3]] - ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[AND6]], [[COPY11]](s32) + ; SI-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY8]], [[C3]] + ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[AND6]], [[PRED_COPY9]](s32) ; SI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY10]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY8]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1) ; SI-NEXT: G_STORE [[LSHR10]](s32), [[PTR_ADD13]](p1) :: (store (s8) into unknown-address + 13, addrspace 1) - ; SI-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; SI-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR9]], [[C3]] - ; SI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[COPY12]](s32) + ; SI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[PRED_COPY10]](s32) ; SI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD12]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR9]](s32), [[PTR_ADD12]](p1) :: (store (s8) into unknown-address + 14, addrspace 1) ; SI-NEXT: G_STORE [[LSHR11]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1) ; SI-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; SI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64) ; SI-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), 
[[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](<4 x s32>) - ; SI-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[UV6]](s32) - ; SI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[COPY13]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY [[UV6]](s32) + ; SI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY11]], [[C]](s32) ; SI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64) - ; SI-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C3]] - ; SI-NEXT: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[AND8]], [[COPY14]](s32) + ; SI-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; SI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY11]], [[C3]] + ; SI-NEXT: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[AND8]], [[PRED_COPY12]](s32) ; SI-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY13]](s32), [[PTR_ADD15]](p1) :: (store (s8) into unknown-address + 16, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY11]](s32), [[PTR_ADD15]](p1) :: (store (s8) into unknown-address + 16, addrspace 1) ; SI-NEXT: G_STORE [[LSHR13]](s32), [[PTR_ADD17]](p1) :: (store (s8) into unknown-address + 17, addrspace 1) - ; SI-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; SI-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) ; SI-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LSHR12]], [[C3]] - ; SI-NEXT: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[AND9]], [[COPY15]](s32) + ; SI-NEXT: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[AND9]], [[PRED_COPY13]](s32) ; SI-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD16]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR12]](s32), [[PTR_ADD16]](p1) :: (store (s8) into unknown-address + 18, addrspace 1) ; SI-NEXT: G_STORE [[LSHR14]](s32), [[PTR_ADD18]](p1) :: (store (s8) into unknown-address + 19, addrspace 1) ; SI-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C5]](s64) - ; SI-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[UV7]](s32) - ; SI-NEXT: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[COPY16]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[UV7]](s32) + ; SI-NEXT: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY14]], [[C]](s32) ; SI-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C1]](s64) - ; SI-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY16]], [[C3]] - ; SI-NEXT: [[LSHR16:%[0-9]+]]:_(s32) = G_LSHR [[AND10]], [[COPY17]](s32) + ; SI-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; SI-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY14]], [[C3]] + ; SI-NEXT: [[LSHR16:%[0-9]+]]:_(s32) = G_LSHR [[AND10]], [[PRED_COPY15]](s32) ; SI-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY16]](s32), [[PTR_ADD19]](p1) :: (store (s8) into unknown-address + 20, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY14]](s32), [[PTR_ADD19]](p1) :: (store (s8) into unknown-address + 20, addrspace 1) ; SI-NEXT: G_STORE [[LSHR16]](s32), [[PTR_ADD21]](p1) :: (store (s8) into unknown-address + 21, addrspace 1) - ; SI-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; SI-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) ; SI-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LSHR15]], [[C3]] - ; SI-NEXT: [[LSHR17:%[0-9]+]]:_(s32) = G_LSHR [[AND11]], [[COPY18]](s32) + ; SI-NEXT: [[LSHR17:%[0-9]+]]:_(s32) = G_LSHR [[AND11]], [[PRED_COPY16]](s32) ; SI-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD20]], 
[[C4]](s64) ; SI-NEXT: G_STORE [[LSHR15]](s32), [[PTR_ADD20]](p1) :: (store (s8) into unknown-address + 22, addrspace 1) ; SI-NEXT: G_STORE [[LSHR17]](s32), [[PTR_ADD22]](p1) :: (store (s8) into unknown-address + 23, addrspace 1) ; SI-NEXT: [[PTR_ADD23:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C6]](s64) - ; SI-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[UV8]](s32) - ; SI-NEXT: [[LSHR18:%[0-9]+]]:_(s32) = G_LSHR [[COPY19]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[UV8]](s32) + ; SI-NEXT: [[LSHR18:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY17]], [[C]](s32) ; SI-NEXT: [[PTR_ADD24:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C1]](s64) - ; SI-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY19]], [[C3]] - ; SI-NEXT: [[LSHR19:%[0-9]+]]:_(s32) = G_LSHR [[AND12]], [[COPY20]](s32) + ; SI-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; SI-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY17]], [[C3]] + ; SI-NEXT: [[LSHR19:%[0-9]+]]:_(s32) = G_LSHR [[AND12]], [[PRED_COPY18]](s32) ; SI-NEXT: [[PTR_ADD25:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY19]](s32), [[PTR_ADD23]](p1) :: (store (s8) into unknown-address + 24, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY17]](s32), [[PTR_ADD23]](p1) :: (store (s8) into unknown-address + 24, addrspace 1) ; SI-NEXT: G_STORE [[LSHR19]](s32), [[PTR_ADD25]](p1) :: (store (s8) into unknown-address + 25, addrspace 1) - ; SI-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; SI-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) ; SI-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LSHR18]], [[C3]] - ; SI-NEXT: [[LSHR20:%[0-9]+]]:_(s32) = G_LSHR [[AND13]], [[COPY21]](s32) + ; SI-NEXT: [[LSHR20:%[0-9]+]]:_(s32) = G_LSHR [[AND13]], [[PRED_COPY19]](s32) ; SI-NEXT: [[PTR_ADD26:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD24]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR18]](s32), [[PTR_ADD24]](p1) :: (store (s8) into unknown-address + 26, addrspace 1) ; SI-NEXT: G_STORE [[LSHR20]](s32), [[PTR_ADD26]](p1) :: (store (s8) into unknown-address + 27, addrspace 1) ; SI-NEXT: [[PTR_ADD27:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C7]](s64) - ; SI-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY [[UV9]](s32) - ; SI-NEXT: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[COPY22]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY20:%[0-9]+]]:_(s32) = PRED_COPY [[UV9]](s32) + ; SI-NEXT: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY20]], [[C]](s32) ; SI-NEXT: [[PTR_ADD28:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD27]], [[C1]](s64) - ; SI-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[COPY22]], [[C3]] - ; SI-NEXT: [[LSHR22:%[0-9]+]]:_(s32) = G_LSHR [[AND14]], [[COPY23]](s32) + ; SI-NEXT: [[PRED_COPY21:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; SI-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY20]], [[C3]] + ; SI-NEXT: [[LSHR22:%[0-9]+]]:_(s32) = G_LSHR [[AND14]], [[PRED_COPY21]](s32) ; SI-NEXT: [[PTR_ADD29:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD27]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY22]](s32), [[PTR_ADD27]](p1) :: (store (s8) into unknown-address + 28, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY20]](s32), [[PTR_ADD27]](p1) :: (store (s8) into unknown-address + 28, addrspace 1) ; SI-NEXT: G_STORE [[LSHR22]](s32), [[PTR_ADD29]](p1) :: (store (s8) into unknown-address + 29, addrspace 1) - ; SI-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; SI-NEXT: [[PRED_COPY22:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) ; SI-NEXT: [[AND15:%[0-9]+]]:_(s32) 
= G_AND [[LSHR21]], [[C3]] - ; SI-NEXT: [[LSHR23:%[0-9]+]]:_(s32) = G_LSHR [[AND15]], [[COPY24]](s32) + ; SI-NEXT: [[LSHR23:%[0-9]+]]:_(s32) = G_LSHR [[AND15]], [[PRED_COPY22]](s32) ; SI-NEXT: [[PTR_ADD30:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD28]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR21]](s32), [[PTR_ADD28]](p1) :: (store (s8) into unknown-address + 30, addrspace 1) ; SI-NEXT: G_STORE [[LSHR23]](s32), [[PTR_ADD30]](p1) :: (store (s8) into unknown-address + 31, addrspace 1) @@ -7327,17 +7327,17 @@ ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; VI-NEXT: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[COPY1]](<8 x s32>) ; VI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](<4 x s32>) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV2]](s32) ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY]](s32) ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[C2]](s16) ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR1]](s16) ; VI-NEXT: G_STORE [[ANYEXT]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) @@ -7348,13 +7348,13 @@ ; VI-NEXT: G_STORE [[ANYEXT1]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[UV3]](s32) + ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY1]], [[C]](s32) ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY1]](s32) ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC2]], [[C2]](s16) ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY3]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY1]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) ; VI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR4]](s16) ; VI-NEXT: G_STORE [[ANYEXT2]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) @@ -7365,13 +7365,13 @@ ; VI-NEXT: G_STORE [[ANYEXT3]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) ; VI-NEXT: 
[[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; VI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV4]](s32) - ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[UV4]](s32) + ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY2]], [[C]](s32) ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY4]](s32) + ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY2]](s32) ; VI-NEXT: [[LSHR7:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC4]], [[C2]](s16) ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY2]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1) ; VI-NEXT: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR7]](s16) ; VI-NEXT: G_STORE [[ANYEXT4]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 9, addrspace 1) ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR6]](s32) @@ -7382,13 +7382,13 @@ ; VI-NEXT: G_STORE [[ANYEXT5]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1) ; VI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; VI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV5]](s32) - ; VI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[COPY5]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY [[UV5]](s32) + ; VI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY3]], [[C]](s32) ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64) - ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY5]](s32) + ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY3]](s32) ; VI-NEXT: [[LSHR10:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC6]], [[C2]](s16) ; VI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY5]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY3]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1) ; VI-NEXT: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR10]](s16) ; VI-NEXT: G_STORE [[ANYEXT6]](s32), [[PTR_ADD13]](p1) :: (store (s8) into unknown-address + 13, addrspace 1) ; VI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR9]](s32) @@ -7400,13 +7400,13 @@ ; VI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64) ; VI-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](<4 x s32>) - ; VI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV6]](s32) - ; VI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[COPY6]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY [[UV6]](s32) + ; VI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY4]], [[C]](s32) ; VI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64) - ; VI-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[COPY6]](s32) + ; VI-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY4]](s32) ; VI-NEXT: [[LSHR13:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC8]], [[C2]](s16) ; VI-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY6]](s32), [[PTR_ADD15]](p1) :: (store (s8) into 
unknown-address + 16, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY4]](s32), [[PTR_ADD15]](p1) :: (store (s8) into unknown-address + 16, addrspace 1) ; VI-NEXT: [[ANYEXT8:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR13]](s16) ; VI-NEXT: G_STORE [[ANYEXT8]](s32), [[PTR_ADD17]](p1) :: (store (s8) into unknown-address + 17, addrspace 1) ; VI-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR12]](s32) @@ -7416,13 +7416,13 @@ ; VI-NEXT: [[ANYEXT9:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR14]](s16) ; VI-NEXT: G_STORE [[ANYEXT9]](s32), [[PTR_ADD18]](p1) :: (store (s8) into unknown-address + 19, addrspace 1) ; VI-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64) - ; VI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[UV7]](s32) - ; VI-NEXT: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[COPY7]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY [[UV7]](s32) + ; VI-NEXT: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY5]], [[C]](s32) ; VI-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C1]](s64) - ; VI-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[COPY7]](s32) + ; VI-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY5]](s32) ; VI-NEXT: [[LSHR16:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC10]], [[C2]](s16) ; VI-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY7]](s32), [[PTR_ADD19]](p1) :: (store (s8) into unknown-address + 20, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY5]](s32), [[PTR_ADD19]](p1) :: (store (s8) into unknown-address + 20, addrspace 1) ; VI-NEXT: [[ANYEXT10:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR16]](s16) ; VI-NEXT: G_STORE [[ANYEXT10]](s32), [[PTR_ADD21]](p1) :: (store (s8) into unknown-address + 21, addrspace 1) ; VI-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR15]](s32) @@ -7432,13 +7432,13 @@ ; VI-NEXT: [[ANYEXT11:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR17]](s16) ; VI-NEXT: G_STORE [[ANYEXT11]](s32), [[PTR_ADD22]](p1) :: (store (s8) into unknown-address + 23, addrspace 1) ; VI-NEXT: [[PTR_ADD23:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C5]](s64) - ; VI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[UV8]](s32) - ; VI-NEXT: [[LSHR18:%[0-9]+]]:_(s32) = G_LSHR [[COPY8]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY [[UV8]](s32) + ; VI-NEXT: [[LSHR18:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY6]], [[C]](s32) ; VI-NEXT: [[PTR_ADD24:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C1]](s64) - ; VI-NEXT: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[COPY8]](s32) + ; VI-NEXT: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY6]](s32) ; VI-NEXT: [[LSHR19:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC12]], [[C2]](s16) ; VI-NEXT: [[PTR_ADD25:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY8]](s32), [[PTR_ADD23]](p1) :: (store (s8) into unknown-address + 24, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY6]](s32), [[PTR_ADD23]](p1) :: (store (s8) into unknown-address + 24, addrspace 1) ; VI-NEXT: [[ANYEXT12:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR19]](s16) ; VI-NEXT: G_STORE [[ANYEXT12]](s32), [[PTR_ADD25]](p1) :: (store (s8) into unknown-address + 25, addrspace 1) ; VI-NEXT: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR18]](s32) @@ -7448,13 +7448,13 @@ ; VI-NEXT: [[ANYEXT13:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR20]](s16) ; VI-NEXT: G_STORE [[ANYEXT13]](s32), [[PTR_ADD26]](p1) :: (store (s8) into unknown-address + 27, addrspace 1) ; VI-NEXT: [[PTR_ADD27:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C6]](s64) - ; VI-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[UV9]](s32) - ; VI-NEXT: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[COPY9]], [[C]](s32) + ; 
VI-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY [[UV9]](s32) + ; VI-NEXT: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY7]], [[C]](s32) ; VI-NEXT: [[PTR_ADD28:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD27]], [[C1]](s64) - ; VI-NEXT: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) + ; VI-NEXT: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY7]](s32) ; VI-NEXT: [[LSHR22:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC14]], [[C2]](s16) ; VI-NEXT: [[PTR_ADD29:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD27]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY9]](s32), [[PTR_ADD27]](p1) :: (store (s8) into unknown-address + 28, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY7]](s32), [[PTR_ADD27]](p1) :: (store (s8) into unknown-address + 28, addrspace 1) ; VI-NEXT: [[ANYEXT14:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR22]](s16) ; VI-NEXT: G_STORE [[ANYEXT14]](s32), [[PTR_ADD29]](p1) :: (store (s8) into unknown-address + 29, addrspace 1) ; VI-NEXT: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR21]](s32) @@ -7491,59 +7491,59 @@ ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; SI-NEXT: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[COPY1]](<8 x s32>) ; SI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](<4 x s32>) - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV2]](s32) ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[UV3]](s32) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY1]], [[C]](s32) ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY3]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY1]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) ; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV4]](s32) - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[UV4]](s32) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY2]], [[C]](s32) ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY2]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) ; SI-NEXT: 
G_STORE [[LSHR2]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 10, addrspace 1) ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; SI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV5]](s32) - ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY5]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY [[UV5]](s32) + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY3]], [[C]](s32) ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY5]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY3]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1) ; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1) ; SI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) ; SI-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](<4 x s32>) - ; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV6]](s32) - ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[COPY6]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY [[UV6]](s32) + ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY4]], [[C]](s32) ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY6]](s32), [[PTR_ADD7]](p1) :: (store (s16) into unknown-address + 16, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY4]](s32), [[PTR_ADD7]](p1) :: (store (s16) into unknown-address + 16, addrspace 1) ; SI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD8]](p1) :: (store (s16) into unknown-address + 18, addrspace 1) ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; SI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[UV7]](s32) - ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[COPY7]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY [[UV7]](s32) + ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY5]], [[C]](s32) ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD9]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY7]](s32), [[PTR_ADD9]](p1) :: (store (s16) into unknown-address + 20, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY5]](s32), [[PTR_ADD9]](p1) :: (store (s16) into unknown-address + 20, addrspace 1) ; SI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD10]](p1) :: (store (s16) into unknown-address + 22, addrspace 1) ; SI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64) - ; SI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[UV8]](s32) - ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[COPY8]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY [[UV8]](s32) + ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY6]], [[C]](s32) ; SI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY8]](s32), [[PTR_ADD11]](p1) :: (store (s16) into unknown-address + 24, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY6]](s32), [[PTR_ADD11]](p1) :: (store (s16) into unknown-address + 24, addrspace 1) ; SI-NEXT: G_STORE [[LSHR6]](s32), [[PTR_ADD12]](p1) :: (store (s16) into unknown-address + 26, addrspace 1) ; SI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) - ; SI-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[UV9]](s32) - ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR 
[[COPY9]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY [[UV9]](s32) + ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY7]], [[C]](s32) ; SI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD13]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY9]](s32), [[PTR_ADD13]](p1) :: (store (s16) into unknown-address + 28, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY7]](s32), [[PTR_ADD13]](p1) :: (store (s16) into unknown-address + 28, addrspace 1) ; SI-NEXT: G_STORE [[LSHR7]](s32), [[PTR_ADD14]](p1) :: (store (s16) into unknown-address + 30, addrspace 1) ; CI-LABEL: name: test_store_global_v8s32_align2 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 @@ -7562,59 +7562,59 @@ ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; VI-NEXT: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[COPY1]](<8 x s32>) ; VI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](<4 x s32>) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV2]](s32) ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; VI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[UV3]](s32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY1]], [[C]](s32) ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY3]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY1]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) ; VI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV4]](s32) - ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[UV4]](s32) + ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY2]], [[C]](s32) ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY2]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) ; VI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 10, addrspace 1) ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = 
COPY [[UV5]](s32) - ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY5]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY [[UV5]](s32) + ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY3]], [[C]](s32) ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY5]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY3]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1) ; VI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1) ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) ; VI-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](<4 x s32>) - ; VI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV6]](s32) - ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[COPY6]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY [[UV6]](s32) + ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY4]], [[C]](s32) ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY6]](s32), [[PTR_ADD7]](p1) :: (store (s16) into unknown-address + 16, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY4]](s32), [[PTR_ADD7]](p1) :: (store (s16) into unknown-address + 16, addrspace 1) ; VI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD8]](p1) :: (store (s16) into unknown-address + 18, addrspace 1) ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; VI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[UV7]](s32) - ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[COPY7]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY [[UV7]](s32) + ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY5]], [[C]](s32) ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD9]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY7]](s32), [[PTR_ADD9]](p1) :: (store (s16) into unknown-address + 20, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY5]](s32), [[PTR_ADD9]](p1) :: (store (s16) into unknown-address + 20, addrspace 1) ; VI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD10]](p1) :: (store (s16) into unknown-address + 22, addrspace 1) ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64) - ; VI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[UV8]](s32) - ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[COPY8]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY [[UV8]](s32) + ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY6]], [[C]](s32) ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY8]](s32), [[PTR_ADD11]](p1) :: (store (s16) into unknown-address + 24, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY6]](s32), [[PTR_ADD11]](p1) :: (store (s16) into unknown-address + 24, addrspace 1) ; VI-NEXT: G_STORE [[LSHR6]](s32), [[PTR_ADD12]](p1) :: (store (s16) into unknown-address + 26, addrspace 1) ; VI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) - ; VI-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[UV9]](s32) - ; VI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[COPY9]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY [[UV9]](s32) + ; VI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY7]], [[C]](s32) ; VI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD13]], [[C1]](s64) - ; VI-NEXT: G_STORE 
[[COPY9]](s32), [[PTR_ADD13]](p1) :: (store (s16) into unknown-address + 28, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY7]](s32), [[PTR_ADD13]](p1) :: (store (s16) into unknown-address + 28, addrspace 1) ; VI-NEXT: G_STORE [[LSHR7]](s32), [[PTR_ADD14]](p1) :: (store (s16) into unknown-address + 30, addrspace 1) ; GFX9-LABEL: name: test_store_global_v8s32_align2 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 @@ -7853,139 +7853,139 @@ ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s32>) = G_BITCAST [[COPY1]](s256) ; SI-NEXT: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s32>) ; SI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](<4 x s32>) - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV2]](s32) ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] + ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY]], [[C3]] ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[C2]](s32) ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) - ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; SI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]] - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY3]](s32) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[PRED_COPY1]](s32) ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) ; SI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[UV3]](s32) + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY2]], [[C]](s32) ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; SI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] - ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY5]](s32) + ; SI-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY2]], [[C3]] + ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[PRED_COPY3]](s32) ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD3]](p1) 
:: (store (s8) into unknown-address + 4, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY2]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) ; SI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) - ; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; SI-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C3]] - ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[COPY6]](s32) + ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[PRED_COPY4]](s32) ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD4]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 6, addrspace 1) ; SI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) ; SI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; SI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[UV4]](s32) - ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[COPY7]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY [[UV4]](s32) + ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY5]], [[C]](s32) ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; SI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]] - ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[COPY8]](s32) + ; SI-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY5]], [[C3]] + ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[PRED_COPY6]](s32) ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY7]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY5]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1) ; SI-NEXT: G_STORE [[LSHR7]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 9, addrspace 1) - ; SI-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; SI-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C3]] - ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[AND5]], [[COPY9]](s32) + ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[AND5]], [[PRED_COPY7]](s32) ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD8]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR6]](s32), [[PTR_ADD8]](p1) :: (store (s8) into unknown-address + 10, addrspace 1) ; SI-NEXT: G_STORE [[LSHR8]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1) ; SI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; SI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; SI-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV5]](s32) - ; SI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[COPY10]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY [[UV5]](s32) + ; SI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY8]], [[C]](s32) ; SI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64) - ; SI-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C3]] - ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[AND6]], [[COPY11]](s32) + ; SI-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; SI-NEXT: 
[[AND6:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY8]], [[C3]] + ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[AND6]], [[PRED_COPY9]](s32) ; SI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY10]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY8]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1) ; SI-NEXT: G_STORE [[LSHR10]](s32), [[PTR_ADD13]](p1) :: (store (s8) into unknown-address + 13, addrspace 1) - ; SI-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; SI-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR9]], [[C3]] - ; SI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[COPY12]](s32) + ; SI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[PRED_COPY10]](s32) ; SI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD12]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR9]](s32), [[PTR_ADD12]](p1) :: (store (s8) into unknown-address + 14, addrspace 1) ; SI-NEXT: G_STORE [[LSHR11]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1) ; SI-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; SI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64) ; SI-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](<4 x s32>) - ; SI-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[UV6]](s32) - ; SI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[COPY13]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY [[UV6]](s32) + ; SI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY11]], [[C]](s32) ; SI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64) - ; SI-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C3]] - ; SI-NEXT: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[AND8]], [[COPY14]](s32) + ; SI-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; SI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY11]], [[C3]] + ; SI-NEXT: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[AND8]], [[PRED_COPY12]](s32) ; SI-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY13]](s32), [[PTR_ADD15]](p1) :: (store (s8) into unknown-address + 16, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY11]](s32), [[PTR_ADD15]](p1) :: (store (s8) into unknown-address + 16, addrspace 1) ; SI-NEXT: G_STORE [[LSHR13]](s32), [[PTR_ADD17]](p1) :: (store (s8) into unknown-address + 17, addrspace 1) - ; SI-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; SI-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) ; SI-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LSHR12]], [[C3]] - ; SI-NEXT: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[AND9]], [[COPY15]](s32) + ; SI-NEXT: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[AND9]], [[PRED_COPY13]](s32) ; SI-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD16]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR12]](s32), [[PTR_ADD16]](p1) :: (store (s8) into unknown-address + 18, addrspace 1) ; SI-NEXT: G_STORE [[LSHR14]](s32), [[PTR_ADD18]](p1) :: (store (s8) into unknown-address + 19, addrspace 1) ; SI-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C5]](s64) - ; SI-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[UV7]](s32) - ; SI-NEXT: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[COPY16]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[UV7]](s32) + ; 
SI-NEXT: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY14]], [[C]](s32) ; SI-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C1]](s64) - ; SI-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY16]], [[C3]] - ; SI-NEXT: [[LSHR16:%[0-9]+]]:_(s32) = G_LSHR [[AND10]], [[COPY17]](s32) + ; SI-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; SI-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY14]], [[C3]] + ; SI-NEXT: [[LSHR16:%[0-9]+]]:_(s32) = G_LSHR [[AND10]], [[PRED_COPY15]](s32) ; SI-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY16]](s32), [[PTR_ADD19]](p1) :: (store (s8) into unknown-address + 20, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY14]](s32), [[PTR_ADD19]](p1) :: (store (s8) into unknown-address + 20, addrspace 1) ; SI-NEXT: G_STORE [[LSHR16]](s32), [[PTR_ADD21]](p1) :: (store (s8) into unknown-address + 21, addrspace 1) - ; SI-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; SI-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) ; SI-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LSHR15]], [[C3]] - ; SI-NEXT: [[LSHR17:%[0-9]+]]:_(s32) = G_LSHR [[AND11]], [[COPY18]](s32) + ; SI-NEXT: [[LSHR17:%[0-9]+]]:_(s32) = G_LSHR [[AND11]], [[PRED_COPY16]](s32) ; SI-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD20]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR15]](s32), [[PTR_ADD20]](p1) :: (store (s8) into unknown-address + 22, addrspace 1) ; SI-NEXT: G_STORE [[LSHR17]](s32), [[PTR_ADD22]](p1) :: (store (s8) into unknown-address + 23, addrspace 1) ; SI-NEXT: [[PTR_ADD23:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C6]](s64) - ; SI-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[UV8]](s32) - ; SI-NEXT: [[LSHR18:%[0-9]+]]:_(s32) = G_LSHR [[COPY19]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[UV8]](s32) + ; SI-NEXT: [[LSHR18:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY17]], [[C]](s32) ; SI-NEXT: [[PTR_ADD24:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C1]](s64) - ; SI-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY19]], [[C3]] - ; SI-NEXT: [[LSHR19:%[0-9]+]]:_(s32) = G_LSHR [[AND12]], [[COPY20]](s32) + ; SI-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; SI-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY17]], [[C3]] + ; SI-NEXT: [[LSHR19:%[0-9]+]]:_(s32) = G_LSHR [[AND12]], [[PRED_COPY18]](s32) ; SI-NEXT: [[PTR_ADD25:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY19]](s32), [[PTR_ADD23]](p1) :: (store (s8) into unknown-address + 24, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY17]](s32), [[PTR_ADD23]](p1) :: (store (s8) into unknown-address + 24, addrspace 1) ; SI-NEXT: G_STORE [[LSHR19]](s32), [[PTR_ADD25]](p1) :: (store (s8) into unknown-address + 25, addrspace 1) - ; SI-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; SI-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) ; SI-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LSHR18]], [[C3]] - ; SI-NEXT: [[LSHR20:%[0-9]+]]:_(s32) = G_LSHR [[AND13]], [[COPY21]](s32) + ; SI-NEXT: [[LSHR20:%[0-9]+]]:_(s32) = G_LSHR [[AND13]], [[PRED_COPY19]](s32) ; SI-NEXT: [[PTR_ADD26:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD24]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR18]](s32), [[PTR_ADD24]](p1) :: (store (s8) into unknown-address + 26, addrspace 1) ; SI-NEXT: G_STORE [[LSHR20]](s32), [[PTR_ADD26]](p1) :: (store (s8) into unknown-address + 27, addrspace 1) ; SI-NEXT: 
[[PTR_ADD27:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C7]](s64) - ; SI-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY [[UV9]](s32) - ; SI-NEXT: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[COPY22]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY20:%[0-9]+]]:_(s32) = PRED_COPY [[UV9]](s32) + ; SI-NEXT: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY20]], [[C]](s32) ; SI-NEXT: [[PTR_ADD28:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD27]], [[C1]](s64) - ; SI-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[COPY22]], [[C3]] - ; SI-NEXT: [[LSHR22:%[0-9]+]]:_(s32) = G_LSHR [[AND14]], [[COPY23]](s32) + ; SI-NEXT: [[PRED_COPY21:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; SI-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY20]], [[C3]] + ; SI-NEXT: [[LSHR22:%[0-9]+]]:_(s32) = G_LSHR [[AND14]], [[PRED_COPY21]](s32) ; SI-NEXT: [[PTR_ADD29:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD27]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY22]](s32), [[PTR_ADD27]](p1) :: (store (s8) into unknown-address + 28, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY20]](s32), [[PTR_ADD27]](p1) :: (store (s8) into unknown-address + 28, addrspace 1) ; SI-NEXT: G_STORE [[LSHR22]](s32), [[PTR_ADD29]](p1) :: (store (s8) into unknown-address + 29, addrspace 1) - ; SI-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; SI-NEXT: [[PRED_COPY22:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) ; SI-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LSHR21]], [[C3]] - ; SI-NEXT: [[LSHR23:%[0-9]+]]:_(s32) = G_LSHR [[AND15]], [[COPY24]](s32) + ; SI-NEXT: [[LSHR23:%[0-9]+]]:_(s32) = G_LSHR [[AND15]], [[PRED_COPY22]](s32) ; SI-NEXT: [[PTR_ADD30:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD28]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR21]](s32), [[PTR_ADD28]](p1) :: (store (s8) into unknown-address + 30, addrspace 1) ; SI-NEXT: G_STORE [[LSHR23]](s32), [[PTR_ADD30]](p1) :: (store (s8) into unknown-address + 31, addrspace 1) @@ -8008,17 +8008,17 @@ ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s32>) = G_BITCAST [[COPY1]](s256) ; VI-NEXT: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s32>) ; VI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](<4 x s32>) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV2]](s32) ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY]](s32) ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[C2]](s16) ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR1]](s16) ; VI-NEXT: G_STORE [[ANYEXT]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) @@ -8029,13 +8029,13 @@ ; VI-NEXT: G_STORE [[ANYEXT1]](s32), [[PTR_ADD2]](p1) :: (store (s8) into 
unknown-address + 3, addrspace 1) ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[UV3]](s32) + ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY1]], [[C]](s32) ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY1]](s32) ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC2]], [[C2]](s16) ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY3]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY1]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) ; VI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR4]](s16) ; VI-NEXT: G_STORE [[ANYEXT2]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) @@ -8046,13 +8046,13 @@ ; VI-NEXT: G_STORE [[ANYEXT3]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; VI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV4]](s32) - ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[UV4]](s32) + ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY2]], [[C]](s32) ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY4]](s32) + ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY2]](s32) ; VI-NEXT: [[LSHR7:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC4]], [[C2]](s16) ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY2]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1) ; VI-NEXT: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR7]](s16) ; VI-NEXT: G_STORE [[ANYEXT4]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 9, addrspace 1) ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR6]](s32) @@ -8063,13 +8063,13 @@ ; VI-NEXT: G_STORE [[ANYEXT5]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1) ; VI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; VI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV5]](s32) - ; VI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[COPY5]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY [[UV5]](s32) + ; VI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY3]], [[C]](s32) ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64) - ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY5]](s32) + ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY3]](s32) ; VI-NEXT: [[LSHR10:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC6]], [[C2]](s16) ; VI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY5]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, 
addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY3]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1) ; VI-NEXT: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR10]](s16) ; VI-NEXT: G_STORE [[ANYEXT6]](s32), [[PTR_ADD13]](p1) :: (store (s8) into unknown-address + 13, addrspace 1) ; VI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR9]](s32) @@ -8081,13 +8081,13 @@ ; VI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64) ; VI-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](<4 x s32>) - ; VI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV6]](s32) - ; VI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[COPY6]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY [[UV6]](s32) + ; VI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY4]], [[C]](s32) ; VI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64) - ; VI-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[COPY6]](s32) + ; VI-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY4]](s32) ; VI-NEXT: [[LSHR13:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC8]], [[C2]](s16) ; VI-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY6]](s32), [[PTR_ADD15]](p1) :: (store (s8) into unknown-address + 16, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY4]](s32), [[PTR_ADD15]](p1) :: (store (s8) into unknown-address + 16, addrspace 1) ; VI-NEXT: [[ANYEXT8:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR13]](s16) ; VI-NEXT: G_STORE [[ANYEXT8]](s32), [[PTR_ADD17]](p1) :: (store (s8) into unknown-address + 17, addrspace 1) ; VI-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR12]](s32) @@ -8097,13 +8097,13 @@ ; VI-NEXT: [[ANYEXT9:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR14]](s16) ; VI-NEXT: G_STORE [[ANYEXT9]](s32), [[PTR_ADD18]](p1) :: (store (s8) into unknown-address + 19, addrspace 1) ; VI-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64) - ; VI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[UV7]](s32) - ; VI-NEXT: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[COPY7]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY [[UV7]](s32) + ; VI-NEXT: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY5]], [[C]](s32) ; VI-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C1]](s64) - ; VI-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[COPY7]](s32) + ; VI-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY5]](s32) ; VI-NEXT: [[LSHR16:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC10]], [[C2]](s16) ; VI-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY7]](s32), [[PTR_ADD19]](p1) :: (store (s8) into unknown-address + 20, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY5]](s32), [[PTR_ADD19]](p1) :: (store (s8) into unknown-address + 20, addrspace 1) ; VI-NEXT: [[ANYEXT10:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR16]](s16) ; VI-NEXT: G_STORE [[ANYEXT10]](s32), [[PTR_ADD21]](p1) :: (store (s8) into unknown-address + 21, addrspace 1) ; VI-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR15]](s32) @@ -8113,13 +8113,13 @@ ; VI-NEXT: [[ANYEXT11:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR17]](s16) ; VI-NEXT: G_STORE [[ANYEXT11]](s32), [[PTR_ADD22]](p1) :: (store (s8) into unknown-address + 23, addrspace 1) ; VI-NEXT: [[PTR_ADD23:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C5]](s64) - ; VI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[UV8]](s32) - ; VI-NEXT: [[LSHR18:%[0-9]+]]:_(s32) = G_LSHR [[COPY8]], [[C]](s32) + ; VI-NEXT: 
[[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY [[UV8]](s32) + ; VI-NEXT: [[LSHR18:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY6]], [[C]](s32) ; VI-NEXT: [[PTR_ADD24:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C1]](s64) - ; VI-NEXT: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[COPY8]](s32) + ; VI-NEXT: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY6]](s32) ; VI-NEXT: [[LSHR19:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC12]], [[C2]](s16) ; VI-NEXT: [[PTR_ADD25:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY8]](s32), [[PTR_ADD23]](p1) :: (store (s8) into unknown-address + 24, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY6]](s32), [[PTR_ADD23]](p1) :: (store (s8) into unknown-address + 24, addrspace 1) ; VI-NEXT: [[ANYEXT12:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR19]](s16) ; VI-NEXT: G_STORE [[ANYEXT12]](s32), [[PTR_ADD25]](p1) :: (store (s8) into unknown-address + 25, addrspace 1) ; VI-NEXT: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR18]](s32) @@ -8129,13 +8129,13 @@ ; VI-NEXT: [[ANYEXT13:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR20]](s16) ; VI-NEXT: G_STORE [[ANYEXT13]](s32), [[PTR_ADD26]](p1) :: (store (s8) into unknown-address + 27, addrspace 1) ; VI-NEXT: [[PTR_ADD27:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C6]](s64) - ; VI-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[UV9]](s32) - ; VI-NEXT: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[COPY9]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY [[UV9]](s32) + ; VI-NEXT: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY7]], [[C]](s32) ; VI-NEXT: [[PTR_ADD28:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD27]], [[C1]](s64) - ; VI-NEXT: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) + ; VI-NEXT: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY7]](s32) ; VI-NEXT: [[LSHR22:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC14]], [[C2]](s16) ; VI-NEXT: [[PTR_ADD29:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD27]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY9]](s32), [[PTR_ADD27]](p1) :: (store (s8) into unknown-address + 28, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY7]](s32), [[PTR_ADD27]](p1) :: (store (s8) into unknown-address + 28, addrspace 1) ; VI-NEXT: [[ANYEXT14:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR22]](s16) ; VI-NEXT: G_STORE [[ANYEXT14]](s32), [[PTR_ADD29]](p1) :: (store (s8) into unknown-address + 29, addrspace 1) ; VI-NEXT: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR21]](s32) @@ -8174,59 +8174,59 @@ ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s32>) = G_BITCAST [[COPY1]](s256) ; SI-NEXT: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s32>) ; SI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](<4 x s32>) - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV2]](s32) ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; SI-NEXT: 
[[COPY3:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[UV3]](s32) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY1]], [[C]](s32) ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY3]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY1]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) ; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV4]](s32) - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[UV4]](s32) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY2]], [[C]](s32) ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY2]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 10, addrspace 1) ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; SI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV5]](s32) - ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY5]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY [[UV5]](s32) + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY3]], [[C]](s32) ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY5]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY3]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1) ; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1) ; SI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) ; SI-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](<4 x s32>) - ; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV6]](s32) - ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[COPY6]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY [[UV6]](s32) + ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY4]], [[C]](s32) ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY6]](s32), [[PTR_ADD7]](p1) :: (store (s16) into unknown-address + 16, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY4]](s32), [[PTR_ADD7]](p1) :: (store (s16) into unknown-address + 16, addrspace 1) ; SI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD8]](p1) :: (store (s16) into unknown-address + 18, addrspace 1) ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; SI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[UV7]](s32) - ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[COPY7]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY [[UV7]](s32) + ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY5]], 
[[C]](s32) ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD9]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY7]](s32), [[PTR_ADD9]](p1) :: (store (s16) into unknown-address + 20, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY5]](s32), [[PTR_ADD9]](p1) :: (store (s16) into unknown-address + 20, addrspace 1) ; SI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD10]](p1) :: (store (s16) into unknown-address + 22, addrspace 1) ; SI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64) - ; SI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[UV8]](s32) - ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[COPY8]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY [[UV8]](s32) + ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY6]], [[C]](s32) ; SI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY8]](s32), [[PTR_ADD11]](p1) :: (store (s16) into unknown-address + 24, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY6]](s32), [[PTR_ADD11]](p1) :: (store (s16) into unknown-address + 24, addrspace 1) ; SI-NEXT: G_STORE [[LSHR6]](s32), [[PTR_ADD12]](p1) :: (store (s16) into unknown-address + 26, addrspace 1) ; SI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) - ; SI-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[UV9]](s32) - ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[COPY9]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY [[UV9]](s32) + ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY7]], [[C]](s32) ; SI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD13]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY9]](s32), [[PTR_ADD13]](p1) :: (store (s16) into unknown-address + 28, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY7]](s32), [[PTR_ADD13]](p1) :: (store (s16) into unknown-address + 28, addrspace 1) ; SI-NEXT: G_STORE [[LSHR7]](s32), [[PTR_ADD14]](p1) :: (store (s16) into unknown-address + 30, addrspace 1) ; CI-LABEL: name: test_store_global_s256_align2 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 @@ -8247,59 +8247,59 @@ ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s32>) = G_BITCAST [[COPY1]](s256) ; VI-NEXT: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s32>) ; VI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](<4 x s32>) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV2]](s32) ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; VI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[UV3]](s32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY1]], [[C]](s32) ; VI-NEXT: 
[[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY3]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY1]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) ; VI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV4]](s32) - ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[UV4]](s32) + ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY2]], [[C]](s32) ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY2]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) ; VI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 10, addrspace 1) ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV5]](s32) - ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY5]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY [[UV5]](s32) + ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY3]], [[C]](s32) ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY5]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY3]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1) ; VI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1) ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) ; VI-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](<4 x s32>) - ; VI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV6]](s32) - ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[COPY6]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY [[UV6]](s32) + ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY4]], [[C]](s32) ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY6]](s32), [[PTR_ADD7]](p1) :: (store (s16) into unknown-address + 16, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY4]](s32), [[PTR_ADD7]](p1) :: (store (s16) into unknown-address + 16, addrspace 1) ; VI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD8]](p1) :: (store (s16) into unknown-address + 18, addrspace 1) ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; VI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[UV7]](s32) - ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[COPY7]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY [[UV7]](s32) + ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY5]], [[C]](s32) ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD9]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY7]](s32), [[PTR_ADD9]](p1) :: (store (s16) into unknown-address + 20, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY5]](s32), [[PTR_ADD9]](p1) :: (store (s16) 
into unknown-address + 20, addrspace 1) ; VI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD10]](p1) :: (store (s16) into unknown-address + 22, addrspace 1) ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64) - ; VI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[UV8]](s32) - ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[COPY8]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY [[UV8]](s32) + ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY6]], [[C]](s32) ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY8]](s32), [[PTR_ADD11]](p1) :: (store (s16) into unknown-address + 24, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY6]](s32), [[PTR_ADD11]](p1) :: (store (s16) into unknown-address + 24, addrspace 1) ; VI-NEXT: G_STORE [[LSHR6]](s32), [[PTR_ADD12]](p1) :: (store (s16) into unknown-address + 26, addrspace 1) ; VI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) - ; VI-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[UV9]](s32) - ; VI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[COPY9]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY [[UV9]](s32) + ; VI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY7]], [[C]](s32) ; VI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD13]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY9]](s32), [[PTR_ADD13]](p1) :: (store (s16) into unknown-address + 28, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY7]](s32), [[PTR_ADD13]](p1) :: (store (s16) into unknown-address + 28, addrspace 1) ; VI-NEXT: G_STORE [[LSHR7]](s32), [[PTR_ADD14]](p1) :: (store (s16) into unknown-address + 30, addrspace 1) ; GFX9-LABEL: name: test_store_global_s256_align2 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 @@ -8604,155 +8604,155 @@ ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) ; SI-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>) ; SI-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY3]](<3 x s32>) - ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV]](s32) ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] + ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY]], [[C3]] ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[C2]](s32) ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY4]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) - ; SI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; SI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]] - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR 
[[AND1]], [[COPY5]](s32) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[PRED_COPY1]](s32) ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) ; SI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY6]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[UV1]](s32) + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY2]], [[C]](s32) ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; SI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C3]] - ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY7]](s32) + ; SI-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY2]], [[C3]] + ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[PRED_COPY3]](s32) ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY6]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY2]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) ; SI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) - ; SI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; SI-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C3]] - ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[COPY8]](s32) + ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[PRED_COPY4]](s32) ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD4]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 6, addrspace 1) ; SI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) ; SI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; SI-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[COPY9]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY [[UV2]](s32) + ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY5]], [[C]](s32) ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; SI-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]] - ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[COPY10]](s32) + ; SI-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY5]], [[C3]] + ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[PRED_COPY6]](s32) ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY9]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY5]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1) ; SI-NEXT: G_STORE [[LSHR7]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 9, 
addrspace 1) - ; SI-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; SI-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C3]] - ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[AND5]], [[COPY11]](s32) + ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[AND5]], [[PRED_COPY7]](s32) ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD8]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR6]](s32), [[PTR_ADD8]](p1) :: (store (s8) into unknown-address + 10, addrspace 1) ; SI-NEXT: G_STORE [[LSHR8]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1) ; SI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; SI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; SI-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; SI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[COPY12]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY [[UV3]](s32) + ; SI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY8]], [[C]](s32) ; SI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64) - ; SI-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C3]] - ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[AND6]], [[COPY13]](s32) + ; SI-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY8]], [[C3]] + ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[AND6]], [[PRED_COPY9]](s32) ; SI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY12]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY8]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1) ; SI-NEXT: G_STORE [[LSHR10]](s32), [[PTR_ADD13]](p1) :: (store (s8) into unknown-address + 13, addrspace 1) - ; SI-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; SI-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR9]], [[C3]] - ; SI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[COPY14]](s32) + ; SI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[PRED_COPY10]](s32) ; SI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD12]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR9]](s32), [[PTR_ADD12]](p1) :: (store (s8) into unknown-address + 14, addrspace 1) ; SI-NEXT: G_STORE [[LSHR11]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1) ; SI-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; SI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64) - ; SI-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[UV4]](s32) - ; SI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[COPY15]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY [[UV4]](s32) + ; SI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY11]], [[C]](s32) ; SI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64) - ; SI-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C3]] - ; SI-NEXT: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[AND8]], [[COPY16]](s32) + ; SI-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; SI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY11]], [[C3]] + ; SI-NEXT: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[AND8]], [[PRED_COPY12]](s32) ; SI-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64) - ; SI-NEXT: 
G_STORE [[COPY15]](s32), [[PTR_ADD15]](p1) :: (store (s8) into unknown-address + 16, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY11]](s32), [[PTR_ADD15]](p1) :: (store (s8) into unknown-address + 16, addrspace 1) ; SI-NEXT: G_STORE [[LSHR13]](s32), [[PTR_ADD17]](p1) :: (store (s8) into unknown-address + 17, addrspace 1) - ; SI-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; SI-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) ; SI-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LSHR12]], [[C3]] - ; SI-NEXT: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[AND9]], [[COPY17]](s32) + ; SI-NEXT: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[AND9]], [[PRED_COPY13]](s32) ; SI-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD16]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR12]](s32), [[PTR_ADD16]](p1) :: (store (s8) into unknown-address + 18, addrspace 1) ; SI-NEXT: G_STORE [[LSHR14]](s32), [[PTR_ADD18]](p1) :: (store (s8) into unknown-address + 19, addrspace 1) ; SI-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C5]](s64) - ; SI-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[UV5]](s32) - ; SI-NEXT: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[COPY18]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[UV5]](s32) + ; SI-NEXT: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY14]], [[C]](s32) ; SI-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C1]](s64) - ; SI-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY18]], [[C3]] - ; SI-NEXT: [[LSHR16:%[0-9]+]]:_(s32) = G_LSHR [[AND10]], [[COPY19]](s32) + ; SI-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; SI-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY14]], [[C3]] + ; SI-NEXT: [[LSHR16:%[0-9]+]]:_(s32) = G_LSHR [[AND10]], [[PRED_COPY15]](s32) ; SI-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY18]](s32), [[PTR_ADD19]](p1) :: (store (s8) into unknown-address + 20, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY14]](s32), [[PTR_ADD19]](p1) :: (store (s8) into unknown-address + 20, addrspace 1) ; SI-NEXT: G_STORE [[LSHR16]](s32), [[PTR_ADD21]](p1) :: (store (s8) into unknown-address + 21, addrspace 1) - ; SI-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; SI-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) ; SI-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LSHR15]], [[C3]] - ; SI-NEXT: [[LSHR17:%[0-9]+]]:_(s32) = G_LSHR [[AND11]], [[COPY20]](s32) + ; SI-NEXT: [[LSHR17:%[0-9]+]]:_(s32) = G_LSHR [[AND11]], [[PRED_COPY16]](s32) ; SI-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD20]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR15]](s32), [[PTR_ADD20]](p1) :: (store (s8) into unknown-address + 22, addrspace 1) ; SI-NEXT: G_STORE [[LSHR17]](s32), [[PTR_ADD22]](p1) :: (store (s8) into unknown-address + 23, addrspace 1) ; SI-NEXT: [[PTR_ADD23:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C6]](s64) - ; SI-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY [[UV6]](s32) - ; SI-NEXT: [[LSHR18:%[0-9]+]]:_(s32) = G_LSHR [[COPY21]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[UV6]](s32) + ; SI-NEXT: [[LSHR18:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY17]], [[C]](s32) ; SI-NEXT: [[PTR_ADD24:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C1]](s64) - ; SI-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY21]], [[C3]] - ; SI-NEXT: [[LSHR19:%[0-9]+]]:_(s32) = G_LSHR [[AND12]], [[COPY22]](s32) + ; SI-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY 
[[C2]](s32) + ; SI-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY17]], [[C3]] + ; SI-NEXT: [[LSHR19:%[0-9]+]]:_(s32) = G_LSHR [[AND12]], [[PRED_COPY18]](s32) ; SI-NEXT: [[PTR_ADD25:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY21]](s32), [[PTR_ADD23]](p1) :: (store (s8) into unknown-address + 24, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY17]](s32), [[PTR_ADD23]](p1) :: (store (s8) into unknown-address + 24, addrspace 1) ; SI-NEXT: G_STORE [[LSHR19]](s32), [[PTR_ADD25]](p1) :: (store (s8) into unknown-address + 25, addrspace 1) - ; SI-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; SI-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) ; SI-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LSHR18]], [[C3]] - ; SI-NEXT: [[LSHR20:%[0-9]+]]:_(s32) = G_LSHR [[AND13]], [[COPY23]](s32) + ; SI-NEXT: [[LSHR20:%[0-9]+]]:_(s32) = G_LSHR [[AND13]], [[PRED_COPY19]](s32) ; SI-NEXT: [[PTR_ADD26:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD24]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR18]](s32), [[PTR_ADD24]](p1) :: (store (s8) into unknown-address + 26, addrspace 1) ; SI-NEXT: G_STORE [[LSHR20]](s32), [[PTR_ADD26]](p1) :: (store (s8) into unknown-address + 27, addrspace 1) ; SI-NEXT: [[PTR_ADD27:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C7]](s64) - ; SI-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY [[UV7]](s32) - ; SI-NEXT: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[COPY24]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY20:%[0-9]+]]:_(s32) = PRED_COPY [[UV7]](s32) + ; SI-NEXT: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY20]], [[C]](s32) ; SI-NEXT: [[PTR_ADD28:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD27]], [[C1]](s64) - ; SI-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[COPY24]], [[C3]] - ; SI-NEXT: [[LSHR22:%[0-9]+]]:_(s32) = G_LSHR [[AND14]], [[COPY25]](s32) + ; SI-NEXT: [[PRED_COPY21:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; SI-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY20]], [[C3]] + ; SI-NEXT: [[LSHR22:%[0-9]+]]:_(s32) = G_LSHR [[AND14]], [[PRED_COPY21]](s32) ; SI-NEXT: [[PTR_ADD29:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD27]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY24]](s32), [[PTR_ADD27]](p1) :: (store (s8) into unknown-address + 28, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY20]](s32), [[PTR_ADD27]](p1) :: (store (s8) into unknown-address + 28, addrspace 1) ; SI-NEXT: G_STORE [[LSHR22]](s32), [[PTR_ADD29]](p1) :: (store (s8) into unknown-address + 29, addrspace 1) - ; SI-NEXT: [[COPY26:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; SI-NEXT: [[PRED_COPY22:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) ; SI-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LSHR21]], [[C3]] - ; SI-NEXT: [[LSHR23:%[0-9]+]]:_(s32) = G_LSHR [[AND15]], [[COPY26]](s32) + ; SI-NEXT: [[LSHR23:%[0-9]+]]:_(s32) = G_LSHR [[AND15]], [[PRED_COPY22]](s32) ; SI-NEXT: [[PTR_ADD30:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD28]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR21]](s32), [[PTR_ADD28]](p1) :: (store (s8) into unknown-address + 30, addrspace 1) ; SI-NEXT: G_STORE [[LSHR23]](s32), [[PTR_ADD30]](p1) :: (store (s8) into unknown-address + 31, addrspace 1) ; SI-NEXT: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 ; SI-NEXT: [[PTR_ADD31:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C9]](s64) - ; SI-NEXT: [[COPY27:%[0-9]+]]:_(s32) = COPY [[UV8]](s32) - ; SI-NEXT: [[LSHR24:%[0-9]+]]:_(s32) = G_LSHR [[COPY27]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY23:%[0-9]+]]:_(s32) = PRED_COPY [[UV8]](s32) + ; SI-NEXT: [[LSHR24:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY23]], [[C]](s32) ; SI-NEXT: 
[[PTR_ADD32:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD31]], [[C1]](s64) - ; SI-NEXT: [[COPY28:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND16:%[0-9]+]]:_(s32) = G_AND [[COPY27]], [[C3]] - ; SI-NEXT: [[LSHR25:%[0-9]+]]:_(s32) = G_LSHR [[AND16]], [[COPY28]](s32) + ; SI-NEXT: [[PRED_COPY24:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; SI-NEXT: [[AND16:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY23]], [[C3]] + ; SI-NEXT: [[LSHR25:%[0-9]+]]:_(s32) = G_LSHR [[AND16]], [[PRED_COPY24]](s32) ; SI-NEXT: [[PTR_ADD33:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD31]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY27]](s32), [[PTR_ADD31]](p1) :: (store (s8) into unknown-address + 32, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY23]](s32), [[PTR_ADD31]](p1) :: (store (s8) into unknown-address + 32, addrspace 1) ; SI-NEXT: G_STORE [[LSHR25]](s32), [[PTR_ADD33]](p1) :: (store (s8) into unknown-address + 33, addrspace 1) - ; SI-NEXT: [[COPY29:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; SI-NEXT: [[PRED_COPY25:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) ; SI-NEXT: [[AND17:%[0-9]+]]:_(s32) = G_AND [[LSHR24]], [[C3]] - ; SI-NEXT: [[LSHR26:%[0-9]+]]:_(s32) = G_LSHR [[AND17]], [[COPY29]](s32) + ; SI-NEXT: [[LSHR26:%[0-9]+]]:_(s32) = G_LSHR [[AND17]], [[PRED_COPY25]](s32) ; SI-NEXT: [[PTR_ADD34:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD32]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR24]](s32), [[PTR_ADD32]](p1) :: (store (s8) into unknown-address + 34, addrspace 1) ; SI-NEXT: G_STORE [[LSHR26]](s32), [[PTR_ADD34]](p1) :: (store (s8) into unknown-address + 35, addrspace 1) @@ -8785,17 +8785,17 @@ ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) ; VI-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>) ; VI-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY3]](<3 x s32>) - ; VI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV]](s32) ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY4]](s32) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY]](s32) ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[C2]](s16) ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY4]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR1]](s16) ; VI-NEXT: G_STORE [[ANYEXT]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) @@ -8806,13 +8806,13 @@ ; VI-NEXT: G_STORE [[ANYEXT1]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY5]], [[C]](s32) + ; VI-NEXT: 
[[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[UV1]](s32) + ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY1]], [[C]](s32) ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY5]](s32) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY1]](s32) ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC2]], [[C2]](s16) ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY5]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY1]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) ; VI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR4]](s16) ; VI-NEXT: G_STORE [[ANYEXT2]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) @@ -8823,13 +8823,13 @@ ; VI-NEXT: G_STORE [[ANYEXT3]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; VI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[COPY6]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[UV2]](s32) + ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY2]], [[C]](s32) ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY6]](s32) + ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY2]](s32) ; VI-NEXT: [[LSHR7:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC4]], [[C2]](s16) ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY6]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY2]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1) ; VI-NEXT: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR7]](s16) ; VI-NEXT: G_STORE [[ANYEXT4]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 9, addrspace 1) ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR6]](s32) @@ -8840,13 +8840,13 @@ ; VI-NEXT: G_STORE [[ANYEXT5]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1) ; VI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; VI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; VI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[COPY7]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY [[UV3]](s32) + ; VI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY3]], [[C]](s32) ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64) - ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY7]](s32) + ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY3]](s32) ; VI-NEXT: [[LSHR10:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC6]], [[C2]](s16) ; VI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY7]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY3]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1) ; VI-NEXT: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR10]](s16) ; VI-NEXT: G_STORE [[ANYEXT6]](s32), [[PTR_ADD13]](p1) :: (store (s8) into unknown-address + 13, 
addrspace 1) ; VI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR9]](s32) @@ -8857,13 +8857,13 @@ ; VI-NEXT: G_STORE [[ANYEXT7]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1) ; VI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; VI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[UV4]](s32) - ; VI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[COPY8]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY [[UV4]](s32) + ; VI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY4]], [[C]](s32) ; VI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64) - ; VI-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[COPY8]](s32) + ; VI-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY4]](s32) ; VI-NEXT: [[LSHR13:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC8]], [[C2]](s16) ; VI-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY8]](s32), [[PTR_ADD15]](p1) :: (store (s8) into unknown-address + 16, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY4]](s32), [[PTR_ADD15]](p1) :: (store (s8) into unknown-address + 16, addrspace 1) ; VI-NEXT: [[ANYEXT8:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR13]](s16) ; VI-NEXT: G_STORE [[ANYEXT8]](s32), [[PTR_ADD17]](p1) :: (store (s8) into unknown-address + 17, addrspace 1) ; VI-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR12]](s32) @@ -8873,13 +8873,13 @@ ; VI-NEXT: [[ANYEXT9:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR14]](s16) ; VI-NEXT: G_STORE [[ANYEXT9]](s32), [[PTR_ADD18]](p1) :: (store (s8) into unknown-address + 19, addrspace 1) ; VI-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64) - ; VI-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[UV5]](s32) - ; VI-NEXT: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[COPY9]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY [[UV5]](s32) + ; VI-NEXT: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY5]], [[C]](s32) ; VI-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C1]](s64) - ; VI-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) + ; VI-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY5]](s32) ; VI-NEXT: [[LSHR16:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC10]], [[C2]](s16) ; VI-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY9]](s32), [[PTR_ADD19]](p1) :: (store (s8) into unknown-address + 20, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY5]](s32), [[PTR_ADD19]](p1) :: (store (s8) into unknown-address + 20, addrspace 1) ; VI-NEXT: [[ANYEXT10:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR16]](s16) ; VI-NEXT: G_STORE [[ANYEXT10]](s32), [[PTR_ADD21]](p1) :: (store (s8) into unknown-address + 21, addrspace 1) ; VI-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR15]](s32) @@ -8889,13 +8889,13 @@ ; VI-NEXT: [[ANYEXT11:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR17]](s16) ; VI-NEXT: G_STORE [[ANYEXT11]](s32), [[PTR_ADD22]](p1) :: (store (s8) into unknown-address + 23, addrspace 1) ; VI-NEXT: [[PTR_ADD23:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C5]](s64) - ; VI-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV6]](s32) - ; VI-NEXT: [[LSHR18:%[0-9]+]]:_(s32) = G_LSHR [[COPY10]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY [[UV6]](s32) + ; VI-NEXT: [[LSHR18:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY6]], [[C]](s32) ; VI-NEXT: [[PTR_ADD24:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C1]](s64) - ; VI-NEXT: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32) + ; VI-NEXT: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC 
[[PRED_COPY6]](s32) ; VI-NEXT: [[LSHR19:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC12]], [[C2]](s16) ; VI-NEXT: [[PTR_ADD25:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY10]](s32), [[PTR_ADD23]](p1) :: (store (s8) into unknown-address + 24, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY6]](s32), [[PTR_ADD23]](p1) :: (store (s8) into unknown-address + 24, addrspace 1) ; VI-NEXT: [[ANYEXT12:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR19]](s16) ; VI-NEXT: G_STORE [[ANYEXT12]](s32), [[PTR_ADD25]](p1) :: (store (s8) into unknown-address + 25, addrspace 1) ; VI-NEXT: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR18]](s32) @@ -8905,13 +8905,13 @@ ; VI-NEXT: [[ANYEXT13:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR20]](s16) ; VI-NEXT: G_STORE [[ANYEXT13]](s32), [[PTR_ADD26]](p1) :: (store (s8) into unknown-address + 27, addrspace 1) ; VI-NEXT: [[PTR_ADD27:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C6]](s64) - ; VI-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[UV7]](s32) - ; VI-NEXT: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[COPY11]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY [[UV7]](s32) + ; VI-NEXT: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY7]], [[C]](s32) ; VI-NEXT: [[PTR_ADD28:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD27]], [[C1]](s64) - ; VI-NEXT: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[COPY11]](s32) + ; VI-NEXT: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY7]](s32) ; VI-NEXT: [[LSHR22:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC14]], [[C2]](s16) ; VI-NEXT: [[PTR_ADD29:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD27]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY11]](s32), [[PTR_ADD27]](p1) :: (store (s8) into unknown-address + 28, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY7]](s32), [[PTR_ADD27]](p1) :: (store (s8) into unknown-address + 28, addrspace 1) ; VI-NEXT: [[ANYEXT14:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR22]](s16) ; VI-NEXT: G_STORE [[ANYEXT14]](s32), [[PTR_ADD29]](p1) :: (store (s8) into unknown-address + 29, addrspace 1) ; VI-NEXT: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR21]](s32) @@ -8922,13 +8922,13 @@ ; VI-NEXT: G_STORE [[ANYEXT15]](s32), [[PTR_ADD30]](p1) :: (store (s8) into unknown-address + 31, addrspace 1) ; VI-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 ; VI-NEXT: [[PTR_ADD31:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64) - ; VI-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[UV8]](s32) - ; VI-NEXT: [[LSHR24:%[0-9]+]]:_(s32) = G_LSHR [[COPY12]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY [[UV8]](s32) + ; VI-NEXT: [[LSHR24:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY8]], [[C]](s32) ; VI-NEXT: [[PTR_ADD32:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD31]], [[C1]](s64) - ; VI-NEXT: [[TRUNC16:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) + ; VI-NEXT: [[TRUNC16:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY8]](s32) ; VI-NEXT: [[LSHR25:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC16]], [[C2]](s16) ; VI-NEXT: [[PTR_ADD33:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD31]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY12]](s32), [[PTR_ADD31]](p1) :: (store (s8) into unknown-address + 32, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY8]](s32), [[PTR_ADD31]](p1) :: (store (s8) into unknown-address + 32, addrspace 1) ; VI-NEXT: [[ANYEXT16:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR25]](s16) ; VI-NEXT: G_STORE [[ANYEXT16]](s32), [[PTR_ADD33]](p1) :: (store (s8) into unknown-address + 33, addrspace 1) ; VI-NEXT: [[TRUNC17:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR24]](s32) @@ -8980,65 +8980,65 @@ ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) ; SI-NEXT: 
[[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>) ; SI-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY3]](<3 x s32>) - ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV]](s32) ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY4]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; SI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY5]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[UV1]](s32) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY1]], [[C]](s32) ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY5]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY1]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) ; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY6]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[UV2]](s32) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY2]], [[C]](s32) ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY6]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY2]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 10, addrspace 1) ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; SI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY7]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY [[UV3]](s32) + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY3]], [[C]](s32) ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY7]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY3]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1) ; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1) ; SI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; SI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[UV4]](s32) - ; SI-NEXT: 
[[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[COPY8]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY [[UV4]](s32) + ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY4]], [[C]](s32) ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY8]](s32), [[PTR_ADD7]](p1) :: (store (s16) into unknown-address + 16, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY4]](s32), [[PTR_ADD7]](p1) :: (store (s16) into unknown-address + 16, addrspace 1) ; SI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD8]](p1) :: (store (s16) into unknown-address + 18, addrspace 1) ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; SI-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[UV5]](s32) - ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[COPY9]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY [[UV5]](s32) + ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY5]], [[C]](s32) ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD9]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY9]](s32), [[PTR_ADD9]](p1) :: (store (s16) into unknown-address + 20, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY5]](s32), [[PTR_ADD9]](p1) :: (store (s16) into unknown-address + 20, addrspace 1) ; SI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD10]](p1) :: (store (s16) into unknown-address + 22, addrspace 1) ; SI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64) - ; SI-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV6]](s32) - ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[COPY10]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY [[UV6]](s32) + ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY6]], [[C]](s32) ; SI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY10]](s32), [[PTR_ADD11]](p1) :: (store (s16) into unknown-address + 24, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY6]](s32), [[PTR_ADD11]](p1) :: (store (s16) into unknown-address + 24, addrspace 1) ; SI-NEXT: G_STORE [[LSHR6]](s32), [[PTR_ADD12]](p1) :: (store (s16) into unknown-address + 26, addrspace 1) ; SI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) - ; SI-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[UV7]](s32) - ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[COPY11]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY [[UV7]](s32) + ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY7]], [[C]](s32) ; SI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD13]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY11]](s32), [[PTR_ADD13]](p1) :: (store (s16) into unknown-address + 28, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY7]](s32), [[PTR_ADD13]](p1) :: (store (s16) into unknown-address + 28, addrspace 1) ; SI-NEXT: G_STORE [[LSHR7]](s32), [[PTR_ADD14]](p1) :: (store (s16) into unknown-address + 30, addrspace 1) ; SI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 ; SI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; SI-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[UV8]](s32) - ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[COPY12]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY [[UV8]](s32) + ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY8]], [[C]](s32) ; SI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY12]](s32), [[PTR_ADD15]](p1) :: (store (s16) into unknown-address + 32, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY8]](s32), [[PTR_ADD15]](p1) :: (store (s16) 
into unknown-address + 32, addrspace 1) ; SI-NEXT: G_STORE [[LSHR8]](s32), [[PTR_ADD16]](p1) :: (store (s16) into unknown-address + 34, addrspace 1) ; CI-LABEL: name: test_store_global_v9s32_align2 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4, $vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10 @@ -9069,65 +9069,65 @@ ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) ; VI-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>) ; VI-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY3]](<3 x s32>) - ; VI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV]](s32) ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY4]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; VI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY5]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[UV1]](s32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY1]], [[C]](s32) ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY5]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY1]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) ; VI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY6]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[UV2]](s32) + ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY2]], [[C]](s32) ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY6]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY2]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) ; VI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 10, addrspace 1) ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY7]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY [[UV3]](s32) + ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY3]], [[C]](s32) ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY7]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, 
addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY3]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1) ; VI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1) ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; VI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[UV4]](s32) - ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[COPY8]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY [[UV4]](s32) + ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY4]], [[C]](s32) ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY8]](s32), [[PTR_ADD7]](p1) :: (store (s16) into unknown-address + 16, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY4]](s32), [[PTR_ADD7]](p1) :: (store (s16) into unknown-address + 16, addrspace 1) ; VI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD8]](p1) :: (store (s16) into unknown-address + 18, addrspace 1) ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; VI-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[UV5]](s32) - ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[COPY9]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY [[UV5]](s32) + ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY5]], [[C]](s32) ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD9]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY9]](s32), [[PTR_ADD9]](p1) :: (store (s16) into unknown-address + 20, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY5]](s32), [[PTR_ADD9]](p1) :: (store (s16) into unknown-address + 20, addrspace 1) ; VI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD10]](p1) :: (store (s16) into unknown-address + 22, addrspace 1) ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64) - ; VI-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV6]](s32) - ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[COPY10]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY [[UV6]](s32) + ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY6]], [[C]](s32) ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY10]](s32), [[PTR_ADD11]](p1) :: (store (s16) into unknown-address + 24, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY6]](s32), [[PTR_ADD11]](p1) :: (store (s16) into unknown-address + 24, addrspace 1) ; VI-NEXT: G_STORE [[LSHR6]](s32), [[PTR_ADD12]](p1) :: (store (s16) into unknown-address + 26, addrspace 1) ; VI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) - ; VI-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[UV7]](s32) - ; VI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[COPY11]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY [[UV7]](s32) + ; VI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY7]], [[C]](s32) ; VI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD13]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY11]](s32), [[PTR_ADD13]](p1) :: (store (s16) into unknown-address + 28, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY7]](s32), [[PTR_ADD13]](p1) :: (store (s16) into unknown-address + 28, addrspace 1) ; VI-NEXT: G_STORE [[LSHR7]](s32), [[PTR_ADD14]](p1) :: (store (s16) into unknown-address + 30, addrspace 1) ; VI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 ; VI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; VI-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[UV8]](s32) - ; VI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR 
[[COPY12]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY [[UV8]](s32) + ; VI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY8]], [[C]](s32) ; VI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY12]](s32), [[PTR_ADD15]](p1) :: (store (s16) into unknown-address + 32, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY8]](s32), [[PTR_ADD15]](p1) :: (store (s16) into unknown-address + 32, addrspace 1) ; VI-NEXT: G_STORE [[LSHR8]](s32), [[PTR_ADD16]](p1) :: (store (s16) into unknown-address + 34, addrspace 1) ; GFX9-LABEL: name: test_store_global_v9s32_align2 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4, $vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store.mir @@ -325,12 +325,12 @@ ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[TRUNC]](s32) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[TRUNC]](s32) ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) ; VI-LABEL: name: test_truncstore_global_s64_to_s32_align2 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 @@ -338,12 +338,12 @@ ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[TRUNC]](s32) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[TRUNC]](s32) ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; VI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(s64) = COPY $vgpr2_vgpr3 @@ -362,22 +362,22 @@ ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[TRUNC]](s32) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[TRUNC]](s32) ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = 
G_PTR_ADD [[COPY]], [[C1]](s64) ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] + ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY]], [[C3]] ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[C2]](s32) ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) - ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; SI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]] - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY3]](s32) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[PRED_COPY1]](s32) ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) @@ -387,9 +387,9 @@ ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[TRUNC]](s32) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[TRUNC]](s32) ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s64) @@ -397,7 +397,7 @@ ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[C2]](s16) ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR1]](s16) ; VI-NEXT: G_STORE [[ANYEXT]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) @@ -635,10 +635,10 @@ ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) ; SI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C1]](s32) ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C2]] - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32) + ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[PRED_COPY]](s32) ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) ; SI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) @@ -784,17 +784,17 @@ ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) ; SI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] ; SI-NEXT: 
[[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C1]](s32) ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C2]] - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY2]](s32) + ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[PRED_COPY]](s32) ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) ; SI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32) ; SI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C]] - ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; SI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C1]](s32) ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C2]] - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY3]](s32) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[PRED_COPY1]](s32) ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) ; SI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) @@ -802,16 +802,16 @@ ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C3]](s32) ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] - ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[OR2]](s32) - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C3]](s32) + ; SI-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[OR2]](s32) + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY2]], [[C3]](s32) ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C5]] + ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY2]], [[C5]] ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[C1]](s32) ; SI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; SI-NEXT: G_STORE [[COPY4]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) ; VI-LABEL: name: test_store_global_v3s8_align1 @@ -840,14 +840,14 @@ ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C2]](s32) ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[OR2]](s32) - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C2]](s32) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[OR2]](s32) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C2]](s32) ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[OR]], [[C1]](s16) ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT 
[[LSHR1]](s16) ; VI-NEXT: G_STORE [[ANYEXT]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) ; VI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) @@ -875,10 +875,10 @@ ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) ; SI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C1]](s32) ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C2]] - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY2]](s32) + ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[PRED_COPY]](s32) ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) ; SI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32) @@ -892,11 +892,11 @@ ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C3]](s32) ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] - ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[OR2]](s32) - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C3]](s32) + ; SI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[OR2]](s32) + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY1]], [[C3]](s32) ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY3]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY1]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, align 2, addrspace 1) ; VI-LABEL: name: test_store_global_v3s8_align2 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 @@ -924,11 +924,11 @@ ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C2]](s32) ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[OR2]](s32) - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C2]](s32) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[OR2]](s32) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C2]](s32) ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; VI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, align 2, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 @@ -954,10 +954,10 @@ ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) ; SI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C1]](s32) ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C2]] - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY2]](s32) + ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[PRED_COPY]](s32) ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) ; SI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR 
[[AND]], [[TRUNC1]] ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32) @@ -971,11 +971,11 @@ ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C3]](s32) ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] - ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[OR2]](s32) - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C3]](s32) + ; SI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[OR2]](s32) + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY1]], [[C3]](s32) ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY3]](s32), [[COPY]](p1) :: (store (s16), align 4, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY1]](s32), [[COPY]](p1) :: (store (s16), align 4, addrspace 1) ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, align 2, addrspace 1) ; VI-LABEL: name: test_store_global_v3s8_align4 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 @@ -1003,11 +1003,11 @@ ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C2]](s32) ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[OR2]](s32) - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C2]](s32) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[OR2]](s32) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C2]](s32) ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), align 4, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s16), align 4, addrspace 1) ; VI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, align 2, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 @@ -1042,20 +1042,20 @@ ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C3]](s32) ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[OR2]](s32) - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C2]](s32) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[OR2]](s32) + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C2]](s32) ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C5]] + ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY]], [[C5]] ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[C1]](s32) ; SI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; SI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) - ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; SI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C1]](s32) ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C5]] - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND5]], [[COPY3]](s32) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) 
= G_LSHR [[AND5]], [[PRED_COPY1]](s32) ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C6]](s64) ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) @@ -1079,8 +1079,8 @@ ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C3]](s32) ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[OR2]](s32) - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C2]](s32) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[OR2]](s32) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C2]](s32) ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) ; VI-NEXT: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 @@ -1094,7 +1094,7 @@ ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[OR3]], [[C6]](s16) ; VI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; VI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR1]](s16) ; VI-NEXT: G_STORE [[ANYEXT]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) @@ -1136,11 +1136,11 @@ ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C3]](s32) ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[OR2]](s32) - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C2]](s32) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[OR2]](s32) + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C2]](s32) ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) ; VI-LABEL: name: test_store_global_v4s8_align2 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 @@ -1162,11 +1162,11 @@ ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C3]](s32) ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[OR2]](s32) - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C2]](s32) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[OR2]](s32) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C2]](s32) ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; VI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uadde.mir 
b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uadde.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uadde.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uadde.mir @@ -87,8 +87,8 @@ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C1]] ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND %13, [[C1]] - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[AND1]](s32) - ; CHECK-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[AND]], [[COPY2]], [[ICMP]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[AND1]](s32) + ; CHECK-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[AND]], [[PRED_COPY]], [[ICMP]] ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UADDE]](s32), [[AND1]] ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP1]](s1) ; CHECK-NEXT: $vgpr0 = COPY [[UADDE]](s32) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uaddo.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uaddo.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uaddo.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uaddo.mir @@ -41,9 +41,9 @@ ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[AND]], [[AND1]] ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ADD]], [[C]] ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[ADD]](s32), [[AND2]] - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[AND2]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[AND2]](s32) ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP]](s1) - ; CHECK-NEXT: $vgpr0 = COPY [[COPY2]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[PRED_COPY]](s32) ; CHECK-NEXT: $vgpr1 = COPY [[ZEXT]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 @@ -74,9 +74,9 @@ ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[AND]], [[AND1]] ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ADD]], [[C]] ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[ADD]](s32), [[AND2]] - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[AND2]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[AND2]](s32) ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP]](s1) - ; CHECK-NEXT: $vgpr0 = COPY [[COPY2]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[PRED_COPY]](s32) ; CHECK-NEXT: $vgpr1 = COPY [[ZEXT]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 @@ -144,10 +144,10 @@ ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[AND3]], [[AND4]] ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[ADD1]], [[C1]] ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[ADD1]](s32), [[AND5]] - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[AND2]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[AND5]](s32) - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY3]], [[C]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY2]], [[SHL]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[AND2]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[AND5]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY1]], [[C]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY]], [[SHL]] ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1) ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1) @@ -208,15 +208,15 @@ ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = 
G_BITCAST [[UV7]](<2 x s16>) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[AND2]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[AND5]](s32) - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY3]], [[C]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY2]], [[SHL]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[AND2]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[AND5]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY1]], [[C]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY]], [[SHL]] ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[AND8]](s32) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[AND8]](s32) ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[COPY4]], [[SHL1]] + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY2]], [[SHL1]] ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; CHECK-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] ; CHECK-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] @@ -286,15 +286,15 @@ ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[AND9]], [[AND10]] ; CHECK-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[ADD3]], [[C1]] ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[ADD3]](s32), [[AND11]] - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[AND2]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[AND5]](s32) - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY3]], [[C]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY2]], [[SHL]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[AND2]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[AND5]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY1]], [[C]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY]], [[SHL]] ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[AND8]](s32) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[AND11]](s32) - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY5]], [[C]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[COPY4]], [[SHL1]] + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[AND8]](s32) + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY [[AND11]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY3]], [[C]](s32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY2]], [[SHL1]] ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uaddsat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uaddsat.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uaddsat.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uaddsat.mir @@ -150,10 +150,10 @@ ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX6-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C3]] - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) ; GFX6-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 
; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C4]] - ; GFX6-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY2]](s32) + ; GFX6-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[PRED_COPY]](s32) ; GFX6-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] ; GFX6-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-unmerge-values.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-unmerge-values.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-unmerge-values.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-unmerge-values.mir @@ -218,13 +218,13 @@ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C1]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C2]] - ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[COPY1]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[PRED_COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C1]](s32) ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] - ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY2]](s32) + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[PRED_COPY1]](s32) ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C2]] ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[C1]](s32) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) @@ -290,16 +290,16 @@ ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C1]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C2]] - ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[COPY1]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[PRED_COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C1]](s32) ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] - ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY2]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[PRED_COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[C1]](s32) ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C2]] - ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY3]](s32) + ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[PRED_COPY2]](s32) ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C2]] ; CHECK-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[C1]](s32) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) @@ -344,16 +344,16 @@ ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; 
CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C1]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C2]] - ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[COPY1]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[PRED_COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C1]](s32) ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] - ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY2]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[PRED_COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[C1]](s32) ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C2]] - ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY3]](s32) + ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[PRED_COPY2]](s32) ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C2]] ; CHECK-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[C1]](s32) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) @@ -398,36 +398,36 @@ ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C1]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C2]] - ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[COPY1]](s32) + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[PRED_COPY]](s32) ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C3]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C3]](s32) ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C2]] - ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY2]](s32) + ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[PRED_COPY1]](s32) ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C4]](s32) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[C4]](s32) ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C2]] - ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[PRED_COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY [[C1]](s32) ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] - ; CHECK-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[COPY4]](s32) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C3]](s32) + ; CHECK-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[PRED_COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY [[C3]](s32) ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] - ; CHECK-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[COPY5]](s32) - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C4]](s32) + ; CHECK-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[PRED_COPY4]](s32) + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY [[C4]](s32) ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] - ; CHECK-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = 
G_LSHR [[AND5]], [[COPY6]](s32) - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; CHECK-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[AND5]], [[PRED_COPY5]](s32) + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY [[C1]](s32) ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C2]] - ; CHECK-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[AND6]], [[COPY7]](s32) - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C3]](s32) + ; CHECK-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[AND6]], [[PRED_COPY6]](s32) + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY [[C3]](s32) ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C2]] - ; CHECK-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[COPY8]](s32) - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C4]](s32) + ; CHECK-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[PRED_COPY7]](s32) + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY [[C4]](s32) ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C2]] - ; CHECK-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[AND8]], [[COPY9]](s32) + ; CHECK-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[AND8]], [[PRED_COPY8]](s32) ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C2]] ; CHECK-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[AND9]], [[C1]](s32) ; CHECK-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C2]] diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ushlsat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ushlsat.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ushlsat.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ushlsat.mir @@ -177,9 +177,9 @@ ; GFX6-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX6-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) ; GFX6-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C4]] - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) ; GFX6-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C2]] - ; GFX6-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32) + ; GFX6-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[PRED_COPY]](s32) ; GFX6-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC1]] ; GFX6-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-usube.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-usube.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-usube.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-usube.mir @@ -87,8 +87,8 @@ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C1]] ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND %13, [[C1]] - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[AND1]](s32) - ; CHECK-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[AND]], [[COPY2]], [[ICMP]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[AND1]](s32) + ; CHECK-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[AND]], [[PRED_COPY]], [[ICMP]] ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[USUBE]](s32), [[AND1]] ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP1]](s1) ; CHECK-NEXT: $vgpr0 = COPY [[USUBE]](s32) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-usubo.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-usubo.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-usubo.mir +++ 
b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-usubo.mir @@ -41,9 +41,9 @@ ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[AND1]] ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SUB]], [[C]] ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SUB]](s32), [[AND2]] - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[AND2]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[AND2]](s32) ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP]](s1) - ; CHECK-NEXT: $vgpr0 = COPY [[COPY2]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[PRED_COPY]](s32) ; CHECK-NEXT: $vgpr1 = COPY [[ZEXT]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 @@ -74,9 +74,9 @@ ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[AND1]] ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SUB]], [[C]] ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SUB]](s32), [[AND2]] - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[AND2]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[AND2]](s32) ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP]](s1) - ; CHECK-NEXT: $vgpr0 = COPY [[COPY2]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[PRED_COPY]](s32) ; CHECK-NEXT: $vgpr1 = COPY [[ZEXT]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 @@ -144,10 +144,10 @@ ; CHECK-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND3]], [[AND4]] ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[SUB1]], [[C1]] ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SUB1]](s32), [[AND5]] - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[AND2]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[AND5]](s32) - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY3]], [[C]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY2]], [[SHL]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[AND2]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[AND5]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY1]], [[C]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY]], [[SHL]] ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1) ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1) @@ -208,15 +208,15 @@ ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[AND2]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[AND5]](s32) - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY3]], [[C]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY2]], [[SHL]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[AND2]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[AND5]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY1]], [[C]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY]], [[SHL]] ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[AND8]](s32) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[AND8]](s32) ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[COPY4]], [[SHL1]] + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY2]], [[SHL1]] ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = 
G_BITCAST [[OR1]](s32) ; CHECK-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] ; CHECK-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] @@ -286,15 +286,15 @@ ; CHECK-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[AND9]], [[AND10]] ; CHECK-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[SUB3]], [[C1]] ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SUB3]](s32), [[AND11]] - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[AND2]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[AND5]](s32) - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY3]], [[C]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY2]], [[SHL]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[AND2]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[AND5]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY1]], [[C]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY]], [[SHL]] ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[AND8]](s32) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[AND11]](s32) - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY5]], [[C]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[COPY4]], [[SHL1]] + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[AND8]](s32) + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY [[AND11]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY3]], [[C]](s32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY2]], [[SHL1]] ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-usubsat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-usubsat.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-usubsat.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-usubsat.mir @@ -143,10 +143,10 @@ ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX6-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]] - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C3]] - ; GFX6-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY2]](s32) + ; GFX6-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[PRED_COPY]](s32) ; GFX6-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] ; GFX6-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-xor.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-xor.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-xor.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-xor.mir @@ -459,8 +459,8 @@ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY2]], [[C]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY]], 
[[C]](s32) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[BITCAST1]](<2 x s16>) @@ -540,8 +540,8 @@ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY]], [[C]](s32) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[BITCAST4]](<2 x s16>) @@ -560,10 +560,10 @@ ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL2]] ; CHECK-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[COPY2]], [[C]](s32) - ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[COPY1]], [[SHL3]] + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY2]], [[C]](s32) + ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY1]], [[SHL3]] ; CHECK-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) ; CHECK-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST8]](<2 x s16>), [[BITCAST9]](<2 x s16>) ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] @@ -571,10 +571,10 @@ ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL4]] ; CHECK-NEXT: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[COPY4]], [[C]](s32) - ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[COPY3]], [[SHL5]] + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY4]], [[C]](s32) + ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY3]], [[SHL5]] ; CHECK-NEXT: [[BITCAST11:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) ; CHECK-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST10]](<2 x s16>), [[BITCAST11]](<2 x s16>) ; CHECK-NEXT: [[XOR1:%[0-9]+]]:_(<4 x s16>) = G_XOR [[CONCAT_VECTORS2]], [[CONCAT_VECTORS3]] diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zext.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zext.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zext.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zext.mir @@ -672,13 +672,13 @@ ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[C5]], [[C3]] ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND4]], [[C4]](s16) ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]] - ; CHECK-NEXT: 
[[COPY1:%[0-9]+]]:_(s16) = COPY [[OR2]](s16) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s16) = PRED_COPY [[OR2]](s16) ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32) ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL3]] ; CHECK-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) - ; CHECK-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[COPY1]](s16) + ; CHECK-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[PRED_COPY]](s16) ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C1]](s32) ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL4]] ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR3]](s32), [[OR4]](s32) @@ -727,8 +727,8 @@ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C3]] ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[C2]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C4]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY]], [[C2]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C2]](s32) @@ -740,11 +740,11 @@ ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[C4]], [[SHL3]] ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR2]](s32), [[OR3]](s32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY [[MV1]](s64) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s64) = PRED_COPY [[MV1]](s64) ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s48) = G_EXTRACT [[DEF]](s64), 0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s64) = COPY [[C]](s64) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s64) = PRED_COPY [[C]](s64) ; CHECK-NEXT: [[EXTRACT1:%[0-9]+]]:_(s48) = G_EXTRACT [[MV]](s64), 0 - ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s64) = G_AND [[COPY2]], [[COPY3]] + ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s64) = G_AND [[PRED_COPY1]], [[PRED_COPY2]] ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[EXTRACT]](s48) ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[EXTRACT1]](s48) ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s64) = G_AND [[ANYEXT]], [[ANYEXT1]] @@ -762,7 +762,7 @@ ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) ; CHECK-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[UV4]], [[C3]] ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C2]](s32) - ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[COPY1]], [[SHL5]] + ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY]], [[SHL5]] ; CHECK-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C3]] ; CHECK-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[UV5]], [[C3]] ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND12]], [[C2]](s32) @@ -770,10 +770,10 @@ ; CHECK-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR6]](s32), [[OR7]](s32) ; CHECK-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL5]] ; CHECK-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV4]](s32), [[OR8]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[OR7]](s32) - ; CHECK-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[UV4]](s32) + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY [[OR7]](s32) + ; CHECK-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY3]](s32), [[UV4]](s32) ; 
CHECK-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL1]] - ; CHECK-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[COPY1]], [[SHL1]] + ; CHECK-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY]], [[SHL1]] ; CHECK-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR9]](s32), [[OR10]](s32) ; CHECK-NEXT: [[MV7:%[0-9]+]]:_(s384) = G_MERGE_VALUES [[AND5]](s64), [[MV2]](s64), [[MV3]](s64), [[MV4]](s64), [[MV5]](s64), [[MV6]](s64) ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s112) = G_TRUNC [[MV7]](s384) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.fmax.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.fmax.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.fmax.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.fmax.ll @@ -29,24 +29,24 @@ ; GFX8-MIR: bb.1 (%ir-block.0): ; GFX8-MIR-NEXT: liveins: $sgpr2, $sgpr3 ; GFX8-MIR-NEXT: {{ $}} - ; GFX8-MIR-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8-MIR-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8-MIR-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]] - ; GFX8-MIR-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY1]] + ; GFX8-MIR-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX8-MIR-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX8-MIR-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]] + ; GFX8-MIR-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY1]] ; GFX8-MIR-NEXT: $m0 = S_MOV_B32 -1 - ; GFX8-MIR-NEXT: [[DS_MAX_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32 [[COPY2]], [[COPY3]], 0, 0, implicit $m0, implicit $exec :: (load store (s32) on %ir.ptr, addrspace 3) - ; GFX8-MIR-NEXT: $vgpr0 = COPY [[DS_MAX_RTN_F32_]] + ; GFX8-MIR-NEXT: [[DS_MAX_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32 [[PRED_COPY2]], [[PRED_COPY3]], 0, 0, implicit $m0, implicit $exec :: (load store (s32) on %ir.ptr, addrspace 3) + ; GFX8-MIR-NEXT: $vgpr0 = PRED_COPY [[DS_MAX_RTN_F32_]] ; GFX8-MIR-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX9-MIR-LABEL: name: ds_fmax_f32_ss ; GFX9-MIR: bb.1 (%ir-block.0): ; GFX9-MIR-NEXT: liveins: $sgpr2, $sgpr3 ; GFX9-MIR-NEXT: {{ $}} - ; GFX9-MIR-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX9-MIR-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX9-MIR-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]] - ; GFX9-MIR-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY1]] - ; GFX9-MIR-NEXT: [[DS_MAX_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32_gfx9 [[COPY2]], [[COPY3]], 0, 0, implicit $exec :: (load store (s32) on %ir.ptr, addrspace 3) - ; GFX9-MIR-NEXT: $vgpr0 = COPY [[DS_MAX_RTN_F32_gfx9_]] + ; GFX9-MIR-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX9-MIR-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX9-MIR-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]] + ; GFX9-MIR-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY1]] + ; GFX9-MIR-NEXT: [[DS_MAX_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32_gfx9 [[PRED_COPY2]], [[PRED_COPY3]], 0, 0, implicit $exec :: (load store (s32) on %ir.ptr, addrspace 3) + ; GFX9-MIR-NEXT: $vgpr0 = PRED_COPY [[DS_MAX_RTN_F32_gfx9_]] ; GFX9-MIR-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %ret = call float @llvm.amdgcn.ds.fmax(ptr addrspace(3) %ptr, float %val, i32 0, i32 0, i1 false) ret float %ret @@ -73,24 +73,24 @@ ; GFX8-MIR: bb.1 (%ir-block.0): ; GFX8-MIR-NEXT: liveins: $sgpr2, $sgpr3 ; GFX8-MIR-NEXT: {{ $}} - ; GFX8-MIR-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8-MIR-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8-MIR-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY 
[[COPY1]] + ; GFX8-MIR-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX8-MIR-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX8-MIR-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY1]] ; GFX8-MIR-NEXT: $m0 = S_MOV_B32 -1 - ; GFX8-MIR-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]] - ; GFX8-MIR-NEXT: [[DS_MAX_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32 [[COPY3]], [[COPY2]], 512, 0, implicit $m0, implicit $exec :: (load store (s32) on %ir.gep, addrspace 3) - ; GFX8-MIR-NEXT: $vgpr0 = COPY [[DS_MAX_RTN_F32_]] + ; GFX8-MIR-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]] + ; GFX8-MIR-NEXT: [[DS_MAX_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32 [[PRED_COPY3]], [[PRED_COPY2]], 512, 0, implicit $m0, implicit $exec :: (load store (s32) on %ir.gep, addrspace 3) + ; GFX8-MIR-NEXT: $vgpr0 = PRED_COPY [[DS_MAX_RTN_F32_]] ; GFX8-MIR-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX9-MIR-LABEL: name: ds_fmax_f32_ss_offset ; GFX9-MIR: bb.1 (%ir-block.0): ; GFX9-MIR-NEXT: liveins: $sgpr2, $sgpr3 ; GFX9-MIR-NEXT: {{ $}} - ; GFX9-MIR-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX9-MIR-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX9-MIR-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY1]] - ; GFX9-MIR-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]] - ; GFX9-MIR-NEXT: [[DS_MAX_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32_gfx9 [[COPY3]], [[COPY2]], 512, 0, implicit $exec :: (load store (s32) on %ir.gep, addrspace 3) - ; GFX9-MIR-NEXT: $vgpr0 = COPY [[DS_MAX_RTN_F32_gfx9_]] + ; GFX9-MIR-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX9-MIR-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX9-MIR-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY1]] + ; GFX9-MIR-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]] + ; GFX9-MIR-NEXT: [[DS_MAX_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32_gfx9 [[PRED_COPY3]], [[PRED_COPY2]], 512, 0, implicit $exec :: (load store (s32) on %ir.gep, addrspace 3) + ; GFX9-MIR-NEXT: $vgpr0 = PRED_COPY [[DS_MAX_RTN_F32_gfx9_]] ; GFX9-MIR-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %gep = getelementptr float, ptr addrspace(3) %ptr, i32 128 %ret = call float @llvm.amdgcn.ds.fmax(ptr addrspace(3) %gep, float %val, i32 0, i32 0, i1 false) @@ -116,22 +116,22 @@ ; GFX8-MIR: bb.1 (%ir-block.0): ; GFX8-MIR-NEXT: liveins: $sgpr2, $sgpr3 ; GFX8-MIR-NEXT: {{ $}} - ; GFX8-MIR-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8-MIR-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8-MIR-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]] - ; GFX8-MIR-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY1]] + ; GFX8-MIR-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX8-MIR-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX8-MIR-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]] + ; GFX8-MIR-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY1]] ; GFX8-MIR-NEXT: $m0 = S_MOV_B32 -1 - ; GFX8-MIR-NEXT: DS_MAX_F32 [[COPY2]], [[COPY3]], 0, 0, implicit $m0, implicit $exec :: (load store (s32) on %ir.ptr, addrspace 3) + ; GFX8-MIR-NEXT: DS_MAX_F32 [[PRED_COPY2]], [[PRED_COPY3]], 0, 0, implicit $m0, implicit $exec :: (load store (s32) on %ir.ptr, addrspace 3) ; GFX8-MIR-NEXT: S_ENDPGM 0 ; GFX9-MIR-LABEL: name: ds_fmax_f32_ss_nortn ; GFX9-MIR: bb.1 (%ir-block.0): ; GFX9-MIR-NEXT: liveins: $sgpr2, $sgpr3 ; GFX9-MIR-NEXT: {{ $}} - ; GFX9-MIR-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX9-MIR-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX9-MIR-NEXT: 
[[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]] - ; GFX9-MIR-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY1]] - ; GFX9-MIR-NEXT: DS_MAX_F32_gfx9 [[COPY2]], [[COPY3]], 0, 0, implicit $exec :: (load store (s32) on %ir.ptr, addrspace 3) + ; GFX9-MIR-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX9-MIR-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX9-MIR-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]] + ; GFX9-MIR-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY1]] + ; GFX9-MIR-NEXT: DS_MAX_F32_gfx9 [[PRED_COPY2]], [[PRED_COPY3]], 0, 0, implicit $exec :: (load store (s32) on %ir.ptr, addrspace 3) ; GFX9-MIR-NEXT: S_ENDPGM 0 %unused = call float @llvm.amdgcn.ds.fmax(ptr addrspace(3) %ptr, float %val, i32 0, i32 0, i1 false) ret void @@ -156,22 +156,22 @@ ; GFX8-MIR: bb.1 (%ir-block.0): ; GFX8-MIR-NEXT: liveins: $sgpr2, $sgpr3 ; GFX8-MIR-NEXT: {{ $}} - ; GFX8-MIR-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8-MIR-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8-MIR-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY1]] + ; GFX8-MIR-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX8-MIR-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX8-MIR-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY1]] ; GFX8-MIR-NEXT: $m0 = S_MOV_B32 -1 - ; GFX8-MIR-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]] - ; GFX8-MIR-NEXT: DS_MAX_F32 [[COPY3]], [[COPY2]], 512, 0, implicit $m0, implicit $exec :: (load store (s32) on %ir.gep, addrspace 3) + ; GFX8-MIR-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]] + ; GFX8-MIR-NEXT: DS_MAX_F32 [[PRED_COPY3]], [[PRED_COPY2]], 512, 0, implicit $m0, implicit $exec :: (load store (s32) on %ir.gep, addrspace 3) ; GFX8-MIR-NEXT: S_ENDPGM 0 ; GFX9-MIR-LABEL: name: ds_fmax_f32_ss_offset_nortn ; GFX9-MIR: bb.1 (%ir-block.0): ; GFX9-MIR-NEXT: liveins: $sgpr2, $sgpr3 ; GFX9-MIR-NEXT: {{ $}} - ; GFX9-MIR-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX9-MIR-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX9-MIR-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY1]] - ; GFX9-MIR-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]] - ; GFX9-MIR-NEXT: DS_MAX_F32_gfx9 [[COPY3]], [[COPY2]], 512, 0, implicit $exec :: (load store (s32) on %ir.gep, addrspace 3) + ; GFX9-MIR-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX9-MIR-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX9-MIR-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY1]] + ; GFX9-MIR-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]] + ; GFX9-MIR-NEXT: DS_MAX_F32_gfx9 [[PRED_COPY3]], [[PRED_COPY2]], 512, 0, implicit $exec :: (load store (s32) on %ir.gep, addrspace 3) ; GFX9-MIR-NEXT: S_ENDPGM 0 %gep = getelementptr float, ptr addrspace(3) %ptr, i32 128 %unused = call float @llvm.amdgcn.ds.fmax(ptr addrspace(3) %gep, float %val, i32 0, i32 0, i1 false) @@ -197,20 +197,20 @@ ; GFX8-MIR: bb.1 (%ir-block.0): ; GFX8-MIR-NEXT: liveins: $vgpr0, $vgpr1 ; GFX8-MIR-NEXT: {{ $}} - ; GFX8-MIR-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-MIR-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX8-MIR-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-MIR-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 ; GFX8-MIR-NEXT: $m0 = S_MOV_B32 -1 - ; GFX8-MIR-NEXT: [[DS_MAX_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32 [[COPY]], [[COPY1]], 0, 0, implicit $m0, implicit $exec :: (load store (s32) on %ir.ptr, addrspace 3) - ; GFX8-MIR-NEXT: $vgpr0 = COPY [[DS_MAX_RTN_F32_]] + ; 
GFX8-MIR-NEXT: [[DS_MAX_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $m0, implicit $exec :: (load store (s32) on %ir.ptr, addrspace 3) + ; GFX8-MIR-NEXT: $vgpr0 = PRED_COPY [[DS_MAX_RTN_F32_]] ; GFX8-MIR-NEXT: SI_RETURN implicit $vgpr0 ; GFX9-MIR-LABEL: name: ds_fmax_f32_vv ; GFX9-MIR: bb.1 (%ir-block.0): ; GFX9-MIR-NEXT: liveins: $vgpr0, $vgpr1 ; GFX9-MIR-NEXT: {{ $}} - ; GFX9-MIR-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-MIR-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-MIR-NEXT: [[DS_MAX_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32_gfx9 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load store (s32) on %ir.ptr, addrspace 3) - ; GFX9-MIR-NEXT: $vgpr0 = COPY [[DS_MAX_RTN_F32_gfx9_]] + ; GFX9-MIR-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-MIR-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX9-MIR-NEXT: [[DS_MAX_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32_gfx9 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec :: (load store (s32) on %ir.ptr, addrspace 3) + ; GFX9-MIR-NEXT: $vgpr0 = PRED_COPY [[DS_MAX_RTN_F32_gfx9_]] ; GFX9-MIR-NEXT: SI_RETURN implicit $vgpr0 %ret = call float @llvm.amdgcn.ds.fmax(ptr addrspace(3) %ptr, float %val, i32 0, i32 0, i1 false) ret float %ret @@ -235,20 +235,20 @@ ; GFX8-MIR: bb.1 (%ir-block.0): ; GFX8-MIR-NEXT: liveins: $vgpr0, $vgpr1 ; GFX8-MIR-NEXT: {{ $}} - ; GFX8-MIR-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-MIR-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX8-MIR-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-MIR-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 ; GFX8-MIR-NEXT: $m0 = S_MOV_B32 -1 - ; GFX8-MIR-NEXT: [[DS_MAX_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32 [[COPY]], [[COPY1]], 512, 0, implicit $m0, implicit $exec :: (load store (s32) on %ir.gep, addrspace 3) - ; GFX8-MIR-NEXT: $vgpr0 = COPY [[DS_MAX_RTN_F32_]] + ; GFX8-MIR-NEXT: [[DS_MAX_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32 [[PRED_COPY]], [[PRED_COPY1]], 512, 0, implicit $m0, implicit $exec :: (load store (s32) on %ir.gep, addrspace 3) + ; GFX8-MIR-NEXT: $vgpr0 = PRED_COPY [[DS_MAX_RTN_F32_]] ; GFX8-MIR-NEXT: SI_RETURN implicit $vgpr0 ; GFX9-MIR-LABEL: name: ds_fmax_f32_vv_offset ; GFX9-MIR: bb.1 (%ir-block.0): ; GFX9-MIR-NEXT: liveins: $vgpr0, $vgpr1 ; GFX9-MIR-NEXT: {{ $}} - ; GFX9-MIR-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-MIR-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-MIR-NEXT: [[DS_MAX_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32_gfx9 [[COPY]], [[COPY1]], 512, 0, implicit $exec :: (load store (s32) on %ir.gep, addrspace 3) - ; GFX9-MIR-NEXT: $vgpr0 = COPY [[DS_MAX_RTN_F32_gfx9_]] + ; GFX9-MIR-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-MIR-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX9-MIR-NEXT: [[DS_MAX_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32_gfx9 [[PRED_COPY]], [[PRED_COPY1]], 512, 0, implicit $exec :: (load store (s32) on %ir.gep, addrspace 3) + ; GFX9-MIR-NEXT: $vgpr0 = PRED_COPY [[DS_MAX_RTN_F32_gfx9_]] ; GFX9-MIR-NEXT: SI_RETURN implicit $vgpr0 %gep = getelementptr float, ptr addrspace(3) %ptr, i32 128 %ret = call float @llvm.amdgcn.ds.fmax(ptr addrspace(3) %gep, float %val, i32 0, i32 0, i1 false) @@ -274,18 +274,18 @@ ; GFX8-MIR: bb.1 (%ir-block.0): ; GFX8-MIR-NEXT: liveins: $vgpr0, $vgpr1 ; GFX8-MIR-NEXT: {{ $}} - ; GFX8-MIR-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-MIR-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX8-MIR-NEXT: 
[[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-MIR-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 ; GFX8-MIR-NEXT: $m0 = S_MOV_B32 -1 - ; GFX8-MIR-NEXT: DS_MAX_F32 [[COPY]], [[COPY1]], 0, 0, implicit $m0, implicit $exec :: (load store (s32) on %ir.ptr, addrspace 3) + ; GFX8-MIR-NEXT: DS_MAX_F32 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $m0, implicit $exec :: (load store (s32) on %ir.ptr, addrspace 3) ; GFX8-MIR-NEXT: SI_RETURN ; GFX9-MIR-LABEL: name: ds_fmax_f32_vv_nortn ; GFX9-MIR: bb.1 (%ir-block.0): ; GFX9-MIR-NEXT: liveins: $vgpr0, $vgpr1 ; GFX9-MIR-NEXT: {{ $}} - ; GFX9-MIR-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-MIR-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-MIR-NEXT: DS_MAX_F32_gfx9 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load store (s32) on %ir.ptr, addrspace 3) + ; GFX9-MIR-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-MIR-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX9-MIR-NEXT: DS_MAX_F32_gfx9 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec :: (load store (s32) on %ir.ptr, addrspace 3) ; GFX9-MIR-NEXT: SI_RETURN %ret = call float @llvm.amdgcn.ds.fmax(ptr addrspace(3) %ptr, float %val, i32 0, i32 0, i1 false) ret void @@ -310,18 +310,18 @@ ; GFX8-MIR: bb.1 (%ir-block.0): ; GFX8-MIR-NEXT: liveins: $vgpr0, $vgpr1 ; GFX8-MIR-NEXT: {{ $}} - ; GFX8-MIR-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-MIR-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX8-MIR-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-MIR-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 ; GFX8-MIR-NEXT: $m0 = S_MOV_B32 -1 - ; GFX8-MIR-NEXT: DS_MAX_F32 [[COPY]], [[COPY1]], 512, 0, implicit $m0, implicit $exec :: (load store (s32) on %ir.gep, addrspace 3) + ; GFX8-MIR-NEXT: DS_MAX_F32 [[PRED_COPY]], [[PRED_COPY1]], 512, 0, implicit $m0, implicit $exec :: (load store (s32) on %ir.gep, addrspace 3) ; GFX8-MIR-NEXT: SI_RETURN ; GFX9-MIR-LABEL: name: ds_fmax_f32_vv_offset_nortn ; GFX9-MIR: bb.1 (%ir-block.0): ; GFX9-MIR-NEXT: liveins: $vgpr0, $vgpr1 ; GFX9-MIR-NEXT: {{ $}} - ; GFX9-MIR-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-MIR-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-MIR-NEXT: DS_MAX_F32_gfx9 [[COPY]], [[COPY1]], 512, 0, implicit $exec :: (load store (s32) on %ir.gep, addrspace 3) + ; GFX9-MIR-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-MIR-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX9-MIR-NEXT: DS_MAX_F32_gfx9 [[PRED_COPY]], [[PRED_COPY1]], 512, 0, implicit $exec :: (load store (s32) on %ir.gep, addrspace 3) ; GFX9-MIR-NEXT: SI_RETURN %gep = getelementptr float, ptr addrspace(3) %ptr, i32 128 %ret = call float @llvm.amdgcn.ds.fmax(ptr addrspace(3) %gep, float %val, i32 0, i32 0, i1 false) @@ -347,20 +347,20 @@ ; GFX8-MIR: bb.1 (%ir-block.0): ; GFX8-MIR-NEXT: liveins: $vgpr0, $vgpr1 ; GFX8-MIR-NEXT: {{ $}} - ; GFX8-MIR-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-MIR-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX8-MIR-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-MIR-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 ; GFX8-MIR-NEXT: $m0 = S_MOV_B32 -1 - ; GFX8-MIR-NEXT: [[DS_MAX_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32 [[COPY]], [[COPY1]], 0, 0, implicit $m0, implicit $exec :: (volatile load store (s32) on %ir.ptr, addrspace 3) - ; GFX8-MIR-NEXT: $vgpr0 = COPY [[DS_MAX_RTN_F32_]] + ; GFX8-MIR-NEXT: [[DS_MAX_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32 [[PRED_COPY]], [[PRED_COPY1]], 0, 
0, implicit $m0, implicit $exec :: (volatile load store (s32) on %ir.ptr, addrspace 3) + ; GFX8-MIR-NEXT: $vgpr0 = PRED_COPY [[DS_MAX_RTN_F32_]] ; GFX8-MIR-NEXT: SI_RETURN implicit $vgpr0 ; GFX9-MIR-LABEL: name: ds_fmax_f32_vv_volatile ; GFX9-MIR: bb.1 (%ir-block.0): ; GFX9-MIR-NEXT: liveins: $vgpr0, $vgpr1 ; GFX9-MIR-NEXT: {{ $}} - ; GFX9-MIR-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-MIR-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-MIR-NEXT: [[DS_MAX_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32_gfx9 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (volatile load store (s32) on %ir.ptr, addrspace 3) - ; GFX9-MIR-NEXT: $vgpr0 = COPY [[DS_MAX_RTN_F32_gfx9_]] + ; GFX9-MIR-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-MIR-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX9-MIR-NEXT: [[DS_MAX_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32_gfx9 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec :: (volatile load store (s32) on %ir.ptr, addrspace 3) + ; GFX9-MIR-NEXT: $vgpr0 = PRED_COPY [[DS_MAX_RTN_F32_gfx9_]] ; GFX9-MIR-NEXT: SI_RETURN implicit $vgpr0 %ret = call float @llvm.amdgcn.ds.fmax(ptr addrspace(3) %ptr, float %val, i32 0, i32 0, i1 true) ret float %ret diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.atomic.dim.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.atomic.dim.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.atomic.dim.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.atomic.dim.mir @@ -16,32 +16,32 @@ ; GFX6-LABEL: name: atomic_cmpswap_i32_1d ; GFX6: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1, $vgpr2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX6-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V1_V1_si:%[0-9]+]]:vreg_64 = IMAGE_ATOMIC_CMPSWAP_V1_V1_si [[COPY1]], [[COPY2]], [[COPY]], 3, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), addrspace 7) - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed [[IMAGE_ATOMIC_CMPSWAP_V1_V1_si]].sub0 - ; GFX6-NEXT: $vgpr0 = COPY [[COPY3]] + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_256 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX6-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V1_V1_si:%[0-9]+]]:vreg_64 = IMAGE_ATOMIC_CMPSWAP_V1_V1_si [[PRED_COPY1]], [[PRED_COPY2]], [[PRED_COPY]], 3, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), addrspace 7) + ; GFX6-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY killed [[IMAGE_ATOMIC_CMPSWAP_V1_V1_si]].sub0 + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY3]] ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX8-LABEL: name: atomic_cmpswap_i32_1d ; GFX8: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1, $vgpr2 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V1_V1_vi:%[0-9]+]]:vreg_64 = IMAGE_ATOMIC_CMPSWAP_V1_V1_vi [[COPY1]], [[COPY2]], [[COPY]], 3, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), addrspace 7) - ; 
GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed [[IMAGE_ATOMIC_CMPSWAP_V1_V1_vi]].sub0 - ; GFX8-NEXT: $vgpr0 = COPY [[COPY3]] + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_256 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX8-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V1_V1_vi:%[0-9]+]]:vreg_64 = IMAGE_ATOMIC_CMPSWAP_V1_V1_vi [[PRED_COPY1]], [[PRED_COPY2]], [[PRED_COPY]], 3, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), addrspace 7) + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY killed [[IMAGE_ATOMIC_CMPSWAP_V1_V1_vi]].sub0 + ; GFX8-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY3]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX10-LABEL: name: atomic_cmpswap_i32_1d ; GFX10: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V1_V1_gfx10_:%[0-9]+]]:vreg_64 = IMAGE_ATOMIC_CMPSWAP_V1_V1_gfx10 [[COPY1]], [[COPY2]], [[COPY]], 3, 0, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), addrspace 7) - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed [[IMAGE_ATOMIC_CMPSWAP_V1_V1_gfx10_]].sub0 - ; GFX10-NEXT: $vgpr0 = COPY [[COPY3]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_256 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V1_V1_gfx10_:%[0-9]+]]:vreg_64 = IMAGE_ATOMIC_CMPSWAP_V1_V1_gfx10 [[PRED_COPY1]], [[PRED_COPY2]], [[PRED_COPY]], 3, 0, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), addrspace 7) + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY killed [[IMAGE_ATOMIC_CMPSWAP_V1_V1_gfx10_]].sub0 + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY3]] ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %0:sgpr(<8 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 %1:vgpr(<2 x s32>) = COPY $vgpr0_vgpr1 @@ -64,26 +64,26 @@ ; GFX6-LABEL: name: atomic_cmpswap_i32_1d_no_return ; GFX6: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1, $vgpr2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX6-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V1_V1_si:%[0-9]+]]:vreg_64 = IMAGE_ATOMIC_CMPSWAP_V1_V1_si [[COPY1]], [[COPY2]], [[COPY]], 3, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), addrspace 7) + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_256 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX6-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V1_V1_si:%[0-9]+]]:vreg_64 = IMAGE_ATOMIC_CMPSWAP_V1_V1_si [[PRED_COPY1]], [[PRED_COPY2]], [[PRED_COPY]], 3, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), addrspace 7) ; GFX6-NEXT: S_ENDPGM 0 ; GFX8-LABEL: name: atomic_cmpswap_i32_1d_no_return 
; GFX8: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1, $vgpr2 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V1_V1_vi:%[0-9]+]]:vreg_64 = IMAGE_ATOMIC_CMPSWAP_V1_V1_vi [[COPY1]], [[COPY2]], [[COPY]], 3, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), addrspace 7) + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_256 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX8-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V1_V1_vi:%[0-9]+]]:vreg_64 = IMAGE_ATOMIC_CMPSWAP_V1_V1_vi [[PRED_COPY1]], [[PRED_COPY2]], [[PRED_COPY]], 3, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), addrspace 7) ; GFX8-NEXT: S_ENDPGM 0 ; GFX10-LABEL: name: atomic_cmpswap_i32_1d_no_return ; GFX10: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V1_V1_gfx10_:%[0-9]+]]:vreg_64 = IMAGE_ATOMIC_CMPSWAP_V1_V1_gfx10 [[COPY1]], [[COPY2]], [[COPY]], 3, 0, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), addrspace 7) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_256 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V1_V1_gfx10_:%[0-9]+]]:vreg_64 = IMAGE_ATOMIC_CMPSWAP_V1_V1_gfx10 [[PRED_COPY1]], [[PRED_COPY2]], [[PRED_COPY]], 3, 0, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), addrspace 7) ; GFX10-NEXT: S_ENDPGM 0 %0:sgpr(<8 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 %1:vgpr(<2 x s32>) = COPY $vgpr0_vgpr1 @@ -105,32 +105,32 @@ ; GFX6-LABEL: name: atomic_cmpswap_i64_1d ; GFX6: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; GFX6-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V2_V1_si:%[0-9]+]]:vreg_128 = IMAGE_ATOMIC_CMPSWAP_V2_V1_si [[COPY1]], [[COPY2]], [[COPY]], 15, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), addrspace 7) - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY killed [[IMAGE_ATOMIC_CMPSWAP_V2_V1_si]].sub0_sub1 - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[COPY3]] + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_256 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_128 = PRED_COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 + ; GFX6-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V2_V1_si:%[0-9]+]]:vreg_128 = IMAGE_ATOMIC_CMPSWAP_V2_V1_si [[PRED_COPY1]], [[PRED_COPY2]], [[PRED_COPY]], 15, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable 
load store (s64), addrspace 7) + ; GFX6-NEXT: [[PRED_COPY3:%[0-9]+]]:vreg_64 = PRED_COPY killed [[IMAGE_ATOMIC_CMPSWAP_V2_V1_si]].sub0_sub1 + ; GFX6-NEXT: $vgpr0_vgpr1 = PRED_COPY [[PRED_COPY3]] ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0_vgpr1 ; GFX8-LABEL: name: atomic_cmpswap_i64_1d ; GFX8: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; GFX8-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V2_V1_vi:%[0-9]+]]:vreg_128 = IMAGE_ATOMIC_CMPSWAP_V2_V1_vi [[COPY1]], [[COPY2]], [[COPY]], 15, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), addrspace 7) - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY killed [[IMAGE_ATOMIC_CMPSWAP_V2_V1_vi]].sub0_sub1 - ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[COPY3]] + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_256 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_128 = PRED_COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 + ; GFX8-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V2_V1_vi:%[0-9]+]]:vreg_128 = IMAGE_ATOMIC_CMPSWAP_V2_V1_vi [[PRED_COPY1]], [[PRED_COPY2]], [[PRED_COPY]], 15, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), addrspace 7) + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:vreg_64 = PRED_COPY killed [[IMAGE_ATOMIC_CMPSWAP_V2_V1_vi]].sub0_sub1 + ; GFX8-NEXT: $vgpr0_vgpr1 = PRED_COPY [[PRED_COPY3]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0_vgpr1 ; GFX10-LABEL: name: atomic_cmpswap_i64_1d ; GFX10: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; GFX10-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx10_:%[0-9]+]]:vreg_128 = IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx10 [[COPY1]], [[COPY2]], [[COPY]], 15, 0, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), addrspace 7) - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY killed [[IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx10_]].sub0_sub1 - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[COPY3]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_256 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_128 = PRED_COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 + ; GFX10-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx10_:%[0-9]+]]:vreg_128 = IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx10 [[PRED_COPY1]], [[PRED_COPY2]], [[PRED_COPY]], 15, 0, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), addrspace 7) + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:vreg_64 = PRED_COPY killed [[IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx10_]].sub0_sub1 + ; GFX10-NEXT: $vgpr0_vgpr1 = PRED_COPY [[PRED_COPY3]] ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0_vgpr1 %0:sgpr(<8 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 %1:vgpr(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 @@ -153,26 +153,26 @@ ; GFX6-LABEL: name: atomic_cmpswap_i64_1d_no_return ; GFX6: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; 
GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; GFX6-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V2_V1_si:%[0-9]+]]:vreg_128 = IMAGE_ATOMIC_CMPSWAP_V2_V1_si [[COPY1]], [[COPY2]], [[COPY]], 15, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), addrspace 7) + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_256 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_128 = PRED_COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 + ; GFX6-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V2_V1_si:%[0-9]+]]:vreg_128 = IMAGE_ATOMIC_CMPSWAP_V2_V1_si [[PRED_COPY1]], [[PRED_COPY2]], [[PRED_COPY]], 15, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), addrspace 7) ; GFX6-NEXT: S_ENDPGM 0 ; GFX8-LABEL: name: atomic_cmpswap_i64_1d_no_return ; GFX8: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; GFX8-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V2_V1_vi:%[0-9]+]]:vreg_128 = IMAGE_ATOMIC_CMPSWAP_V2_V1_vi [[COPY1]], [[COPY2]], [[COPY]], 15, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), addrspace 7) + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_256 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_128 = PRED_COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 + ; GFX8-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V2_V1_vi:%[0-9]+]]:vreg_128 = IMAGE_ATOMIC_CMPSWAP_V2_V1_vi [[PRED_COPY1]], [[PRED_COPY2]], [[PRED_COPY]], 15, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), addrspace 7) ; GFX8-NEXT: S_ENDPGM 0 ; GFX10-LABEL: name: atomic_cmpswap_i64_1d_no_return ; GFX10: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; GFX10-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx10_:%[0-9]+]]:vreg_128 = IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx10 [[COPY1]], [[COPY2]], [[COPY]], 15, 0, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), addrspace 7) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_256 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_128 = PRED_COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 + ; GFX10-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx10_:%[0-9]+]]:vreg_128 = IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx10 [[PRED_COPY1]], [[PRED_COPY2]], [[PRED_COPY]], 15, 0, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), addrspace 7) ; GFX10-NEXT: S_ENDPGM 0 %0:sgpr(<8 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 %1:vgpr(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.add.ll 
b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.add.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.add.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.add.ll @@ -7,16 +7,16 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[BUFFER_ATOMIC_ADD_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_OFFEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) - ; CHECK-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_OFFEN_RTN]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[BUFFER_ATOMIC_ADD_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_OFFEN_RTN [[PRED_COPY]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[BUFFER_ATOMIC_ADD_OFFEN_RTN]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %ret = call i32 @llvm.amdgcn.raw.buffer.atomic.add.i32(i32 %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) %cast = bitcast i32 %ret to float @@ -28,16 +28,16 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[BUFFER_ATOMIC_ADD_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_OFFEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) - ; CHECK-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_OFFEN_RTN]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: 
[[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[BUFFER_ATOMIC_ADD_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_OFFEN_RTN [[PRED_COPY]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[BUFFER_ATOMIC_ADD_OFFEN_RTN]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %ret = call i32 @llvm.amdgcn.raw.buffer.atomic.add.i32(i32 %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) %cast = bitcast i32 %ret to float @@ -49,21 +49,21 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[BUFFER_ATOMIC_ADD_X2_OFFEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_ADD_X2_OFFEN_RTN [[REG_SEQUENCE]], [[COPY6]], [[REG_SEQUENCE1]], [[COPY7]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 7) - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_X2_OFFEN_RTN]].sub0 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_X2_OFFEN_RTN]].sub1 - ; CHECK-NEXT: $vgpr0 = COPY [[COPY8]] - ; CHECK-NEXT: $vgpr1 = COPY [[COPY9]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY3]], %subreg.sub1, [[PRED_COPY4]], %subreg.sub2, [[PRED_COPY5]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[BUFFER_ATOMIC_ADD_X2_OFFEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_ADD_X2_OFFEN_RTN [[REG_SEQUENCE]], [[PRED_COPY6]], [[REG_SEQUENCE1]], [[PRED_COPY7]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 7) + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY 
[[BUFFER_ATOMIC_ADD_X2_OFFEN_RTN]].sub0 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_ATOMIC_ADD_X2_OFFEN_RTN]].sub1 + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY8]] + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY9]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %ret = call i64 @llvm.amdgcn.raw.buffer.atomic.add.i64(i64 %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) %cast = bitcast i64 %ret to <2 x float> @@ -75,17 +75,17 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: BUFFER_ATOMIC_ADD_X2_OFFEN [[REG_SEQUENCE]], [[COPY6]], [[REG_SEQUENCE1]], [[COPY7]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 7) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY3]], %subreg.sub1, [[PRED_COPY4]], %subreg.sub2, [[PRED_COPY5]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: BUFFER_ATOMIC_ADD_X2_OFFEN [[REG_SEQUENCE]], [[PRED_COPY6]], [[REG_SEQUENCE1]], [[PRED_COPY7]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 7) ; CHECK-NEXT: S_ENDPGM 0 %ret = call i64 @llvm.amdgcn.raw.buffer.atomic.add.i64(i64 %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) ret void @@ -98,42 +98,42 @@ ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY]] - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY 
[[COPY5]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]] + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY5]] ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY1]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY2]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY3]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY4]], implicit $exec ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY11]], [[PRED_COPY9]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY12]], [[PRED_COPY10]], implicit $exec ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec - ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 
[[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY6]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[PRED_COPY6]], implicit $exec ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[BUFFER_ATOMIC_ADD_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_OFFEN_RTN [[COPY7]], [[COPY8]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) + ; CHECK-NEXT: [[BUFFER_ATOMIC_ADD_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_OFFEN_RTN [[PRED_COPY7]], [[PRED_COPY8]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -143,7 +143,7 @@ ; CHECK-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.5: - ; CHECK-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_OFFEN_RTN]] + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[BUFFER_ATOMIC_ADD_OFFEN_RTN]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %ret = call i32 @llvm.amdgcn.raw.buffer.atomic.add.i32(i32 %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) %cast = bitcast i32 %ret to float @@ -157,42 +157,42 @@ ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY]] - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]] + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY5]] ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; 
CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY1]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY2]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY3]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY4]], implicit $exec ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY11]], [[PRED_COPY9]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY12]], [[PRED_COPY10]], implicit $exec ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec - ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY6]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[PRED_COPY6]], implicit $exec ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: BUFFER_ATOMIC_ADD_OFFEN [[COPY7]], [[COPY8]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) + 
; CHECK-NEXT: BUFFER_ATOMIC_ADD_OFFEN [[PRED_COPY7]], [[PRED_COPY8]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -212,16 +212,16 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[BUFFER_ATOMIC_ADD_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_OFFEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) - ; CHECK-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_OFFEN_RTN]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[BUFFER_ATOMIC_ADD_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_OFFEN_RTN [[PRED_COPY]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[PRED_COPY6]], 4095, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[BUFFER_ATOMIC_ADD_OFFEN_RTN]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %voffset = add i32 %voffset.base, 4095 %ret = call i32 @llvm.amdgcn.raw.buffer.atomic.add.i32(i32 %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) @@ -235,16 +235,16 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[BUFFER_ATOMIC_ADD_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_OFFEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 3, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) - ; CHECK-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_OFFEN_RTN]] + ; 
CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[BUFFER_ATOMIC_ADD_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_OFFEN_RTN [[PRED_COPY]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 3, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[BUFFER_ATOMIC_ADD_OFFEN_RTN]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %ret = call i32 @llvm.amdgcn.raw.buffer.atomic.add.i32(i32 %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 2) %cast = bitcast i32 %ret to float diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.cmpswap.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.cmpswap.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.cmpswap.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.cmpswap.ll @@ -7,19 +7,19 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; CHECK-NEXT: [[BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN]].sub0 - ; CHECK-NEXT: $vgpr0 = COPY [[COPY8]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY3]], %subreg.sub1, [[PRED_COPY4]], %subreg.sub2, [[PRED_COPY5]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY]], 
%subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; CHECK-NEXT: [[BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN [[REG_SEQUENCE1]], [[PRED_COPY6]], [[REG_SEQUENCE]], [[PRED_COPY7]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN]].sub0 + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[COPY]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %ret = call i32 @llvm.amdgcn.raw.buffer.atomic.cmpswap.i32(i32 %val, i32 %cmp, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) %cast = bitcast i32 %ret to float @@ -32,17 +32,17 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; CHECK-NEXT: BUFFER_ATOMIC_CMPSWAP_OFFEN [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY3]], %subreg.sub1, [[PRED_COPY4]], %subreg.sub2, [[PRED_COPY5]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; CHECK-NEXT: BUFFER_ATOMIC_CMPSWAP_OFFEN [[REG_SEQUENCE1]], [[PRED_COPY6]], [[REG_SEQUENCE]], [[PRED_COPY7]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) ; CHECK-NEXT: S_ENDPGM 0 %ret = call i32 @llvm.amdgcn.raw.buffer.atomic.cmpswap.i32(i32 %val, i32 %cmp, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) ret void @@ -55,46 +55,46 @@ ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, 
[[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY]] - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY1]] - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY6]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY3]], %subreg.sub1, [[PRED_COPY4]], %subreg.sub2, [[PRED_COPY5]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]] + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY1]] + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY6]] ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY2]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY3]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY4]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY5]], implicit $exec ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY13]], [[PRED_COPY11]], 
implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY14]], [[PRED_COPY12]], implicit $exec ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY7]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY7]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[PRED_COPY7]], implicit $exec ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY9]], %subreg.sub1 - ; CHECK-NEXT: [[BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN [[REG_SEQUENCE2]], [[COPY10]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN]].sub0 + ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY8]], %subreg.sub0, [[PRED_COPY9]], %subreg.sub1 + ; CHECK-NEXT: [[BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN [[REG_SEQUENCE2]], [[PRED_COPY10]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN]].sub0 ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -104,7 +104,7 @@ ; CHECK-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.5: - ; CHECK-NEXT: $vgpr0 = COPY [[COPY15]] + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[COPY]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %ret = call i32 @llvm.amdgcn.raw.buffer.atomic.cmpswap.i32(i32 %val, i32 %cmp, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) %cast = bitcast i32 %ret to float @@ -118,45 +118,45 @@ ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY]] - ; 
CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY1]] - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY6]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY3]], %subreg.sub1, [[PRED_COPY4]], %subreg.sub2, [[PRED_COPY5]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]] + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY1]] + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY6]] ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY2]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY3]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY4]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY5]], implicit $exec ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY13]], [[PRED_COPY11]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY14]], [[PRED_COPY12]], implicit $exec ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], 
[[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY7]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY7]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[PRED_COPY7]], implicit $exec ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY9]], %subreg.sub1 - ; CHECK-NEXT: BUFFER_ATOMIC_CMPSWAP_OFFEN [[REG_SEQUENCE2]], [[COPY10]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) + ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY8]], %subreg.sub0, [[PRED_COPY9]], %subreg.sub1 + ; CHECK-NEXT: BUFFER_ATOMIC_CMPSWAP_OFFEN [[REG_SEQUENCE2]], [[PRED_COPY10]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -176,19 +176,19 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; CHECK-NEXT: [[BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 4095, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN]].sub0 - ; CHECK-NEXT: $vgpr0 = COPY [[COPY8]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY3]], 
%subreg.sub1, [[PRED_COPY4]], %subreg.sub2, [[PRED_COPY5]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; CHECK-NEXT: [[BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN [[REG_SEQUENCE1]], [[PRED_COPY6]], [[REG_SEQUENCE]], [[PRED_COPY7]], 4095, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN]].sub0 + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[COPY]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %voffset = add i32 %voffset.base, 4095 %ret = call i32 @llvm.amdgcn.raw.buffer.atomic.cmpswap.i32(i32 %val, i32 %cmp, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.fadd.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.fadd.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.fadd.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.fadd.ll @@ -8,29 +8,29 @@ ; GFX908: bb.1 (%ir-block.0): ; GFX908-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; GFX908-NEXT: {{ $}} - ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX908-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX908-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX908-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX908-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX908-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX908-NEXT: BUFFER_ATOMIC_ADD_F32_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) + ; GFX908-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX908-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX908-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX908-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX908-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; GFX908-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX908-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; GFX908-NEXT: BUFFER_ATOMIC_ADD_F32_OFFEN [[PRED_COPY]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) ; GFX908-NEXT: S_ENDPGM 0 ; GFX90A-LABEL: name: raw_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset ; GFX90A: bb.1 (%ir-block.0): ; GFX90A-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; GFX90A-NEXT: {{ $}} - ; GFX90A-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX90A-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX90A-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX90A-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY 
$sgpr5 - ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX90A-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX90A-NEXT: BUFFER_ATOMIC_ADD_F32_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) + ; GFX90A-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX90A-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX90A-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX90A-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; GFX90A-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; GFX90A-NEXT: BUFFER_ATOMIC_ADD_F32_OFFEN [[PRED_COPY]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) ; GFX90A-NEXT: S_ENDPGM 0 %ret = call float @llvm.amdgcn.raw.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) ret void @@ -41,29 +41,29 @@ ; GFX908: bb.1 (%ir-block.0): ; GFX908-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; GFX908-NEXT: {{ $}} - ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX908-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX908-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX908-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX908-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX908-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX908-NEXT: BUFFER_ATOMIC_ADD_F32_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) + ; GFX908-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX908-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX908-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX908-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX908-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; GFX908-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX908-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; GFX908-NEXT: BUFFER_ATOMIC_ADD_F32_OFFEN [[PRED_COPY]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[PRED_COPY6]], 4095, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) ; GFX908-NEXT: S_ENDPGM 0 ; GFX90A-LABEL: name: raw_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset_plus4095__sgpr_soffset ; GFX90A: bb.1 (%ir-block.0): ; GFX90A-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; GFX90A-NEXT: {{ $}} - ; GFX90A-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = 
COPY $vgpr0 - ; GFX90A-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX90A-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX90A-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX90A-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX90A-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX90A-NEXT: BUFFER_ATOMIC_ADD_F32_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) + ; GFX90A-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX90A-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX90A-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX90A-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; GFX90A-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; GFX90A-NEXT: BUFFER_ATOMIC_ADD_F32_OFFEN [[PRED_COPY]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[PRED_COPY6]], 4095, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) ; GFX90A-NEXT: S_ENDPGM 0 %voffset.add = add i32 %voffset, 4095 %ret = call float @llvm.amdgcn.raw.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0) @@ -75,27 +75,27 @@ ; GFX908: bb.1 (%ir-block.0): ; GFX908-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; GFX908-NEXT: {{ $}} - ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX908-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX908-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX908-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX908-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX908-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX908-NEXT: BUFFER_ATOMIC_ADD_F32_OFFSET [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) + ; GFX908-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX908-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX908-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX908-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX908-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; GFX908-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; GFX908-NEXT: BUFFER_ATOMIC_ADD_F32_OFFSET [[PRED_COPY]], [[REG_SEQUENCE]], [[PRED_COPY5]], 4095, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) ; GFX908-NEXT: S_ENDPGM 0 ; GFX90A-LABEL: name: raw_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset_4095__sgpr_soffset ; GFX90A: bb.1 (%ir-block.0): ; GFX90A-NEXT: liveins: $sgpr2, 
$sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; GFX90A-NEXT: {{ $}} - ; GFX90A-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX90A-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX90A-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX90A-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX90A-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX90A-NEXT: BUFFER_ATOMIC_ADD_F32_OFFSET [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) + ; GFX90A-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX90A-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX90A-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX90A-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; GFX90A-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; GFX90A-NEXT: BUFFER_ATOMIC_ADD_F32_OFFSET [[PRED_COPY]], [[REG_SEQUENCE]], [[PRED_COPY5]], 4095, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) ; GFX90A-NEXT: S_ENDPGM 0 %ret = call float @llvm.amdgcn.raw.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 4095, i32 %soffset, i32 0) ret void @@ -107,27 +107,27 @@ ; GFX908: bb.1 (%ir-block.0): ; GFX908-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; GFX908-NEXT: {{ $}} - ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX908-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX908-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX908-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX908-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX908-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX908-NEXT: BUFFER_ATOMIC_ADD_F32_OFFSET [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) + ; GFX908-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX908-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX908-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX908-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX908-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; GFX908-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; GFX908-NEXT: BUFFER_ATOMIC_ADD_F32_OFFSET [[PRED_COPY]], [[REG_SEQUENCE]], [[PRED_COPY5]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) ; GFX908-NEXT: S_ENDPGM 0 ; GFX90A-LABEL: name: raw_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset ; GFX90A: bb.1 (%ir-block.0): ; GFX90A-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; GFX90A-NEXT: {{ $}} - ; GFX90A-NEXT: 
[[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX90A-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX90A-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX90A-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX90A-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX90A-NEXT: BUFFER_ATOMIC_ADD_F32_OFFSET [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) + ; GFX90A-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX90A-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX90A-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX90A-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; GFX90A-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; GFX90A-NEXT: BUFFER_ATOMIC_ADD_F32_OFFSET [[PRED_COPY]], [[REG_SEQUENCE]], [[PRED_COPY5]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) ; GFX90A-NEXT: S_ENDPGM 0 %ret = call float @llvm.amdgcn.raw.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 0, i32 %soffset, i32 0) ret void @@ -140,42 +140,42 @@ ; GFX908-NEXT: successors: %bb.2(0x80000000) ; GFX908-NEXT: liveins: $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; GFX908-NEXT: {{ $}} - ; GFX908-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX908-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX908-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX908-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; GFX908-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY]] - ; GFX908-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] + ; GFX908-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX908-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX908-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX908-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX908-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; GFX908-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX908-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 + ; GFX908-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]] + ; GFX908-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY5]] ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX908-NEXT: {{ $}} ; GFX908-NEXT: bb.2: ; GFX908-NEXT: successors: %bb.3(0x80000000) ; GFX908-NEXT: {{ $}} - ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec - ; GFX908-NEXT: 
[[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec - ; GFX908-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec - ; GFX908-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY1]], implicit $exec + ; GFX908-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY2]], implicit $exec + ; GFX908-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY3]], implicit $exec + ; GFX908-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY4]], implicit $exec ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX908-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX908-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX908-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; GFX908-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec - ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec + ; GFX908-NEXT: [[PRED_COPY9:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX908-NEXT: [[PRED_COPY10:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX908-NEXT: [[PRED_COPY11:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX908-NEXT: [[PRED_COPY12:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY11]], [[PRED_COPY9]], implicit $exec + ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY12]], [[PRED_COPY10]], implicit $exec ; GFX908-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc - ; GFX908-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec - ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec + ; GFX908-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY6]], implicit $exec + ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[PRED_COPY6]], implicit $exec ; GFX908-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX908-NEXT: {{ $}} ; GFX908-NEXT: bb.3: ; GFX908-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX908-NEXT: {{ $}} - ; GFX908-NEXT: BUFFER_ATOMIC_ADD_F32_OFFEN [[COPY7]], [[COPY8]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) + ; GFX908-NEXT: BUFFER_ATOMIC_ADD_F32_OFFEN [[PRED_COPY7]], [[PRED_COPY8]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, implicit $exec :: (volatile 
dereferenceable load store (s32), align 1, addrspace 7) ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX908-NEXT: {{ $}} @@ -191,42 +191,42 @@ ; GFX90A-NEXT: successors: %bb.2(0x80000000) ; GFX90A-NEXT: liveins: $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; GFX90A-NEXT: {{ $}} - ; GFX90A-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX90A-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128_align2 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX90A-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX90A-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; GFX90A-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY]] - ; GFX90A-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] + ; GFX90A-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX90A-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX90A-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128_align2 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; GFX90A-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX90A-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 + ; GFX90A-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]] + ; GFX90A-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY5]] ; GFX90A-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.2: ; GFX90A-NEXT: successors: %bb.3(0x80000000) ; GFX90A-NEXT: {{ $}} - ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec - ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec - ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec - ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY1]], implicit $exec + ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY2]], implicit $exec + ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY3]], implicit $exec + ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY4]], implicit $exec ; GFX90A-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX90A-NEXT: [[COPY9:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX90A-NEXT: [[COPY10:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX90A-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; GFX90A-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; GFX90A-NEXT: 
[[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec - ; GFX90A-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec + ; GFX90A-NEXT: [[PRED_COPY9:%[0-9]+]]:vreg_64_align2 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX90A-NEXT: [[PRED_COPY10:%[0-9]+]]:vreg_64_align2 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX90A-NEXT: [[PRED_COPY11:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX90A-NEXT: [[PRED_COPY12:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX90A-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY11]], [[PRED_COPY9]], implicit $exec + ; GFX90A-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY12]], [[PRED_COPY10]], implicit $exec ; GFX90A-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc - ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec - ; GFX90A-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec + ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY6]], implicit $exec + ; GFX90A-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[PRED_COPY6]], implicit $exec ; GFX90A-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; GFX90A-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.3: ; GFX90A-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX90A-NEXT: {{ $}} - ; GFX90A-NEXT: BUFFER_ATOMIC_ADD_F32_OFFEN [[COPY7]], [[COPY8]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) + ; GFX90A-NEXT: BUFFER_ATOMIC_ADD_F32_OFFEN [[PRED_COPY7]], [[PRED_COPY8]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) ; GFX90A-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX90A-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX90A-NEXT: {{ $}} @@ -248,40 +248,40 @@ ; GFX908-NEXT: successors: %bb.2(0x80000000) ; GFX908-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; GFX908-NEXT: {{ $}} - ; GFX908-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX908-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; GFX908-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY]] + ; GFX908-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX908-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX908-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX908-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX908-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX908-NEXT: 
[[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; GFX908-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 + ; GFX908-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]] ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX908-NEXT: {{ $}} ; GFX908-NEXT: bb.2: ; GFX908-NEXT: successors: %bb.3(0x80000000) ; GFX908-NEXT: {{ $}} - ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec - ; GFX908-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec - ; GFX908-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec - ; GFX908-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY1]], implicit $exec + ; GFX908-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY2]], implicit $exec + ; GFX908-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY3]], implicit $exec + ; GFX908-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY4]], implicit $exec ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX908-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX908-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX908-NEXT: [[COPY9:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; GFX908-NEXT: [[COPY10:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY9]], [[COPY7]], implicit $exec - ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY10]], [[COPY8]], implicit $exec + ; GFX908-NEXT: [[PRED_COPY7:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX908-NEXT: [[PRED_COPY8:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX908-NEXT: [[PRED_COPY9:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX908-NEXT: [[PRED_COPY10:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY9]], [[PRED_COPY7]], implicit $exec + ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY10]], [[PRED_COPY8]], implicit $exec ; GFX908-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc - ; GFX908-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec - ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY5]], implicit $exec + ; GFX908-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY5]], implicit $exec + ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[PRED_COPY5]], implicit $exec ; GFX908-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; GFX908-NEXT: 
[[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX908-NEXT: {{ $}} ; GFX908-NEXT: bb.3: ; GFX908-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX908-NEXT: {{ $}} - ; GFX908-NEXT: BUFFER_ATOMIC_ADD_F32_OFFSET [[COPY6]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) + ; GFX908-NEXT: BUFFER_ATOMIC_ADD_F32_OFFSET [[PRED_COPY6]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX908-NEXT: {{ $}} @@ -297,40 +297,40 @@ ; GFX90A-NEXT: successors: %bb.2(0x80000000) ; GFX90A-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; GFX90A-NEXT: {{ $}} - ; GFX90A-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX90A-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128_align2 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX90A-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; GFX90A-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY]] + ; GFX90A-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX90A-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX90A-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128_align2 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; GFX90A-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 + ; GFX90A-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]] ; GFX90A-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.2: ; GFX90A-NEXT: successors: %bb.3(0x80000000) ; GFX90A-NEXT: {{ $}} - ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec - ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec - ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec - ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY1]], implicit $exec + ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY2]], implicit $exec + ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY3]], implicit $exec + ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY4]], implicit $exec ; GFX90A-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX90A-NEXT: 
[[COPY7:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX90A-NEXT: [[COPY8:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX90A-NEXT: [[COPY9:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; GFX90A-NEXT: [[COPY10:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; GFX90A-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY9]], [[COPY7]], implicit $exec - ; GFX90A-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY10]], [[COPY8]], implicit $exec + ; GFX90A-NEXT: [[PRED_COPY7:%[0-9]+]]:vreg_64_align2 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX90A-NEXT: [[PRED_COPY8:%[0-9]+]]:vreg_64_align2 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX90A-NEXT: [[PRED_COPY9:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX90A-NEXT: [[PRED_COPY10:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX90A-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY9]], [[PRED_COPY7]], implicit $exec + ; GFX90A-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY10]], [[PRED_COPY8]], implicit $exec ; GFX90A-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc - ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec - ; GFX90A-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY5]], implicit $exec + ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY5]], implicit $exec + ; GFX90A-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[PRED_COPY5]], implicit $exec ; GFX90A-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; GFX90A-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.3: ; GFX90A-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX90A-NEXT: {{ $}} - ; GFX90A-NEXT: BUFFER_ATOMIC_ADD_F32_OFFSET [[COPY6]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) + ; GFX90A-NEXT: BUFFER_ATOMIC_ADD_F32_OFFSET [[PRED_COPY6]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) ; GFX90A-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX90A-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX90A-NEXT: {{ $}} @@ -350,29 +350,29 @@ ; GFX908: bb.1 (%ir-block.0): ; GFX908-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; GFX908-NEXT: {{ $}} - ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX908-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX908-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX908-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX908-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX908-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX908-NEXT: BUFFER_ATOMIC_ADD_F32_OFFEN [[COPY]], [[COPY5]], 
[[REG_SEQUENCE]], [[COPY6]], 4095, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) + ; GFX908-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX908-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX908-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX908-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX908-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; GFX908-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX908-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; GFX908-NEXT: BUFFER_ATOMIC_ADD_F32_OFFEN [[PRED_COPY]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[PRED_COPY6]], 4095, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) ; GFX908-NEXT: S_ENDPGM 0 ; GFX90A-LABEL: name: raw_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset_voffset_add4095 ; GFX90A: bb.1 (%ir-block.0): ; GFX90A-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; GFX90A-NEXT: {{ $}} - ; GFX90A-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX90A-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX90A-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX90A-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX90A-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX90A-NEXT: BUFFER_ATOMIC_ADD_F32_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) + ; GFX90A-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX90A-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX90A-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX90A-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; GFX90A-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; GFX90A-NEXT: BUFFER_ATOMIC_ADD_F32_OFFEN [[PRED_COPY]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[PRED_COPY6]], 4095, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) ; GFX90A-NEXT: S_ENDPGM 0 %voffset = add i32 %voffset.base, 4095 %ret = call float @llvm.amdgcn.raw.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) @@ -385,29 +385,29 @@ ; GFX908: bb.1 (%ir-block.0): ; GFX908-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; GFX908-NEXT: {{ $}} - ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX908-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX908-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX908-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX908-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE 
[[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX908-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX908-NEXT: BUFFER_ATOMIC_ADD_F32_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) + ; GFX908-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX908-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX908-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX908-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX908-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; GFX908-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX908-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; GFX908-NEXT: BUFFER_ATOMIC_ADD_F32_OFFEN [[PRED_COPY]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) ; GFX908-NEXT: S_ENDPGM 0 ; GFX90A-LABEL: name: raw_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc ; GFX90A: bb.1 (%ir-block.0): ; GFX90A-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; GFX90A-NEXT: {{ $}} - ; GFX90A-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX90A-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX90A-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX90A-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX90A-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX90A-NEXT: BUFFER_ATOMIC_ADD_F32_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) + ; GFX90A-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX90A-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX90A-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX90A-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; GFX90A-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; GFX90A-NEXT: BUFFER_ATOMIC_ADD_F32_OFFEN [[PRED_COPY]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) ; GFX90A-NEXT: S_ENDPGM 0 %ret = call float @llvm.amdgcn.raw.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 2) ret void @@ -418,29 +418,29 @@ ; GFX908: bb.1 (%ir-block.0): ; GFX908-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; GFX908-NEXT: {{ $}} - ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX908-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; 
GFX908-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX908-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX908-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX908-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX908-NEXT: BUFFER_ATOMIC_PK_ADD_F16_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 7) + ; GFX908-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX908-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX908-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX908-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX908-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; GFX908-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX908-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; GFX908-NEXT: BUFFER_ATOMIC_PK_ADD_F16_OFFEN [[PRED_COPY]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 7) ; GFX908-NEXT: S_ENDPGM 0 ; GFX90A-LABEL: name: raw_buffer_atomic_add_v2f16_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset ; GFX90A: bb.1 (%ir-block.0): ; GFX90A-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; GFX90A-NEXT: {{ $}} - ; GFX90A-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX90A-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX90A-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX90A-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX90A-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX90A-NEXT: BUFFER_ATOMIC_PK_ADD_F16_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 7) + ; GFX90A-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX90A-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX90A-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX90A-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; GFX90A-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; GFX90A-NEXT: BUFFER_ATOMIC_PK_ADD_F16_OFFEN [[PRED_COPY]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 7) ; GFX90A-NEXT: S_ENDPGM 0 %ret = call <2 x half> @llvm.amdgcn.raw.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) ret void @@ 
-451,27 +451,27 @@ ; GFX908: bb.1 (%ir-block.0): ; GFX908-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; GFX908-NEXT: {{ $}} - ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX908-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX908-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX908-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX908-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX908-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX908-NEXT: BUFFER_ATOMIC_PK_ADD_F16_OFFSET [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 7) + ; GFX908-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX908-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX908-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX908-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX908-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; GFX908-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; GFX908-NEXT: BUFFER_ATOMIC_PK_ADD_F16_OFFSET [[PRED_COPY]], [[REG_SEQUENCE]], [[PRED_COPY5]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 7) ; GFX908-NEXT: S_ENDPGM 0 ; GFX90A-LABEL: name: raw_buffer_atomic_add_v2f16_noret__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset ; GFX90A: bb.1 (%ir-block.0): ; GFX90A-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; GFX90A-NEXT: {{ $}} - ; GFX90A-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX90A-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX90A-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX90A-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX90A-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX90A-NEXT: BUFFER_ATOMIC_PK_ADD_F16_OFFSET [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 7) + ; GFX90A-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX90A-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX90A-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX90A-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; GFX90A-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; GFX90A-NEXT: BUFFER_ATOMIC_PK_ADD_F16_OFFSET [[PRED_COPY]], [[REG_SEQUENCE]], [[PRED_COPY5]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 7) ; GFX90A-NEXT: S_ENDPGM 0 %ret = call <2 x half> @llvm.amdgcn.raw.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 0, i32 %soffset, i32 0) ret void diff --git 
a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.format.f16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.format.f16.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.format.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.format.f16.ll @@ -8,29 +8,29 @@ ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; PACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_X_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 7) - ; PACKED-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_FORMAT_D16_X_OFFEN]] + ; PACKED-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; PACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; PACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; PACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; PACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; PACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; PACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_X_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[PRED_COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 7) + ; PACKED-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_FORMAT_D16_X_OFFEN]] ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; UNPACKED-LABEL: name: raw_buffer_load_format_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; UNPACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 7) - ; UNPACKED-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN]] + ; UNPACKED-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; UNPACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; UNPACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; UNPACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; UNPACKED-NEXT: 
[[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; UNPACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; UNPACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; UNPACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[PRED_COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 7) + ; UNPACKED-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN]] ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call half @llvm.amdgcn.raw.buffer.load.format.f16(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) ret half %val @@ -41,40 +41,40 @@ ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; PACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_XY_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_XY_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x s16>), align 1, addrspace 7) - ; PACKED-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_FORMAT_D16_XY_OFFEN]] + ; PACKED-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; PACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; PACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; PACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; PACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; PACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; PACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_XY_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_XY_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[PRED_COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x s16>), align 1, addrspace 7) + ; PACKED-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_FORMAT_D16_XY_OFFEN]] ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; UNPACKED-LABEL: name: raw_buffer_load_format_v2f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; UNPACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_XY_gfx80_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_FORMAT_D16_XY_gfx80_OFFEN 
[[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x s16>), align 1, addrspace 7) - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XY_gfx80_OFFEN]].sub0 - ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XY_gfx80_OFFEN]].sub1 + ; UNPACKED-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; UNPACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; UNPACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; UNPACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; UNPACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; UNPACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; UNPACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_XY_gfx80_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_FORMAT_D16_XY_gfx80_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[PRED_COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x s16>), align 1, addrspace 7) + ; UNPACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_FORMAT_D16_XY_gfx80_OFFEN]].sub0 + ; UNPACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_FORMAT_D16_XY_gfx80_OFFEN]].sub1 ; UNPACKED-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65535 - ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; UNPACKED-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY6]], [[COPY8]], implicit $exec - ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; UNPACKED-NEXT: [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY7]], [[COPY9]], implicit $exec + ; UNPACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; UNPACKED-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PRED_COPY6]], [[PRED_COPY8]], implicit $exec + ; UNPACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; UNPACKED-NEXT: [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PRED_COPY7]], [[PRED_COPY9]], implicit $exec ; UNPACKED-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 16 - ; UNPACKED-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]] - ; UNPACKED-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY10]], [[V_AND_B32_e64_1]], implicit $exec + ; UNPACKED-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_1]] + ; UNPACKED-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[PRED_COPY10]], [[V_AND_B32_e64_1]], implicit $exec ; UNPACKED-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_AND_B32_e64_]], [[V_LSHLREV_B32_e64_]], implicit $exec - ; UNPACKED-NEXT: $vgpr0 = COPY [[V_OR_B32_e64_]] + ; UNPACKED-NEXT: $vgpr0 = PRED_COPY [[V_OR_B32_e64_]] ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call <2 x half> @llvm.amdgcn.raw.buffer.load.format.v2f16(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) ret <2 x half> %val @@ -91,53 +91,53 @@ ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, 
[[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; PACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_XYZW_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_FORMAT_D16_XYZW_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s16>), align 1, addrspace 7) - ; PACKED-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_OFFEN]].sub0 - ; PACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_OFFEN]].sub1 - ; PACKED-NEXT: $vgpr0 = COPY [[COPY6]] - ; PACKED-NEXT: $vgpr1 = COPY [[COPY7]] + ; PACKED-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; PACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; PACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; PACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; PACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; PACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; PACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_XYZW_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_FORMAT_D16_XYZW_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[PRED_COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s16>), align 1, addrspace 7) + ; PACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_OFFEN]].sub0 + ; PACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_OFFEN]].sub1 + ; PACKED-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY6]] + ; PACKED-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY7]] ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 ; UNPACKED-LABEL: name: raw_buffer_load_format_v4f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; UNPACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s16>), align 1, addrspace 7) - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN]].sub0 - ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN]].sub1 - ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN]].sub2 - ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN]].sub3 + ; UNPACKED-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; UNPACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; UNPACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; UNPACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 
+ ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; UNPACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; UNPACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; UNPACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[PRED_COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s16>), align 1, addrspace 7) + ; UNPACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN]].sub0 + ; UNPACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN]].sub1 + ; UNPACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN]].sub2 + ; UNPACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN]].sub3 ; UNPACKED-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65535 - ; UNPACKED-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; UNPACKED-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY6]], [[COPY10]], implicit $exec - ; UNPACKED-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; UNPACKED-NEXT: [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY7]], [[COPY11]], implicit $exec + ; UNPACKED-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; UNPACKED-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PRED_COPY6]], [[PRED_COPY10]], implicit $exec + ; UNPACKED-NEXT: [[PRED_COPY11:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; UNPACKED-NEXT: [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PRED_COPY7]], [[PRED_COPY11]], implicit $exec ; UNPACKED-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 16 - ; UNPACKED-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]] - ; UNPACKED-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY12]], [[V_AND_B32_e64_1]], implicit $exec + ; UNPACKED-NEXT: [[PRED_COPY12:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_1]] + ; UNPACKED-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[PRED_COPY12]], [[V_AND_B32_e64_1]], implicit $exec ; UNPACKED-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_AND_B32_e64_]], [[V_LSHLREV_B32_e64_]], implicit $exec - ; UNPACKED-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; UNPACKED-NEXT: [[V_AND_B32_e64_2:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY8]], [[COPY13]], implicit $exec - ; UNPACKED-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; UNPACKED-NEXT: [[V_AND_B32_e64_3:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY9]], [[COPY14]], implicit $exec - ; UNPACKED-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]] - ; UNPACKED-NEXT: [[V_LSHLREV_B32_e64_1:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY15]], [[V_AND_B32_e64_3]], implicit $exec + ; UNPACKED-NEXT: [[PRED_COPY13:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; UNPACKED-NEXT: [[V_AND_B32_e64_2:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PRED_COPY8]], [[PRED_COPY13]], implicit $exec + ; UNPACKED-NEXT: [[PRED_COPY14:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; UNPACKED-NEXT: [[V_AND_B32_e64_3:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PRED_COPY9]], [[PRED_COPY14]], implicit $exec + ; UNPACKED-NEXT: [[PRED_COPY15:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_1]] + ; UNPACKED-NEXT: [[V_LSHLREV_B32_e64_1:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[PRED_COPY15]], 
[[V_AND_B32_e64_3]], implicit $exec ; UNPACKED-NEXT: [[V_OR_B32_e64_1:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_AND_B32_e64_2]], [[V_LSHLREV_B32_e64_1]], implicit $exec - ; UNPACKED-NEXT: $vgpr0 = COPY [[V_OR_B32_e64_]] - ; UNPACKED-NEXT: $vgpr1 = COPY [[V_OR_B32_e64_1]] + ; UNPACKED-NEXT: $vgpr0 = PRED_COPY [[V_OR_B32_e64_]] + ; UNPACKED-NEXT: $vgpr1 = PRED_COPY [[V_OR_B32_e64_1]] ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %val = call <4 x half> @llvm.amdgcn.raw.buffer.load.format.v4f16(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) ret <4 x half> %val @@ -150,40 +150,40 @@ ; PACKED-NEXT: successors: %bb.2(0x80000000) ; PACKED-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; PACKED-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY4]] + ; PACKED-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; PACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; PACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; PACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; PACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; PACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 + ; PACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY4]] ; PACKED-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; PACKED-NEXT: {{ $}} ; PACKED-NEXT: bb.2: ; PACKED-NEXT: successors: %bb.3(0x80000000) ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY1]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY2]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY3]], implicit $exec ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; PACKED-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; PACKED-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; PACKED-NEXT: [[COPY9:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; PACKED-NEXT: [[COPY10:%[0-9]+]]:sreg_64 = COPY 
[[REG_SEQUENCE1]].sub2_sub3 - ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY9]], [[COPY7]], implicit $exec - ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY10]], [[COPY8]], implicit $exec + ; PACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; PACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; PACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; PACKED-NEXT: [[PRED_COPY10:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY9]], [[PRED_COPY7]], implicit $exec + ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY10]], [[PRED_COPY8]], implicit $exec ; PACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec - ; PACKED-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY5]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY5]], implicit $exec + ; PACKED-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[PRED_COPY5]], implicit $exec ; PACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; PACKED-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; PACKED-NEXT: {{ $}} ; PACKED-NEXT: bb.3: ; PACKED-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_X_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_OFFEN [[COPY6]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 7) + ; PACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_X_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_OFFEN [[PRED_COPY6]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 7) ; PACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; PACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; PACKED-NEXT: {{ $}} @@ -193,47 +193,47 @@ ; PACKED-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; PACKED-NEXT: {{ $}} ; PACKED-NEXT: bb.5: - ; PACKED-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_FORMAT_D16_X_OFFEN]] + ; PACKED-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_FORMAT_D16_X_OFFEN]] ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; UNPACKED-LABEL: name: raw_buffer_load_format_f16__vgpr_rsrc__sgpr_voffset__vgpr_soffset ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: successors: %bb.2(0x80000000) ; UNPACKED-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; UNPACKED-NEXT: 
[[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY4]] + ; UNPACKED-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; UNPACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; UNPACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; UNPACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; UNPACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; UNPACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 + ; UNPACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY4]] ; UNPACKED-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; UNPACKED-NEXT: {{ $}} ; UNPACKED-NEXT: bb.2: ; UNPACKED-NEXT: successors: %bb.3(0x80000000) ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY1]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY2]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY3]], implicit $exec ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; UNPACKED-NEXT: [[COPY10:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY9]], [[COPY7]], implicit $exec - ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY10]], [[COPY8]], implicit $exec + ; UNPACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; UNPACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; UNPACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; UNPACKED-NEXT: [[PRED_COPY10:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY9]], [[PRED_COPY7]], implicit $exec + ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY10]], [[PRED_COPY8]], implicit $exec ; UNPACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], 
implicit $exec - ; UNPACKED-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY5]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY5]], implicit $exec + ; UNPACKED-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[PRED_COPY5]], implicit $exec ; UNPACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; UNPACKED-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; UNPACKED-NEXT: {{ $}} ; UNPACKED-NEXT: bb.3: ; UNPACKED-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN [[COPY6]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 7) + ; UNPACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN [[PRED_COPY6]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 7) ; UNPACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; UNPACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; UNPACKED-NEXT: {{ $}} @@ -243,7 +243,7 @@ ; UNPACKED-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; UNPACKED-NEXT: {{ $}} ; UNPACKED-NEXT: bb.5: - ; UNPACKED-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN]] + ; UNPACKED-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN]] ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call half @llvm.amdgcn.raw.buffer.load.format.f16(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) ret half %val @@ -254,53 +254,53 @@ ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; PACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_XYZW_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_FORMAT_D16_XYZW_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, implicit $exec :: (dereferenceable load (<4 x s16>), align 1, addrspace 7) - ; PACKED-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_OFFEN]].sub0 - ; PACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_OFFEN]].sub1 - ; PACKED-NEXT: $vgpr0 = COPY [[COPY6]] - ; PACKED-NEXT: $vgpr1 = COPY [[COPY7]] + ; PACKED-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; PACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; PACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; PACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], 
%subreg.sub3 + ; PACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; PACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; PACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_XYZW_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_FORMAT_D16_XYZW_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[PRED_COPY5]], 4095, 0, 0, implicit $exec :: (dereferenceable load (<4 x s16>), align 1, addrspace 7) + ; PACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_OFFEN]].sub0 + ; PACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_OFFEN]].sub1 + ; PACKED-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY6]] + ; PACKED-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY7]] ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 ; UNPACKED-LABEL: name: raw_buffer_load_format_v4f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset_voffset_add_4095 ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; UNPACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, implicit $exec :: (dereferenceable load (<4 x s16>), align 1, addrspace 7) - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN]].sub0 - ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN]].sub1 - ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN]].sub2 - ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN]].sub3 + ; UNPACKED-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; UNPACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; UNPACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; UNPACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; UNPACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; UNPACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; UNPACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[PRED_COPY5]], 4095, 0, 0, implicit $exec :: (dereferenceable load (<4 x s16>), align 1, addrspace 7) + ; UNPACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN]].sub0 + ; UNPACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN]].sub1 + ; UNPACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN]].sub2 + ; UNPACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN]].sub3 ; UNPACKED-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65535 - ; 
UNPACKED-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; UNPACKED-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY6]], [[COPY10]], implicit $exec - ; UNPACKED-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; UNPACKED-NEXT: [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY7]], [[COPY11]], implicit $exec + ; UNPACKED-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; UNPACKED-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PRED_COPY6]], [[PRED_COPY10]], implicit $exec + ; UNPACKED-NEXT: [[PRED_COPY11:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; UNPACKED-NEXT: [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PRED_COPY7]], [[PRED_COPY11]], implicit $exec ; UNPACKED-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 16 - ; UNPACKED-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]] - ; UNPACKED-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY12]], [[V_AND_B32_e64_1]], implicit $exec + ; UNPACKED-NEXT: [[PRED_COPY12:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_1]] + ; UNPACKED-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[PRED_COPY12]], [[V_AND_B32_e64_1]], implicit $exec ; UNPACKED-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_AND_B32_e64_]], [[V_LSHLREV_B32_e64_]], implicit $exec - ; UNPACKED-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; UNPACKED-NEXT: [[V_AND_B32_e64_2:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY8]], [[COPY13]], implicit $exec - ; UNPACKED-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; UNPACKED-NEXT: [[V_AND_B32_e64_3:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY9]], [[COPY14]], implicit $exec - ; UNPACKED-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]] - ; UNPACKED-NEXT: [[V_LSHLREV_B32_e64_1:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY15]], [[V_AND_B32_e64_3]], implicit $exec + ; UNPACKED-NEXT: [[PRED_COPY13:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; UNPACKED-NEXT: [[V_AND_B32_e64_2:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PRED_COPY8]], [[PRED_COPY13]], implicit $exec + ; UNPACKED-NEXT: [[PRED_COPY14:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; UNPACKED-NEXT: [[V_AND_B32_e64_3:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PRED_COPY9]], [[PRED_COPY14]], implicit $exec + ; UNPACKED-NEXT: [[PRED_COPY15:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_1]] + ; UNPACKED-NEXT: [[V_LSHLREV_B32_e64_1:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[PRED_COPY15]], [[V_AND_B32_e64_3]], implicit $exec ; UNPACKED-NEXT: [[V_OR_B32_e64_1:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_AND_B32_e64_2]], [[V_LSHLREV_B32_e64_1]], implicit $exec - ; UNPACKED-NEXT: $vgpr0 = COPY [[V_OR_B32_e64_]] - ; UNPACKED-NEXT: $vgpr1 = COPY [[V_OR_B32_e64_1]] + ; UNPACKED-NEXT: $vgpr0 = PRED_COPY [[V_OR_B32_e64_]] + ; UNPACKED-NEXT: $vgpr1 = PRED_COPY [[V_OR_B32_e64_1]] ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %voffset = add i32 %voffset.base, 4095 %val = call <4 x half> @llvm.amdgcn.raw.buffer.load.format.v4f16(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.format.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.format.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.format.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.format.ll @@ -7,15 +7,15 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: 
[[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7) - ; CHECK-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_FORMAT_X_OFFEN]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[PRED_COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_FORMAT_X_OFFEN]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) ret float %val @@ -26,18 +26,18 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_XY_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_FORMAT_XY_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x s32>), align 1, addrspace 7) - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XY_OFFEN]].sub0 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XY_OFFEN]].sub1 - ; CHECK-NEXT: $vgpr0 = COPY [[COPY6]] - ; CHECK-NEXT: $vgpr1 = COPY [[COPY7]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_XY_OFFEN:%[0-9]+]]:vreg_64 = 
BUFFER_LOAD_FORMAT_XY_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[PRED_COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x s32>), align 1, addrspace 7) + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_FORMAT_XY_OFFEN]].sub0 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_FORMAT_XY_OFFEN]].sub1 + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY7]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %val = call <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) ret <2 x float> %val @@ -48,20 +48,20 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_XYZ_OFFEN:%[0-9]+]]:vreg_96 = BUFFER_LOAD_FORMAT_XYZ_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (<3 x s32>), align 1, addrspace 7) - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZ_OFFEN]].sub0 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZ_OFFEN]].sub1 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZ_OFFEN]].sub2 - ; CHECK-NEXT: $vgpr0 = COPY [[COPY6]] - ; CHECK-NEXT: $vgpr1 = COPY [[COPY7]] - ; CHECK-NEXT: $vgpr2 = COPY [[COPY8]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_XYZ_OFFEN:%[0-9]+]]:vreg_96 = BUFFER_LOAD_FORMAT_XYZ_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[PRED_COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (<3 x s32>), align 1, addrspace 7) + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_FORMAT_XYZ_OFFEN]].sub0 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_FORMAT_XYZ_OFFEN]].sub1 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_FORMAT_XYZ_OFFEN]].sub2 + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[PRED_COPY8]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 %val = call <3 x float> @llvm.amdgcn.raw.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) ret <3 x float> %val @@ -72,22 +72,22 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = 
COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_XYZW_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 7) - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZW_OFFEN]].sub0 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZW_OFFEN]].sub1 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZW_OFFEN]].sub2 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZW_OFFEN]].sub3 - ; CHECK-NEXT: $vgpr0 = COPY [[COPY6]] - ; CHECK-NEXT: $vgpr1 = COPY [[COPY7]] - ; CHECK-NEXT: $vgpr2 = COPY [[COPY8]] - ; CHECK-NEXT: $vgpr3 = COPY [[COPY9]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_XYZW_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[PRED_COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 7) + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_FORMAT_XYZW_OFFEN]].sub0 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_FORMAT_XYZW_OFFEN]].sub1 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_FORMAT_XYZW_OFFEN]].sub2 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_FORMAT_XYZW_OFFEN]].sub3 + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[PRED_COPY8]] + ; CHECK-NEXT: $vgpr3 = PRED_COPY [[PRED_COPY9]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 %val = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) ret <4 x float> %val @@ -100,40 +100,40 @@ ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY4]] + ; 
CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY4]] ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY1]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY2]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY3]], implicit $exec ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY9]], [[COPY7]], implicit $exec - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY10]], [[COPY8]], implicit $exec + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY9]], [[PRED_COPY7]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY10]], [[PRED_COPY8]], implicit $exec ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec - ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY5]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY5]], implicit $exec + ; CHECK-NEXT: 
[[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[PRED_COPY5]], implicit $exec ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_OFFEN [[COPY6]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7) + ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_OFFEN [[PRED_COPY6]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -143,7 +143,7 @@ ; CHECK-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.5: - ; CHECK-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_FORMAT_X_OFFEN]] + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_FORMAT_X_OFFEN]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) ret float %val @@ -154,22 +154,22 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_XYZW_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 7) - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZW_OFFEN]].sub0 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZW_OFFEN]].sub1 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZW_OFFEN]].sub2 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZW_OFFEN]].sub3 - ; CHECK-NEXT: $vgpr0 = COPY [[COPY6]] - ; CHECK-NEXT: $vgpr1 = COPY [[COPY7]] - ; CHECK-NEXT: $vgpr2 = COPY [[COPY8]] - ; CHECK-NEXT: $vgpr3 = COPY [[COPY9]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: 
[[BUFFER_LOAD_FORMAT_XYZW_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[PRED_COPY5]], 4095, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 7) + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_FORMAT_XYZW_OFFEN]].sub0 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_FORMAT_XYZW_OFFEN]].sub1 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_FORMAT_XYZW_OFFEN]].sub2 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_FORMAT_XYZW_OFFEN]].sub3 + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[PRED_COPY8]] + ; CHECK-NEXT: $vgpr3 = PRED_COPY [[PRED_COPY9]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 %voffset = add i32 %voffset.base, 4095 %val = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.ll @@ -8,15 +8,15 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7) - ; CHECK-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[PRED_COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) ret float %val @@ -28,16 +28,16 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: 
[[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr7 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY4]] - ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY6]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7) - ; CHECK-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr7 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY4]] + ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[PRED_COPY6]], [[REG_SEQUENCE]], [[PRED_COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) ret float %val @@ -50,36 +50,36 @@ ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec - ; CHECK-NEXT: 
[[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY1]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY2]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY3]], implicit $exec ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY8]], [[COPY6]], implicit $exec - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY9]], [[COPY7]], implicit $exec + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY8]], [[PRED_COPY6]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY9]], [[PRED_COPY7]], implicit $exec ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7) + ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE1]], [[PRED_COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -89,7 +89,7 @@ ; CHECK-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.5: - ; CHECK-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) ret float %val @@ -102,39 +102,39 @@ ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY 
$vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr5 ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY1]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY2]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY3]], implicit $exec ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY8]], [[COPY6]], implicit $exec - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY9]], [[COPY7]], implicit $exec + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY8]], [[PRED_COPY6]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY9]], [[PRED_COPY7]], implicit $exec ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc - ; CHECK-NEXT: 
[[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec - ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY5]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY5]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[PRED_COPY5]], implicit $exec ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7) + ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -144,7 +144,7 @@ ; CHECK-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.5: - ; CHECK-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) ret float %val @@ -156,15 +156,15 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 1, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7) - ; CHECK-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[PRED_COPY5]], 0, 1, 0, implicit $exec :: (dereferenceable 
load (s32), align 1, addrspace 7) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 1) ret float %val @@ -176,15 +176,15 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 2, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7) - ; CHECK-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[PRED_COPY5]], 0, 2, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 2) ret float %val @@ -196,15 +196,15 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 4, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7) - ; CHECK-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: 
[[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[PRED_COPY5]], 0, 4, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 4) ret float %val @@ -216,15 +216,15 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 6, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7) - ; CHECK-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[PRED_COPY5]], 0, 6, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 6) ret float %val @@ -236,15 +236,15 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 5, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7) - ; CHECK-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: 
[[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[PRED_COPY5]], 0, 5, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 5) ret float %val @@ -256,15 +256,15 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 7, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7) - ; CHECK-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[PRED_COPY5]], 0, 7, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 7) ret float %val @@ -276,18 +276,18 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[BUFFER_LOAD_DWORDX2_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_OFFEN [[COPY4]], 
[[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x s32>), align 1, addrspace 7) - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub0 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub1 - ; CHECK-NEXT: $vgpr0 = COPY [[COPY6]] - ; CHECK-NEXT: $vgpr1 = COPY [[COPY7]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[BUFFER_LOAD_DWORDX2_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[PRED_COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x s32>), align 1, addrspace 7) + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub0 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub1 + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY7]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %val = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) ret <2 x float> %val @@ -298,20 +298,20 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[BUFFER_LOAD_DWORDX3_OFFEN:%[0-9]+]]:vreg_96 = BUFFER_LOAD_DWORDX3_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (<3 x s32>), align 1, addrspace 7) - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_OFFEN]].sub0 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_OFFEN]].sub1 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_OFFEN]].sub2 - ; CHECK-NEXT: $vgpr0 = COPY [[COPY6]] - ; CHECK-NEXT: $vgpr1 = COPY [[COPY7]] - ; CHECK-NEXT: $vgpr2 = COPY [[COPY8]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[BUFFER_LOAD_DWORDX3_OFFEN:%[0-9]+]]:vreg_96 = BUFFER_LOAD_DWORDX3_OFFEN 
[[PRED_COPY4]], [[REG_SEQUENCE]], [[PRED_COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (<3 x s32>), align 1, addrspace 7) + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_DWORDX3_OFFEN]].sub0 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_DWORDX3_OFFEN]].sub1 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_DWORDX3_OFFEN]].sub2 + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[PRED_COPY8]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 %val = call <3 x float> @llvm.amdgcn.raw.buffer.load.v3f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) ret <3 x float> %val @@ -322,22 +322,22 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 7) - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub0 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub1 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub2 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub3 - ; CHECK-NEXT: $vgpr0 = COPY [[COPY6]] - ; CHECK-NEXT: $vgpr1 = COPY [[COPY7]] - ; CHECK-NEXT: $vgpr2 = COPY [[COPY8]] - ; CHECK-NEXT: $vgpr3 = COPY [[COPY9]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[PRED_COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 7) + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub0 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub1 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub2 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub3 + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[PRED_COPY8]] + ; CHECK-NEXT: $vgpr3 = PRED_COPY [[PRED_COPY9]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit 
$vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 %val = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) ret <4 x float> %val @@ -348,15 +348,15 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[BUFFER_LOAD_USHORT_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 7) - ; CHECK-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_USHORT_OFFEN]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[BUFFER_LOAD_USHORT_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[PRED_COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 7) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_USHORT_OFFEN]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call half @llvm.amdgcn.raw.buffer.load.f16(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) ret half %val @@ -367,15 +367,15 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x s16>), align 1, addrspace 7) - ; CHECK-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: 
[[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[PRED_COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x s16>), align 1, addrspace 7) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call <2 x half> @llvm.amdgcn.raw.buffer.load.v2f16(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) ret <2 x half> %val @@ -392,18 +392,18 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[BUFFER_LOAD_DWORDX2_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s16>), align 1, addrspace 7) - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub0 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub1 - ; CHECK-NEXT: $vgpr0 = COPY [[COPY6]] - ; CHECK-NEXT: $vgpr1 = COPY [[COPY7]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[BUFFER_LOAD_DWORDX2_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[PRED_COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s16>), align 1, addrspace 7) + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub0 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub1 + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY7]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %val = call <4 x half> @llvm.amdgcn.raw.buffer.load.v4f16(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) ret <4 x half> %val @@ -414,15 +414,15 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; 
CHECK-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (s8), addrspace 7) - ; CHECK-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[PRED_COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (s8), addrspace 7) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_UBYTE_OFFEN]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call i8 @llvm.amdgcn.raw.buffer.load.i8(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) %zext = zext i8 %val to i32 @@ -435,16 +435,16 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (s8), addrspace 7) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[PRED_COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (s8), addrspace 7) ; CHECK-NEXT: [[V_BFE_I32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_I32_e64 [[BUFFER_LOAD_UBYTE_OFFEN]], 0, 8, implicit $exec - ; CHECK-NEXT: $vgpr0 = COPY [[V_BFE_I32_e64_]] + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[V_BFE_I32_e64_]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call i8 @llvm.amdgcn.raw.buffer.load.i8(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) %zext = sext i8 %val to i32 @@ -459,36 +459,36 @@ ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; 
CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY1]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY2]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY3]], implicit $exec ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY8]], [[COPY6]], implicit $exec - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY9]], [[COPY7]], implicit $exec + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY8]], [[PRED_COPY6]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY9]], [[PRED_COPY7]], implicit $exec ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; CHECK-NEXT: 
[[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[BUFFER_LOAD_USHORT_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 7) + ; CHECK-NEXT: [[BUFFER_LOAD_USHORT_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE1]], [[PRED_COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 7) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -498,7 +498,7 @@ ; CHECK-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.5: - ; CHECK-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_USHORT_OFFEN]] + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_USHORT_OFFEN]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call half @llvm.amdgcn.raw.buffer.load.f16(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) ret half %val @@ -511,36 +511,36 @@ ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY1]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY2]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY3]], implicit $exec ; CHECK-NEXT: 
[[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY8]], [[COPY6]], implicit $exec - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY9]], [[COPY7]], implicit $exec + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY8]], [[PRED_COPY6]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY9]], [[PRED_COPY7]], implicit $exec ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (s8), addrspace 7) + ; CHECK-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE1]], [[PRED_COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (s8), addrspace 7) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -550,7 +550,7 @@ ; CHECK-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.5: - ; CHECK-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_UBYTE_OFFEN]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call i8 @llvm.amdgcn.raw.buffer.load.i8(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) %zext = zext i8 %val to i32 @@ -563,14 +563,14 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE]], [[COPY4]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7) - ; CHECK-NEXT: $vgpr0 = COPY 
[[BUFFER_LOAD_DWORD_OFFSET]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE]], [[PRED_COPY4]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_OFFSET]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 0, i32 %soffset, i32 0) ret float %val @@ -581,14 +581,14 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE]], [[COPY4]], 4095, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7) - ; CHECK-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE]], [[PRED_COPY4]], 4095, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_OFFSET]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 4095, i32 %soffset, i32 0) ret float %val @@ -599,16 +599,16 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: 
[[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE]], [[COPY4]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7) - ; CHECK-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[PRED_COPY5]], [[REG_SEQUENCE]], [[PRED_COPY4]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 4096, i32 %soffset, i32 0) ret float %val @@ -619,15 +619,15 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 16, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7) - ; CHECK-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[PRED_COPY5]], 16, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %voffset = add i32 %voffset.base, 16 %val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) @@ -639,15 +639,15 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: 
[[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7) - ; CHECK-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[PRED_COPY5]], 4095, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %voffset = add i32 %voffset.base, 4095 %val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) @@ -659,18 +659,18 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; CHECK-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec - ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_ADD_CO_U32_e64_]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7) - ; CHECK-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; CHECK-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 
[[PRED_COPY4]], [[PRED_COPY6]], 0, implicit $exec + ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_ADD_CO_U32_e64_]], [[REG_SEQUENCE]], [[PRED_COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %voffset = add i32 %voffset.base, 4096 %val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) @@ -682,15 +682,15 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4095 - ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7) - ; CHECK-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 4095, i32 0) ret float %val @@ -701,15 +701,15 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 - ; CHECK-NEXT: 
[[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7) - ; CHECK-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 4096, i32 0) ret float %val @@ -720,17 +720,17 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 16 - ; CHECK-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY5]], [[S_MOV_B32_]], implicit-def $scc - ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7) - ; CHECK-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; CHECK-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[PRED_COPY5]], [[S_MOV_B32_]], implicit-def $scc + ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %soffset = add i32 %soffset.base, 16 %val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) @@ -742,17 +742,17 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY 
$sgpr6 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4095 - ; CHECK-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY5]], [[S_MOV_B32_]], implicit-def $scc - ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7) - ; CHECK-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; CHECK-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[PRED_COPY5]], [[S_MOV_B32_]], implicit-def $scc + ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %soffset = add i32 %soffset.base, 4095 %val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) @@ -764,17 +764,17 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 - ; CHECK-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY5]], [[S_MOV_B32_]], implicit-def $scc - ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7) - ; CHECK-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; CHECK-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[PRED_COPY5]], [[S_MOV_B32_]], implicit-def $scc + ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 
1, addrspace 7) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %soffset = add i32 %soffset.base, 4096 %val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) @@ -788,38 +788,38 @@ ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 5000 - ; CHECK-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY5]], [[S_MOV_B32_]], implicit-def $scc + ; CHECK-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[PRED_COPY5]], [[S_MOV_B32_]], implicit-def $scc ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY1]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY2]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY3]], implicit $exec ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY8]], [[COPY6]], implicit $exec - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 
[[COPY9]], [[COPY7]], implicit $exec + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY8]], [[PRED_COPY6]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY9]], [[PRED_COPY7]], implicit $exec ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_ADD_I32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7) + ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE1]], [[S_ADD_I32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -829,7 +829,7 @@ ; CHECK-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.5: - ; CHECK-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %soffset = add i32 %soffset.base, 5000 %val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) @@ -843,39 +843,39 @@ ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; CHECK-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 
[[COPY4]], [[COPY6]], 0, implicit $exec + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; CHECK-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY4]], [[PRED_COPY6]], 0, implicit $exec ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY1]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY2]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY3]], implicit $exec ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY9]], [[COPY7]], implicit $exec - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY10]], [[COPY8]], implicit $exec + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY9]], [[PRED_COPY7]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY10]], [[PRED_COPY8]], implicit $exec ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_ADD_CO_U32_e64_]], [[REG_SEQUENCE1]], [[COPY5]], 904, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7) + ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_ADD_CO_U32_e64_]], [[REG_SEQUENCE1]], [[PRED_COPY5]], 904, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7) 
; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -885,7 +885,7 @@ ; CHECK-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.5: - ; CHECK-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %voffset = add i32 %voffset.base, 5000 %val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.format.f16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.format.f16.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.format.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.format.f16.ll @@ -7,29 +7,29 @@ ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; UNPACKED-NEXT: BUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 7) + ; UNPACKED-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; UNPACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; UNPACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; UNPACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; UNPACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; UNPACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; UNPACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; UNPACKED-NEXT: BUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[PRED_COPY4]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 7) ; UNPACKED-NEXT: S_ENDPGM 0 ; PACKED-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_f16 ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; PACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - 
; PACKED-NEXT: BUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 7) + ; PACKED-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; PACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; PACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; PACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; PACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; PACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; PACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; PACKED-NEXT: BUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[PRED_COPY4]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 7) ; PACKED-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.buffer.store.format.f16(half %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) ret void @@ -40,27 +40,27 @@ ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; UNPACKED-NEXT: BUFFER_STORE_FORMAT_D16_X_gfx80_OFFSET_exact [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 7) + ; UNPACKED-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; UNPACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; UNPACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; UNPACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; UNPACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; UNPACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; UNPACKED-NEXT: BUFFER_STORE_FORMAT_D16_X_gfx80_OFFSET_exact [[PRED_COPY4]], [[REG_SEQUENCE]], [[PRED_COPY5]], 4095, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 7) ; UNPACKED-NEXT: S_ENDPGM 0 ; PACKED-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__voffset_4095__sgpr_soffset_f16 ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; PACKED-NEXT: 
[[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; PACKED-NEXT: BUFFER_STORE_FORMAT_D16_X_OFFSET_exact [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 7) + ; PACKED-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; PACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; PACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; PACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; PACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; PACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; PACKED-NEXT: BUFFER_STORE_FORMAT_D16_X_OFFSET_exact [[PRED_COPY4]], [[REG_SEQUENCE]], [[PRED_COPY5]], 4095, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 7) ; PACKED-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.buffer.store.format.f16(half %val, <4 x i32> %rsrc, i32 4095, i32 %soffset, i32 0) ret void @@ -71,33 +71,33 @@ ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; UNPACKED-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; UNPACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; UNPACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; UNPACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; UNPACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; UNPACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; UNPACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 ; UNPACKED-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 16 - ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; UNPACKED-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY7]], [[COPY4]], implicit $exec - ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1 - ; UNPACKED-NEXT: BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>), align 1, addrspace 7) + ; UNPACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; UNPACKED-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[PRED_COPY7]], [[PRED_COPY4]], implicit $exec + ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY4]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1 + ; UNPACKED-NEXT: BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact [[REG_SEQUENCE1]], 
[[PRED_COPY5]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>), align 1, addrspace 7) ; UNPACKED-NEXT: S_ENDPGM 0 ; PACKED-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16 ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; PACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; PACKED-NEXT: BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>), align 1, addrspace 7) + ; PACKED-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; PACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; PACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; PACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; PACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; PACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; PACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; PACKED-NEXT: BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[PRED_COPY4]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>), align 1, addrspace 7) ; PACKED-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.buffer.store.format.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) ret void @@ -108,38 +108,38 @@ ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; UNPACKED-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; UNPACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; UNPACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; UNPACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; UNPACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; UNPACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = 
PRED_COPY $vgpr1 + ; UNPACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; UNPACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 ; UNPACKED-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 16 - ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; UNPACKED-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY8]], [[COPY4]], implicit $exec - ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; UNPACKED-NEXT: [[V_LSHRREV_B32_e64_1:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY9]], [[COPY5]], implicit $exec - ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1, [[COPY5]], %subreg.sub2, [[V_LSHRREV_B32_e64_1]], %subreg.sub3 - ; UNPACKED-NEXT: BUFFER_STORE_FORMAT_D16_XYZW_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s16>), align 1, addrspace 7) + ; UNPACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; UNPACKED-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[PRED_COPY8]], [[PRED_COPY4]], implicit $exec + ; UNPACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; UNPACKED-NEXT: [[V_LSHRREV_B32_e64_1:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[PRED_COPY9]], [[PRED_COPY5]], implicit $exec + ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY4]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1, [[PRED_COPY5]], %subreg.sub2, [[V_LSHRREV_B32_e64_1]], %subreg.sub3 + ; UNPACKED-NEXT: BUFFER_STORE_FORMAT_D16_XYZW_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[PRED_COPY6]], [[REG_SEQUENCE]], [[PRED_COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s16>), align 1, addrspace 7) ; UNPACKED-NEXT: S_ENDPGM 0 ; PACKED-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v4f16 ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; PACKED-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; PACKED-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; PACKED-NEXT: BUFFER_STORE_FORMAT_D16_XYZW_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s16>), align 1, addrspace 7) + ; PACKED-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; PACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; PACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; PACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; PACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; 
PACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY4]], %subreg.sub0, [[PRED_COPY5]], %subreg.sub1 + ; PACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; PACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; PACKED-NEXT: BUFFER_STORE_FORMAT_D16_XYZW_OFFEN_exact [[REG_SEQUENCE1]], [[PRED_COPY6]], [[REG_SEQUENCE]], [[PRED_COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s16>), align 1, addrspace 7) ; PACKED-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.buffer.store.format.v4f16(<4 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) ret void @@ -152,44 +152,44 @@ ; UNPACKED-NEXT: successors: %bb.2(0x80000000) ; UNPACKED-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5 - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr6 - ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; UNPACKED-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; UNPACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; UNPACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; UNPACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; UNPACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 + ; UNPACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr5 + ; UNPACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr6 + ; UNPACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 ; UNPACKED-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 16 - ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; UNPACKED-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY8]], [[COPY4]], implicit $exec - ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; UNPACKED-NEXT: [[V_LSHRREV_B32_e64_1:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY9]], [[COPY5]], implicit $exec - ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1, [[COPY5]], %subreg.sub2, [[V_LSHRREV_B32_e64_1]], %subreg.sub3 + ; UNPACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; UNPACKED-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[PRED_COPY8]], [[PRED_COPY4]], implicit $exec + ; UNPACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; UNPACKED-NEXT: [[V_LSHRREV_B32_e64_1:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[PRED_COPY9]], [[PRED_COPY5]], implicit $exec + ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY4]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1, [[PRED_COPY5]], %subreg.sub2, [[V_LSHRREV_B32_e64_1]], %subreg.sub3 ; UNPACKED-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; 
UNPACKED-NEXT: {{ $}} ; UNPACKED-NEXT: bb.2: ; UNPACKED-NEXT: successors: %bb.3(0x80000000) ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY1]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY2]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY3]], implicit $exec ; UNPACKED-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; UNPACKED-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; UNPACKED-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; UNPACKED-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE2]].sub0_sub1 - ; UNPACKED-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE2]].sub2_sub3 - ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec - ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec + ; UNPACKED-NEXT: [[PRED_COPY10:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; UNPACKED-NEXT: [[PRED_COPY11:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; UNPACKED-NEXT: [[PRED_COPY12:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE2]].sub0_sub1 + ; UNPACKED-NEXT: [[PRED_COPY13:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE2]].sub2_sub3 + ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY12]], [[PRED_COPY10]], implicit $exec + ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY13]], [[PRED_COPY11]], implicit $exec ; UNPACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; UNPACKED-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; UNPACKED-NEXT: {{ $}} ; UNPACKED-NEXT: bb.3: ; UNPACKED-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: BUFFER_STORE_FORMAT_D16_XYZW_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE2]], [[COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s16>), align 1, addrspace 7) + ; UNPACKED-NEXT: BUFFER_STORE_FORMAT_D16_XYZW_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[PRED_COPY6]], [[REG_SEQUENCE2]], [[PRED_COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s16>), align 1, addrspace 7) ; UNPACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; UNPACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; UNPACKED-NEXT: {{ $}} @@ -205,39 +205,39 @@ ; PACKED-NEXT: successors: %bb.2(0x80000000) ; PACKED-NEXT: liveins: 
$sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5 - ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; PACKED-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr6 - ; PACKED-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; PACKED-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; PACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; PACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; PACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; PACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 + ; PACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr5 + ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY4]], %subreg.sub0, [[PRED_COPY5]], %subreg.sub1 + ; PACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr6 + ; PACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 ; PACKED-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; PACKED-NEXT: {{ $}} ; PACKED-NEXT: bb.2: ; PACKED-NEXT: successors: %bb.3(0x80000000) ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY1]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY2]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY3]], implicit $exec ; PACKED-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; PACKED-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; PACKED-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; PACKED-NEXT: [[COPY10:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE2]].sub0_sub1 - ; PACKED-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE2]].sub2_sub3 - ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY10]], [[COPY8]], implicit $exec - ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec + ; PACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:vreg_64 = PRED_COPY 
[[REG_SEQUENCE]].sub0_sub1 + ; PACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; PACKED-NEXT: [[PRED_COPY10:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE2]].sub0_sub1 + ; PACKED-NEXT: [[PRED_COPY11:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE2]].sub2_sub3 + ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY10]], [[PRED_COPY8]], implicit $exec + ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY11]], [[PRED_COPY9]], implicit $exec ; PACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; PACKED-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; PACKED-NEXT: {{ $}} ; PACKED-NEXT: bb.3: ; PACKED-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: BUFFER_STORE_FORMAT_D16_XYZW_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE2]], [[COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s16>), align 1, addrspace 7) + ; PACKED-NEXT: BUFFER_STORE_FORMAT_D16_XYZW_OFFEN_exact [[REG_SEQUENCE1]], [[PRED_COPY6]], [[REG_SEQUENCE2]], [[PRED_COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s16>), align 1, addrspace 7) ; PACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; PACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; PACKED-NEXT: {{ $}} @@ -257,33 +257,33 @@ ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; UNPACKED-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; UNPACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; UNPACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; UNPACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; UNPACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; UNPACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 ; UNPACKED-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4095 ; UNPACKED-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 16 - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]] - ; UNPACKED-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY6]], [[COPY4]], implicit $exec - ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1 - ; UNPACKED-NEXT: BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>), align 1, addrspace 7) + ; UNPACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY 
[[S_MOV_B32_1]] + ; UNPACKED-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[PRED_COPY6]], [[PRED_COPY4]], implicit $exec + ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY4]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1 + ; UNPACKED-NEXT: BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>), align 1, addrspace 7) ; UNPACKED-NEXT: S_ENDPGM 0 ; PACKED-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_soffset4095 ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; PACKED-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; PACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; PACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; PACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; PACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; PACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 ; PACKED-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4095 - ; PACKED-NEXT: BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>), align 1, addrspace 7) + ; PACKED-NEXT: BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[PRED_COPY4]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>), align 1, addrspace 7) ; PACKED-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.buffer.store.format.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 4095, i32 0) ret void @@ -294,33 +294,33 @@ ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; UNPACKED-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; UNPACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; UNPACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; UNPACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], 
%subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; UNPACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; UNPACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 ; UNPACKED-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 ; UNPACKED-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 16 - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]] - ; UNPACKED-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY6]], [[COPY4]], implicit $exec - ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1 - ; UNPACKED-NEXT: BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>), align 1, addrspace 7) + ; UNPACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_1]] + ; UNPACKED-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[PRED_COPY6]], [[PRED_COPY4]], implicit $exec + ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY4]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1 + ; UNPACKED-NEXT: BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>), align 1, addrspace 7) ; UNPACKED-NEXT: S_ENDPGM 0 ; PACKED-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_soffset4096 ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; PACKED-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; PACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; PACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; PACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; PACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; PACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 ; PACKED-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 - ; PACKED-NEXT: BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>), align 1, addrspace 7) + ; PACKED-NEXT: BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[PRED_COPY4]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>), align 1, addrspace 7) ; PACKED-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.buffer.store.format.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 4096, i32 0) ret void @@ -331,33 +331,33 @@ ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; 
UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; UNPACKED-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; UNPACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; UNPACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; UNPACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; UNPACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; UNPACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; UNPACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 ; UNPACKED-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 16 - ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; UNPACKED-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY7]], [[COPY4]], implicit $exec - ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1 - ; UNPACKED-NEXT: BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 16, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>), align 1, addrspace 7) + ; UNPACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; UNPACKED-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[PRED_COPY7]], [[PRED_COPY4]], implicit $exec + ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY4]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1 + ; UNPACKED-NEXT: BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[PRED_COPY6]], 16, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>), align 1, addrspace 7) ; UNPACKED-NEXT: S_ENDPGM 0 ; PACKED-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_voffset_add_16 ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; PACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; PACKED-NEXT: BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 16, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>), align 1, addrspace 7) + ; PACKED-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; PACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = 
PRED_COPY $sgpr3 + ; PACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; PACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; PACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; PACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; PACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; PACKED-NEXT: BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[PRED_COPY4]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[PRED_COPY6]], 16, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>), align 1, addrspace 7) ; PACKED-NEXT: S_ENDPGM 0 %voffset.add = add i32 %voffset, 16 call void @llvm.amdgcn.raw.buffer.store.format.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0) @@ -369,33 +369,33 @@ ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; UNPACKED-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; UNPACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; UNPACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; UNPACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; UNPACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; UNPACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; UNPACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 ; UNPACKED-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 16 - ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; UNPACKED-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY7]], [[COPY4]], implicit $exec - ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1 - ; UNPACKED-NEXT: BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>), align 1, addrspace 7) + ; UNPACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; UNPACKED-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[PRED_COPY7]], [[PRED_COPY4]], implicit $exec + ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY4]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1 + ; UNPACKED-NEXT: BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[PRED_COPY6]], 4095, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>), align 1, addrspace 7) ; UNPACKED-NEXT: S_ENDPGM 0 ; PACKED-LABEL: name: 
raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_voffset_add_4095 ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; PACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; PACKED-NEXT: BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>), align 1, addrspace 7) + ; PACKED-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; PACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; PACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; PACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; PACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; PACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; PACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; PACKED-NEXT: BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[PRED_COPY4]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[PRED_COPY6]], 4095, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>), align 1, addrspace 7) ; PACKED-NEXT: S_ENDPGM 0 %voffset.add = add i32 %voffset, 4095 call void @llvm.amdgcn.raw.buffer.store.format.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0) @@ -407,39 +407,39 @@ ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; UNPACKED-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; UNPACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; UNPACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; UNPACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; UNPACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; UNPACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; UNPACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 ; UNPACKED-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 - ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = 
COPY [[S_MOV_B32_]] - ; UNPACKED-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY5]], [[COPY7]], 0, implicit $exec + ; UNPACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; UNPACKED-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY5]], [[PRED_COPY7]], 0, implicit $exec ; UNPACKED-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 16 - ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]] - ; UNPACKED-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY8]], [[COPY4]], implicit $exec - ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1 - ; UNPACKED-NEXT: BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[V_ADD_CO_U32_e64_]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>), align 1, addrspace 7) + ; UNPACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_1]] + ; UNPACKED-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[PRED_COPY8]], [[PRED_COPY4]], implicit $exec + ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY4]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1 + ; UNPACKED-NEXT: BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[V_ADD_CO_U32_e64_]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>), align 1, addrspace 7) ; UNPACKED-NEXT: S_ENDPGM 0 ; PACKED-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_voffset_add_4096 ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; PACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; PACKED-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; PACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; PACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; PACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; PACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; PACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; PACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 ; PACKED-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 - ; PACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; PACKED-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY5]], [[COPY7]], 0, implicit $exec - ; PACKED-NEXT: BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[COPY4]], [[V_ADD_CO_U32_e64_]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable store 
(<2 x s16>), align 1, addrspace 7) + ; PACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; PACKED-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY5]], [[PRED_COPY7]], 0, implicit $exec + ; PACKED-NEXT: BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[PRED_COPY4]], [[V_ADD_CO_U32_e64_]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>), align 1, addrspace 7) ; PACKED-NEXT: S_ENDPGM 0 %voffset.add = add i32 %voffset, 4096 call void @llvm.amdgcn.raw.buffer.store.format.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0) @@ -454,47 +454,47 @@ ; UNPACKED-NEXT: successors: %bb.2(0x80000000) ; UNPACKED-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5 - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr6 - ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; UNPACKED-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; UNPACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; UNPACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; UNPACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; UNPACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 + ; UNPACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr5 + ; UNPACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr6 + ; UNPACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 ; UNPACKED-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 - ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; UNPACKED-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY6]], [[COPY8]], 0, implicit $exec + ; UNPACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; UNPACKED-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY6]], [[PRED_COPY8]], 0, implicit $exec ; UNPACKED-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 16 - ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]] - ; UNPACKED-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY9]], [[COPY4]], implicit $exec - ; UNPACKED-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]] - ; UNPACKED-NEXT: [[V_LSHRREV_B32_e64_1:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY10]], [[COPY5]], implicit $exec - ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1, [[COPY5]], %subreg.sub2, [[V_LSHRREV_B32_e64_1]], %subreg.sub3 + ; UNPACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_1]] + ; UNPACKED-NEXT: 
[[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[PRED_COPY9]], [[PRED_COPY4]], implicit $exec + ; UNPACKED-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_1]] + ; UNPACKED-NEXT: [[V_LSHRREV_B32_e64_1:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[PRED_COPY10]], [[PRED_COPY5]], implicit $exec + ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY4]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1, [[PRED_COPY5]], %subreg.sub2, [[V_LSHRREV_B32_e64_1]], %subreg.sub3 ; UNPACKED-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; UNPACKED-NEXT: {{ $}} ; UNPACKED-NEXT: bb.2: ; UNPACKED-NEXT: successors: %bb.3(0x80000000) ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY1]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY2]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY3]], implicit $exec ; UNPACKED-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; UNPACKED-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; UNPACKED-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; UNPACKED-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE2]].sub0_sub1 - ; UNPACKED-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE2]].sub2_sub3 - ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec - ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec + ; UNPACKED-NEXT: [[PRED_COPY11:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; UNPACKED-NEXT: [[PRED_COPY12:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; UNPACKED-NEXT: [[PRED_COPY13:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE2]].sub0_sub1 + ; UNPACKED-NEXT: [[PRED_COPY14:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE2]].sub2_sub3 + ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY13]], [[PRED_COPY11]], implicit $exec + ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY14]], [[PRED_COPY12]], implicit $exec ; UNPACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; UNPACKED-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; UNPACKED-NEXT: {{ $}} ; UNPACKED-NEXT: bb.3: ; UNPACKED-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: BUFFER_STORE_FORMAT_D16_XYZW_gfx80_OFFEN_exact [[REG_SEQUENCE1]], 
[[V_ADD_CO_U32_e64_]], [[REG_SEQUENCE2]], [[COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s16>), align 1, addrspace 7) + ; UNPACKED-NEXT: BUFFER_STORE_FORMAT_D16_XYZW_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[V_ADD_CO_U32_e64_]], [[REG_SEQUENCE2]], [[PRED_COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s16>), align 1, addrspace 7) ; UNPACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; UNPACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; UNPACKED-NEXT: {{ $}} @@ -510,42 +510,42 @@ ; PACKED-NEXT: successors: %bb.2(0x80000000) ; PACKED-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5 - ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; PACKED-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr6 - ; PACKED-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; PACKED-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; PACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; PACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; PACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; PACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 + ; PACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr5 + ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY4]], %subreg.sub0, [[PRED_COPY5]], %subreg.sub1 + ; PACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr6 + ; PACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 ; PACKED-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 - ; PACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; PACKED-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY6]], [[COPY8]], 0, implicit $exec + ; PACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; PACKED-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY6]], [[PRED_COPY8]], 0, implicit $exec ; PACKED-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; PACKED-NEXT: {{ $}} ; PACKED-NEXT: bb.2: ; PACKED-NEXT: successors: %bb.3(0x80000000) ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY]], implicit $exec + ; 
PACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY1]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY2]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY3]], implicit $exec ; PACKED-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; PACKED-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; PACKED-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; PACKED-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE2]].sub0_sub1 - ; PACKED-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE2]].sub2_sub3 - ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec - ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec + ; PACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; PACKED-NEXT: [[PRED_COPY10:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; PACKED-NEXT: [[PRED_COPY11:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE2]].sub0_sub1 + ; PACKED-NEXT: [[PRED_COPY12:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE2]].sub2_sub3 + ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY11]], [[PRED_COPY9]], implicit $exec + ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY12]], [[PRED_COPY10]], implicit $exec ; PACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; PACKED-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; PACKED-NEXT: {{ $}} ; PACKED-NEXT: bb.3: ; PACKED-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: BUFFER_STORE_FORMAT_D16_XYZW_OFFEN_exact [[REG_SEQUENCE1]], [[V_ADD_CO_U32_e64_]], [[REG_SEQUENCE2]], [[COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s16>), align 1, addrspace 7) + ; PACKED-NEXT: BUFFER_STORE_FORMAT_D16_XYZW_OFFEN_exact [[REG_SEQUENCE1]], [[V_ADD_CO_U32_e64_]], [[REG_SEQUENCE2]], [[PRED_COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s16>), align 1, addrspace 7) ; PACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; PACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; PACKED-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.format.f32.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.format.f32.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.format.f32.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.format.f32.ll @@ -7,15 +7,15 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], 
%subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: BUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 7) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: BUFFER_STORE_FORMAT_X_OFFEN_exact [[PRED_COPY4]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 7) ; CHECK-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.buffer.store.format.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) ret void @@ -26,14 +26,14 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: BUFFER_STORE_FORMAT_X_OFFSET_exact [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 7) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: BUFFER_STORE_FORMAT_X_OFFSET_exact [[PRED_COPY4]], [[REG_SEQUENCE]], [[PRED_COPY5]], 4095, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 7) ; CHECK-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.buffer.store.format.f32(float %val, <4 x i32> %rsrc, i32 4095, i32 %soffset, i32 0) ret void @@ -44,17 +44,17 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = 
REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s32>), align 1, addrspace 7) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY4]], %subreg.sub0, [[PRED_COPY5]], %subreg.sub1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE1]], [[PRED_COPY6]], [[REG_SEQUENCE]], [[PRED_COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s32>), align 1, addrspace 7) ; CHECK-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.buffer.store.format.v2f32(<2 x float> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) ret void @@ -65,18 +65,18 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: BUFFER_STORE_FORMAT_XYZ_OFFEN_exact [[REG_SEQUENCE1]], [[COPY7]], [[REG_SEQUENCE]], [[COPY8]], 0, 0, 0, implicit $exec :: (dereferenceable store (<3 x s32>), align 1, addrspace 7) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: 
[[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[PRED_COPY4]], %subreg.sub0, [[PRED_COPY5]], %subreg.sub1, [[PRED_COPY6]], %subreg.sub2 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: BUFFER_STORE_FORMAT_XYZ_OFFEN_exact [[REG_SEQUENCE1]], [[PRED_COPY7]], [[REG_SEQUENCE]], [[PRED_COPY8]], 0, 0, 0, implicit $exec :: (dereferenceable store (<3 x s32>), align 1, addrspace 7) ; CHECK-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.buffer.store.format.v3f32(<3 x float> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) ret void @@ -87,19 +87,19 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2, [[COPY7]], %subreg.sub3 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: BUFFER_STORE_FORMAT_XYZW_OFFEN_exact [[REG_SEQUENCE1]], [[COPY8]], [[REG_SEQUENCE]], [[COPY9]], 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s32>), align 1, addrspace 7) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY4]], %subreg.sub0, [[PRED_COPY5]], %subreg.sub1, [[PRED_COPY6]], %subreg.sub2, [[PRED_COPY7]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: BUFFER_STORE_FORMAT_XYZW_OFFEN_exact [[REG_SEQUENCE1]], [[PRED_COPY8]], [[REG_SEQUENCE]], [[PRED_COPY9]], 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s32>), align 1, addrspace 7) ; CHECK-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.buffer.store.format.v4f32(<4 x float> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) ret void @@ -111,41 +111,41 @@ ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, 
$vgpr5, $vgpr6, $vgpr7, $vgpr8 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr6 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr7 - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2, [[COPY7]], %subreg.sub3 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr8 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr5 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr6 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr7 + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY4]], %subreg.sub0, [[PRED_COPY5]], %subreg.sub1, [[PRED_COPY6]], %subreg.sub2, [[PRED_COPY7]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr8 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY1]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY2]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY3]], implicit $exec ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE2]].sub0_sub1 - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE2]].sub2_sub3 - ; 
CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE2]].sub0_sub1 + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE2]].sub2_sub3 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY12]], [[PRED_COPY10]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY13]], [[PRED_COPY11]], implicit $exec ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: BUFFER_STORE_FORMAT_XYZW_OFFEN_exact [[REG_SEQUENCE1]], [[COPY8]], [[REG_SEQUENCE2]], [[COPY9]], 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s32>), align 1, addrspace 7) + ; CHECK-NEXT: BUFFER_STORE_FORMAT_XYZW_OFFEN_exact [[REG_SEQUENCE1]], [[PRED_COPY8]], [[REG_SEQUENCE2]], [[PRED_COPY9]], 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s32>), align 1, addrspace 7) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -165,17 +165,17 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY4]], %subreg.sub0, [[PRED_COPY5]], %subreg.sub1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4095 - ; CHECK-NEXT: BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE1]], 
[[COPY6]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s32>), align 1, addrspace 7) + ; CHECK-NEXT: BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE1]], [[PRED_COPY6]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s32>), align 1, addrspace 7) ; CHECK-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.buffer.store.format.v2f32(<2 x float> %val, <4 x i32> %rsrc, i32 %voffset, i32 4095, i32 0) ret void @@ -186,17 +186,17 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY4]], %subreg.sub0, [[PRED_COPY5]], %subreg.sub1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 - ; CHECK-NEXT: BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s32>), align 1, addrspace 7) + ; CHECK-NEXT: BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE1]], [[PRED_COPY6]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s32>), align 1, addrspace 7) ; CHECK-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.buffer.store.format.v2f32(<2 x float> %val, <4 x i32> %rsrc, i32 %voffset, i32 4096, i32 0) ret void @@ -207,17 +207,17 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; CHECK-NEXT: 
[[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 16, 0, 0, implicit $exec :: (dereferenceable store (<2 x s32>), align 1, addrspace 7) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY4]], %subreg.sub0, [[PRED_COPY5]], %subreg.sub1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE1]], [[PRED_COPY6]], [[REG_SEQUENCE]], [[PRED_COPY7]], 16, 0, 0, implicit $exec :: (dereferenceable store (<2 x s32>), align 1, addrspace 7) ; CHECK-NEXT: S_ENDPGM 0 %voffset.add = add i32 %voffset, 16 call void @llvm.amdgcn.raw.buffer.store.format.v2f32(<2 x float> %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0) @@ -229,17 +229,17 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 4095, 0, 0, implicit $exec :: (dereferenceable store (<2 x s32>), align 1, addrspace 7) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY4]], %subreg.sub0, [[PRED_COPY5]], %subreg.sub1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE1]], [[PRED_COPY6]], [[REG_SEQUENCE]], [[PRED_COPY7]], 4095, 0, 0, implicit $exec :: (dereferenceable store (<2 
x s32>), align 1, addrspace 7) ; CHECK-NEXT: S_ENDPGM 0 %voffset.add = add i32 %voffset, 4095 call void @llvm.amdgcn.raw.buffer.store.format.v2f32(<2 x float> %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0) @@ -251,20 +251,20 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY4]], %subreg.sub0, [[PRED_COPY5]], %subreg.sub1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; CHECK-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY6]], [[COPY8]], 0, implicit $exec - ; CHECK-NEXT: BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE1]], [[V_ADD_CO_U32_e64_]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s32>), align 1, addrspace 7) + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; CHECK-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY6]], [[PRED_COPY8]], 0, implicit $exec + ; CHECK-NEXT: BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE1]], [[V_ADD_CO_U32_e64_]], [[REG_SEQUENCE]], [[PRED_COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s32>), align 1, addrspace 7) ; CHECK-NEXT: S_ENDPGM 0 %voffset.add = add i32 %voffset, 4096 call void @llvm.amdgcn.raw.buffer.store.format.v2f32(<2 x float> %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0) @@ -279,44 +279,44 @@ ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = 
REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr6 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr7 - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2, [[COPY7]], %subreg.sub3 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr8 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr5 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr6 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr7 + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY4]], %subreg.sub0, [[PRED_COPY5]], %subreg.sub1, [[PRED_COPY6]], %subreg.sub2, [[PRED_COPY7]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr8 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; CHECK-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY8]], [[COPY10]], 0, implicit $exec + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; CHECK-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY8]], [[PRED_COPY10]], 0, implicit $exec ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY1]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY2]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY3]], implicit $exec ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; CHECK-NEXT: 
[[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE2]].sub0_sub1 - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE2]].sub2_sub3 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE2]].sub0_sub1 + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE2]].sub2_sub3 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY13]], [[PRED_COPY11]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY14]], [[PRED_COPY12]], implicit $exec ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: BUFFER_STORE_FORMAT_XYZW_OFFEN_exact [[REG_SEQUENCE1]], [[V_ADD_CO_U32_e64_]], [[REG_SEQUENCE2]], [[COPY9]], 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s32>), align 1, addrspace 7) + ; CHECK-NEXT: BUFFER_STORE_FORMAT_XYZW_OFFEN_exact [[REG_SEQUENCE1]], [[V_ADD_CO_U32_e64_]], [[REG_SEQUENCE2]], [[PRED_COPY9]], 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s32>), align 1, addrspace 7) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.ll @@ -8,15 +8,15 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 7) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = 
REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[PRED_COPY4]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 7) ; CHECK-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) ret void @@ -28,17 +28,17 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr7 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr8 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] - ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY7]], [[COPY8]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 7) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr7 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr8 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY4]] + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY5]] + ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[PRED_COPY7]], [[PRED_COPY8]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 7) ; CHECK-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) ret void @@ -51,37 +51,37 @@ ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = 
PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr5 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY1]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY2]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY3]], implicit $exec ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY9]], [[COPY7]], implicit $exec - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY10]], [[COPY8]], implicit $exec + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY9]], [[PRED_COPY7]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY10]], [[PRED_COPY8]], implicit $exec ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE1]], [[COPY6]], 0, 0, 0, implicit $exec :: 
(dereferenceable store (s32), align 1, addrspace 7) + ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[PRED_COPY4]], [[PRED_COPY5]], [[REG_SEQUENCE1]], [[PRED_COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 7) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -103,28 +103,28 @@ ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec - ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]], [[COPY6]], implicit $exec - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sreg_64_xexec = COPY [[V_CMP_EQ_U32_e64_]] - ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[COPY7]], implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY6]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]], [[PRED_COPY6]], implicit $exec + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_64_xexec = PRED_COPY [[V_CMP_EQ_U32_e64_]] + ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[PRED_COPY7]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[V_READFIRSTLANE_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 7) + ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[PRED_COPY4]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[V_READFIRSTLANE_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 7) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec 
; CHECK-NEXT: {{ $}} @@ -146,40 +146,40 @@ ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr6 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr5 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr6 ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY1]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY2]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY3]], implicit $exec ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY9]], [[COPY7]], implicit $exec - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY10]], [[COPY8]], implicit $exec + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; CHECK-NEXT: 
[[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY9]], [[PRED_COPY7]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY10]], [[PRED_COPY8]], implicit $exec ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec - ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY6]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[PRED_COPY6]], implicit $exec ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 7) + ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[PRED_COPY4]], [[PRED_COPY5]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 7) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -199,15 +199,15 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 7) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[PRED_COPY4]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 1, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 7) ; 
CHECK-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 1) ret void @@ -218,15 +218,15 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 2, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 7) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[PRED_COPY4]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 2, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 7) ; CHECK-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 2) ret void @@ -237,15 +237,15 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 3, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 7) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: 
BUFFER_STORE_DWORD_OFFEN_exact [[PRED_COPY4]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 3, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 7) ; CHECK-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 3) ret void @@ -256,15 +256,15 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 4, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 7) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[PRED_COPY4]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 4, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 7) ; CHECK-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 4) ret void @@ -275,15 +275,15 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 6, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 7) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = 
PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[PRED_COPY4]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 6, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 7) ; CHECK-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 6) ret void @@ -294,15 +294,15 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 5, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 7) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[PRED_COPY4]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 5, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 7) ; CHECK-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 5) ret void @@ -313,15 +313,15 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 7, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 7) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE 
[[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[PRED_COPY4]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 7, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 7) ; CHECK-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 7) ret void @@ -332,17 +332,17 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: BUFFER_STORE_DWORDX2_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s32>), align 1, addrspace 7) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY4]], %subreg.sub0, [[PRED_COPY5]], %subreg.sub1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: BUFFER_STORE_DWORDX2_OFFEN_exact [[REG_SEQUENCE1]], [[PRED_COPY6]], [[REG_SEQUENCE]], [[PRED_COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s32>), align 1, addrspace 7) ; CHECK-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.buffer.store.v2f32(<2 x float> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) ret void @@ -353,18 +353,18 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: 
[[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: BUFFER_STORE_DWORDX3_OFFEN_exact [[REG_SEQUENCE1]], [[COPY7]], [[REG_SEQUENCE]], [[COPY8]], 0, 0, 0, implicit $exec :: (dereferenceable store (<3 x s32>), align 1, addrspace 7) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[PRED_COPY4]], %subreg.sub0, [[PRED_COPY5]], %subreg.sub1, [[PRED_COPY6]], %subreg.sub2 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: BUFFER_STORE_DWORDX3_OFFEN_exact [[REG_SEQUENCE1]], [[PRED_COPY7]], [[REG_SEQUENCE]], [[PRED_COPY8]], 0, 0, 0, implicit $exec :: (dereferenceable store (<3 x s32>), align 1, addrspace 7) ; CHECK-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.buffer.store.v3f32(<3 x float> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) ret void @@ -375,19 +375,19 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2, [[COPY7]], %subreg.sub3 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFEN_exact [[REG_SEQUENCE1]], [[COPY8]], [[REG_SEQUENCE]], [[COPY9]], 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s32>), align 1, addrspace 7) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: 
[[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY4]], %subreg.sub0, [[PRED_COPY5]], %subreg.sub1, [[PRED_COPY6]], %subreg.sub2, [[PRED_COPY7]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFEN_exact [[REG_SEQUENCE1]], [[PRED_COPY8]], [[REG_SEQUENCE]], [[PRED_COPY9]], 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s32>), align 1, addrspace 7) ; CHECK-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.buffer.store.v4f32(<4 x float> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) ret void @@ -398,15 +398,15 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: BUFFER_STORE_BYTE_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable store (s8), addrspace 7) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: BUFFER_STORE_BYTE_OFFEN_exact [[PRED_COPY4]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable store (s8), addrspace 7) ; CHECK-NEXT: S_ENDPGM 0 %val.trunc = trunc i32 %val to i8 call void @llvm.amdgcn.raw.buffer.store.i8(i8 %val.trunc, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) @@ -418,15 +418,15 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: BUFFER_STORE_SHORT_OFFEN_exact [[COPY4]], [[COPY5]], 
[[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 7) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: BUFFER_STORE_SHORT_OFFEN_exact [[PRED_COPY4]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 7) ; CHECK-NEXT: S_ENDPGM 0 %val.trunc = trunc i32 %val to i16 call void @llvm.amdgcn.raw.buffer.store.i16(i16 %val.trunc, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) @@ -438,15 +438,15 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: BUFFER_STORE_SHORT_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 7) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: BUFFER_STORE_SHORT_OFFEN_exact [[PRED_COPY4]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 7) ; CHECK-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.buffer.store.f16(half %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) ret void @@ -457,15 +457,15 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = 
COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>), align 1, addrspace 7) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[PRED_COPY4]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>), align 1, addrspace 7) ; CHECK-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.buffer.store.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) ret void @@ -476,17 +476,17 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: BUFFER_STORE_DWORDX2_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s16>), align 1, addrspace 7) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY4]], %subreg.sub0, [[PRED_COPY5]], %subreg.sub1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: BUFFER_STORE_DWORDX2_OFFEN_exact [[REG_SEQUENCE1]], [[PRED_COPY6]], [[REG_SEQUENCE]], [[PRED_COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s16>), align 1, addrspace 7) ; CHECK-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.buffer.store.v4f16(<4 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) ret 
void @@ -498,39 +498,39 @@ ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr6 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY4]], %subreg.sub0, [[PRED_COPY5]], %subreg.sub1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr6 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY1]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY2]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY3]], implicit $exec ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE2]].sub0_sub1 - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE2]].sub2_sub3 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY10]], [[COPY8]], implicit $exec - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec + ; 
CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE2]].sub0_sub1 + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE2]].sub2_sub3 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY10]], [[PRED_COPY8]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY11]], [[PRED_COPY9]], implicit $exec ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: BUFFER_STORE_DWORDX2_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE2]], [[COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s16>), align 1, addrspace 7) + ; CHECK-NEXT: BUFFER_STORE_DWORDX2_OFFEN_exact [[REG_SEQUENCE1]], [[PRED_COPY6]], [[REG_SEQUENCE2]], [[PRED_COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s16>), align 1, addrspace 7) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -550,14 +550,14 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET_exact [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 7) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET_exact [[PRED_COPY4]], [[REG_SEQUENCE]], [[PRED_COPY5]], 4095, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 7) ; CHECK-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 4095, i32 %soffset, i32 0) ret void @@ -568,16 +568,16 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: 
[[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY6]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 7) + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[PRED_COPY4]], [[PRED_COPY6]], [[REG_SEQUENCE]], [[PRED_COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 7) ; CHECK-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 4096, i32 %soffset, i32 0) ret void @@ -588,15 +588,15 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 16, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 7) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[PRED_COPY4]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[PRED_COPY6]], 16, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 7) ; CHECK-NEXT: S_ENDPGM 0 %voffset.add = add i32 %voffset, 16 call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0) @@ 
-608,15 +608,15 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 7) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[PRED_COPY4]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[PRED_COPY6]], 4095, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 7) ; CHECK-NEXT: S_ENDPGM 0 %voffset.add = add i32 %voffset, 4095 call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0) @@ -628,18 +628,18 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; CHECK-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY5]], [[COPY7]], 0, implicit $exec - ; CHECK-NEXT: 
BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[V_ADD_CO_U32_e64_]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 7) + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; CHECK-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY5]], [[PRED_COPY7]], 0, implicit $exec + ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[PRED_COPY4]], [[V_ADD_CO_U32_e64_]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 7) ; CHECK-NEXT: S_ENDPGM 0 %voffset.add = add i32 %voffset, 4096 call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0) @@ -651,15 +651,15 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4095 - ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>), align 1, addrspace 7) + ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[PRED_COPY4]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>), align 1, addrspace 7) ; CHECK-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.buffer.store.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 4095, i32 0) ret void @@ -670,15 +670,15 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: 
[[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 - ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>), align 1, addrspace 7) + ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[PRED_COPY4]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>), align 1, addrspace 7) ; CHECK-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.buffer.store.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 4096, i32 0) ret void @@ -689,15 +689,15 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 16, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>), align 1, addrspace 7) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[PRED_COPY4]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[PRED_COPY6]], 16, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>), align 1, addrspace 7) ; CHECK-NEXT: S_ENDPGM 0 %voffset.add = add i32 %voffset, 16 call void @llvm.amdgcn.raw.buffer.store.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0) @@ -709,15 +709,15 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact 
[[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>), align 1, addrspace 7) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[PRED_COPY4]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[PRED_COPY6]], 4095, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>), align 1, addrspace 7) ; CHECK-NEXT: S_ENDPGM 0 %voffset.add = add i32 %voffset, 4095 call void @llvm.amdgcn.raw.buffer.store.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0) @@ -729,18 +729,18 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; CHECK-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY5]], [[COPY7]], 0, implicit $exec - ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[V_ADD_CO_U32_e64_]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>), align 1, addrspace 7) + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; CHECK-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY5]], [[PRED_COPY7]], 0, implicit $exec + ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[PRED_COPY4]], [[V_ADD_CO_U32_e64_]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>), align 1, addrspace 7) ; CHECK-NEXT: S_ENDPGM 0 %voffset.add = add i32 %voffset, 4096 call void 
@llvm.amdgcn.raw.buffer.store.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0) @@ -754,40 +754,40 @@ ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr5 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; CHECK-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY5]], [[COPY7]], 0, implicit $exec + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; CHECK-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY5]], [[PRED_COPY7]], 0, implicit $exec ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY1]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY2]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY3]], implicit $exec ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY 
[[REG_SEQUENCE1]].sub2_sub3 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY10]], [[COPY8]], implicit $exec - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY10]], [[PRED_COPY8]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY11]], [[PRED_COPY9]], implicit $exec ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[V_ADD_CO_U32_e64_]], [[REG_SEQUENCE1]], [[COPY6]], 904, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 7) + ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[PRED_COPY4]], [[V_ADD_CO_U32_e64_]], [[REG_SEQUENCE1]], [[PRED_COPY6]], 904, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 7) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -810,38 +810,38 @@ ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec - ; 
CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY1]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY2]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY3]], implicit $exec ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY9]], [[COPY7]], implicit $exec - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY10]], [[COPY8]], implicit $exec + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY9]], [[PRED_COPY7]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY10]], [[PRED_COPY8]], implicit $exec ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY6]], [[REG_SEQUENCE1]], [[COPY5]], 904, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 7) + ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[PRED_COPY4]], [[PRED_COPY6]], [[REG_SEQUENCE1]], [[PRED_COPY5]], 904, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 7) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.load.f16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.load.f16.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.load.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.load.f16.ll @@ -7,29 +7,29 @@ ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; UNPACKED-NEXT: {{ $}} - ; 
UNPACKED-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; UNPACKED-NEXT: [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 7) - ; UNPACKED-NEXT: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN]] + ; UNPACKED-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; UNPACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; UNPACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; UNPACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; UNPACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; UNPACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; UNPACKED-NEXT: [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[PRED_COPY5]], 0, 78, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 7) + ; UNPACKED-NEXT: $vgpr0 = PRED_COPY [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN]] ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; PACKED-LABEL: name: raw_tbuffer_load_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; PACKED-NEXT: [[TBUFFER_LOAD_FORMAT_D16_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 7) - ; PACKED-NEXT: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_D16_X_OFFEN]] + ; PACKED-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; PACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; PACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; PACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; PACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; PACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; PACKED-NEXT: [[TBUFFER_LOAD_FORMAT_D16_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[PRED_COPY5]], 0, 
78, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 7) + ; PACKED-NEXT: $vgpr0 = PRED_COPY [[TBUFFER_LOAD_FORMAT_D16_X_OFFEN]] ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call half @llvm.amdgcn.raw.tbuffer.load.f16(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 0) ret half %val @@ -40,40 +40,40 @@ ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; UNPACKED-NEXT: [[TBUFFER_LOAD_FORMAT_D16_XY_gfx80_OFFEN:%[0-9]+]]:vreg_64 = TBUFFER_LOAD_FORMAT_D16_XY_gfx80_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, implicit $exec :: (dereferenceable load (<2 x s16>), align 1, addrspace 7) - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XY_gfx80_OFFEN]].sub0 - ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XY_gfx80_OFFEN]].sub1 + ; UNPACKED-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; UNPACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; UNPACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; UNPACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; UNPACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; UNPACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; UNPACKED-NEXT: [[TBUFFER_LOAD_FORMAT_D16_XY_gfx80_OFFEN:%[0-9]+]]:vreg_64 = TBUFFER_LOAD_FORMAT_D16_XY_gfx80_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[PRED_COPY5]], 0, 78, 0, 0, implicit $exec :: (dereferenceable load (<2 x s16>), align 1, addrspace 7) + ; UNPACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[TBUFFER_LOAD_FORMAT_D16_XY_gfx80_OFFEN]].sub0 + ; UNPACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[TBUFFER_LOAD_FORMAT_D16_XY_gfx80_OFFEN]].sub1 ; UNPACKED-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65535 - ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; UNPACKED-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY6]], [[COPY8]], implicit $exec - ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; UNPACKED-NEXT: [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY7]], [[COPY9]], implicit $exec + ; UNPACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; UNPACKED-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PRED_COPY6]], [[PRED_COPY8]], implicit $exec + ; UNPACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; UNPACKED-NEXT: [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PRED_COPY7]], [[PRED_COPY9]], implicit $exec ; UNPACKED-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 16 - ; UNPACKED-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]] - ; UNPACKED-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY10]], 
[[V_AND_B32_e64_1]], implicit $exec + ; UNPACKED-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_1]] + ; UNPACKED-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[PRED_COPY10]], [[V_AND_B32_e64_1]], implicit $exec ; UNPACKED-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_AND_B32_e64_]], [[V_LSHLREV_B32_e64_]], implicit $exec - ; UNPACKED-NEXT: $vgpr0 = COPY [[V_OR_B32_e64_]] + ; UNPACKED-NEXT: $vgpr0 = PRED_COPY [[V_OR_B32_e64_]] ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; PACKED-LABEL: name: raw_tbuffer_load_v2f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; PACKED-NEXT: [[TBUFFER_LOAD_FORMAT_D16_XY_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_XY_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, implicit $exec :: (dereferenceable load (<2 x s16>), align 1, addrspace 7) - ; PACKED-NEXT: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_D16_XY_OFFEN]] + ; PACKED-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; PACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; PACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; PACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; PACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; PACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; PACKED-NEXT: [[TBUFFER_LOAD_FORMAT_D16_XY_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_XY_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[PRED_COPY5]], 0, 78, 0, 0, implicit $exec :: (dereferenceable load (<2 x s16>), align 1, addrspace 7) + ; PACKED-NEXT: $vgpr0 = PRED_COPY [[TBUFFER_LOAD_FORMAT_D16_XY_OFFEN]] ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call <2 x half> @llvm.amdgcn.raw.tbuffer.load.v2f16(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 0) ret <2 x half> %val @@ -90,53 +90,53 @@ ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; UNPACKED-NEXT: [[TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN:%[0-9]+]]:vreg_128 = TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, implicit $exec :: (dereferenceable load (<4 x s16>), align 1, addrspace 7) 
- ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN]].sub0 - ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN]].sub1 - ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN]].sub2 - ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN]].sub3 + ; UNPACKED-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; UNPACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; UNPACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; UNPACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; UNPACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; UNPACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; UNPACKED-NEXT: [[TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN:%[0-9]+]]:vreg_128 = TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[PRED_COPY5]], 0, 78, 0, 0, implicit $exec :: (dereferenceable load (<4 x s16>), align 1, addrspace 7) + ; UNPACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN]].sub0 + ; UNPACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN]].sub1 + ; UNPACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN]].sub2 + ; UNPACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN]].sub3 ; UNPACKED-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65535 - ; UNPACKED-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; UNPACKED-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY6]], [[COPY10]], implicit $exec - ; UNPACKED-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; UNPACKED-NEXT: [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY7]], [[COPY11]], implicit $exec + ; UNPACKED-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; UNPACKED-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PRED_COPY6]], [[PRED_COPY10]], implicit $exec + ; UNPACKED-NEXT: [[PRED_COPY11:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; UNPACKED-NEXT: [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PRED_COPY7]], [[PRED_COPY11]], implicit $exec ; UNPACKED-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 16 - ; UNPACKED-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]] - ; UNPACKED-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY12]], [[V_AND_B32_e64_1]], implicit $exec + ; UNPACKED-NEXT: [[PRED_COPY12:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_1]] + ; UNPACKED-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[PRED_COPY12]], [[V_AND_B32_e64_1]], implicit $exec ; UNPACKED-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_AND_B32_e64_]], [[V_LSHLREV_B32_e64_]], implicit $exec - ; UNPACKED-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; UNPACKED-NEXT: [[V_AND_B32_e64_2:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY8]], [[COPY13]], implicit $exec - ; UNPACKED-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; UNPACKED-NEXT: [[V_AND_B32_e64_3:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY9]], [[COPY14]], implicit $exec - ; UNPACKED-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]] - ; 
UNPACKED-NEXT: [[V_LSHLREV_B32_e64_1:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY15]], [[V_AND_B32_e64_3]], implicit $exec + ; UNPACKED-NEXT: [[PRED_COPY13:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; UNPACKED-NEXT: [[V_AND_B32_e64_2:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PRED_COPY8]], [[PRED_COPY13]], implicit $exec + ; UNPACKED-NEXT: [[PRED_COPY14:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; UNPACKED-NEXT: [[V_AND_B32_e64_3:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PRED_COPY9]], [[PRED_COPY14]], implicit $exec + ; UNPACKED-NEXT: [[PRED_COPY15:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_1]] + ; UNPACKED-NEXT: [[V_LSHLREV_B32_e64_1:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[PRED_COPY15]], [[V_AND_B32_e64_3]], implicit $exec ; UNPACKED-NEXT: [[V_OR_B32_e64_1:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_AND_B32_e64_2]], [[V_LSHLREV_B32_e64_1]], implicit $exec - ; UNPACKED-NEXT: $vgpr0 = COPY [[V_OR_B32_e64_]] - ; UNPACKED-NEXT: $vgpr1 = COPY [[V_OR_B32_e64_1]] + ; UNPACKED-NEXT: $vgpr0 = PRED_COPY [[V_OR_B32_e64_]] + ; UNPACKED-NEXT: $vgpr1 = PRED_COPY [[V_OR_B32_e64_1]] ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 ; PACKED-LABEL: name: raw_tbuffer_load_v4f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; PACKED-NEXT: [[TBUFFER_LOAD_FORMAT_D16_XYZW_OFFEN:%[0-9]+]]:vreg_64 = TBUFFER_LOAD_FORMAT_D16_XYZW_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, implicit $exec :: (dereferenceable load (<4 x s16>), align 1, addrspace 7) - ; PACKED-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_OFFEN]].sub0 - ; PACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_OFFEN]].sub1 - ; PACKED-NEXT: $vgpr0 = COPY [[COPY6]] - ; PACKED-NEXT: $vgpr1 = COPY [[COPY7]] + ; PACKED-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; PACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; PACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; PACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; PACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; PACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; PACKED-NEXT: [[TBUFFER_LOAD_FORMAT_D16_XYZW_OFFEN:%[0-9]+]]:vreg_64 = TBUFFER_LOAD_FORMAT_D16_XYZW_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[PRED_COPY5]], 0, 78, 0, 0, implicit $exec :: (dereferenceable load (<4 x s16>), align 1, addrspace 7) + ; PACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_OFFEN]].sub0 + ; PACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_OFFEN]].sub1 + ; PACKED-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY6]] + ; PACKED-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY7]] ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, 
implicit $vgpr1 %val = call <4 x half> @llvm.amdgcn.raw.tbuffer.load.v4f16(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 0) ret <4 x half> %val @@ -148,40 +148,40 @@ ; UNPACKED-NEXT: successors: %bb.2(0x80000000) ; UNPACKED-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY4]] + ; UNPACKED-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; UNPACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; UNPACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; UNPACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; UNPACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; UNPACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 + ; UNPACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY4]] ; UNPACKED-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; UNPACKED-NEXT: {{ $}} ; UNPACKED-NEXT: bb.2: ; UNPACKED-NEXT: successors: %bb.3(0x80000000) ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY1]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY2]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY3]], implicit $exec ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; UNPACKED-NEXT: [[COPY10:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY9]], [[COPY7]], implicit $exec - ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY10]], [[COPY8]], implicit $exec + ; UNPACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; UNPACKED-NEXT: 
[[PRED_COPY8:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; UNPACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; UNPACKED-NEXT: [[PRED_COPY10:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY9]], [[PRED_COPY7]], implicit $exec + ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY10]], [[PRED_COPY8]], implicit $exec ; UNPACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec - ; UNPACKED-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY5]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY5]], implicit $exec + ; UNPACKED-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[PRED_COPY5]], implicit $exec ; UNPACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; UNPACKED-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; UNPACKED-NEXT: {{ $}} ; UNPACKED-NEXT: bb.3: ; UNPACKED-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN [[COPY6]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 7) + ; UNPACKED-NEXT: [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN [[PRED_COPY6]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 7) ; UNPACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; UNPACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; UNPACKED-NEXT: {{ $}} @@ -191,47 +191,47 @@ ; UNPACKED-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; UNPACKED-NEXT: {{ $}} ; UNPACKED-NEXT: bb.5: - ; UNPACKED-NEXT: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN]] + ; UNPACKED-NEXT: $vgpr0 = PRED_COPY [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN]] ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; PACKED-LABEL: name: raw_tbuffer_load_f16__vgpr_rsrc__sgpr_voffset__vgpr_soffset ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: successors: %bb.2(0x80000000) ; PACKED-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; PACKED-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY4]] + ; PACKED-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; PACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; 
PACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; PACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; PACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; PACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 + ; PACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY4]] ; PACKED-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; PACKED-NEXT: {{ $}} ; PACKED-NEXT: bb.2: ; PACKED-NEXT: successors: %bb.3(0x80000000) ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY1]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY2]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY3]], implicit $exec ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; PACKED-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; PACKED-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; PACKED-NEXT: [[COPY9:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; PACKED-NEXT: [[COPY10:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY9]], [[COPY7]], implicit $exec - ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY10]], [[COPY8]], implicit $exec + ; PACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; PACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; PACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; PACKED-NEXT: [[PRED_COPY10:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY9]], [[PRED_COPY7]], implicit $exec + ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY10]], [[PRED_COPY8]], implicit $exec ; PACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec - ; PACKED-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY5]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY5]], implicit $exec + ; PACKED-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], 
[[PRED_COPY5]], implicit $exec ; PACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; PACKED-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; PACKED-NEXT: {{ $}} ; PACKED-NEXT: bb.3: ; PACKED-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[TBUFFER_LOAD_FORMAT_D16_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_OFFEN [[COPY6]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 7) + ; PACKED-NEXT: [[TBUFFER_LOAD_FORMAT_D16_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_OFFEN [[PRED_COPY6]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 7) ; PACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; PACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; PACKED-NEXT: {{ $}} @@ -241,7 +241,7 @@ ; PACKED-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; PACKED-NEXT: {{ $}} ; PACKED-NEXT: bb.5: - ; PACKED-NEXT: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_D16_X_OFFEN]] + ; PACKED-NEXT: $vgpr0 = PRED_COPY [[TBUFFER_LOAD_FORMAT_D16_X_OFFEN]] ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call half @llvm.amdgcn.raw.tbuffer.load.f16(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 0) ret half %val @@ -252,29 +252,29 @@ ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; UNPACKED-NEXT: [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 1, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 7) - ; UNPACKED-NEXT: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN]] + ; UNPACKED-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; UNPACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; UNPACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; UNPACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; UNPACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; UNPACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; UNPACKED-NEXT: [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[PRED_COPY5]], 0, 78, 1, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 7) + ; UNPACKED-NEXT: $vgpr0 = PRED_COPY [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN]] ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; PACKED-LABEL: name: 
raw_tbuffer_load_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset_glc ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; PACKED-NEXT: [[TBUFFER_LOAD_FORMAT_D16_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 1, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 7) - ; PACKED-NEXT: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_D16_X_OFFEN]] + ; PACKED-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; PACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; PACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; PACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; PACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; PACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; PACKED-NEXT: [[TBUFFER_LOAD_FORMAT_D16_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[PRED_COPY5]], 0, 78, 1, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 7) + ; PACKED-NEXT: $vgpr0 = PRED_COPY [[TBUFFER_LOAD_FORMAT_D16_X_OFFEN]] ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call half @llvm.amdgcn.raw.tbuffer.load.f16(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 1) ret half %val @@ -285,29 +285,29 @@ ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; UNPACKED-NEXT: [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 2, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 7) - ; UNPACKED-NEXT: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN]] + ; UNPACKED-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; UNPACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; UNPACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; UNPACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; UNPACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = 
PRED_COPY $vgpr0 + ; UNPACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; UNPACKED-NEXT: [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[PRED_COPY5]], 0, 78, 2, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 7) + ; UNPACKED-NEXT: $vgpr0 = PRED_COPY [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN]] ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; PACKED-LABEL: name: raw_tbuffer_load_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; PACKED-NEXT: [[TBUFFER_LOAD_FORMAT_D16_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 2, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 7) - ; PACKED-NEXT: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_D16_X_OFFEN]] + ; PACKED-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; PACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; PACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; PACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; PACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; PACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; PACKED-NEXT: [[TBUFFER_LOAD_FORMAT_D16_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[PRED_COPY5]], 0, 78, 2, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 7) + ; PACKED-NEXT: $vgpr0 = PRED_COPY [[TBUFFER_LOAD_FORMAT_D16_X_OFFEN]] ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call half @llvm.amdgcn.raw.tbuffer.load.f16(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 2) ret half %val @@ -318,29 +318,29 @@ ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; UNPACKED-NEXT: [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 3, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 7) - ; UNPACKED-NEXT: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN]] + ; UNPACKED-NEXT: 
[[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; UNPACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; UNPACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; UNPACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; UNPACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; UNPACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; UNPACKED-NEXT: [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[PRED_COPY5]], 0, 78, 3, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 7) + ; UNPACKED-NEXT: $vgpr0 = PRED_COPY [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN]] ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; PACKED-LABEL: name: raw_tbuffer_load_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc_glc ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; PACKED-NEXT: [[TBUFFER_LOAD_FORMAT_D16_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 3, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 7) - ; PACKED-NEXT: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_D16_X_OFFEN]] + ; PACKED-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; PACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; PACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; PACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; PACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; PACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; PACKED-NEXT: [[TBUFFER_LOAD_FORMAT_D16_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[PRED_COPY5]], 0, 78, 3, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 7) + ; PACKED-NEXT: $vgpr0 = PRED_COPY [[TBUFFER_LOAD_FORMAT_D16_X_OFFEN]] ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call half @llvm.amdgcn.raw.tbuffer.load.f16(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 3) ret half %val @@ -351,29 +351,29 @@ ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], 
%subreg.sub2, [[COPY3]], %subreg.sub3 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; UNPACKED-NEXT: [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 4, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 7) - ; UNPACKED-NEXT: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN]] + ; UNPACKED-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; UNPACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; UNPACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; UNPACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; UNPACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; UNPACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; UNPACKED-NEXT: [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[PRED_COPY5]], 0, 78, 4, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 7) + ; UNPACKED-NEXT: $vgpr0 = PRED_COPY [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN]] ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; PACKED-LABEL: name: raw_tbuffer_load_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset_dlc ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; PACKED-NEXT: [[TBUFFER_LOAD_FORMAT_D16_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 4, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 7) - ; PACKED-NEXT: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_D16_X_OFFEN]] + ; PACKED-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; PACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; PACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; PACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; PACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; PACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; PACKED-NEXT: [[TBUFFER_LOAD_FORMAT_D16_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[PRED_COPY5]], 0, 78, 4, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 7) + ; PACKED-NEXT: $vgpr0 = PRED_COPY [[TBUFFER_LOAD_FORMAT_D16_X_OFFEN]] ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call half @llvm.amdgcn.raw.tbuffer.load.f16(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 4) ret half %val diff --git 
a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.load.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.load.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.load.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.load.ll @@ -7,15 +7,15 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[TBUFFER_LOAD_FORMAT_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7) - ; CHECK-NEXT: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_X_OFFEN]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[TBUFFER_LOAD_FORMAT_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[PRED_COPY5]], 0, 78, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[TBUFFER_LOAD_FORMAT_X_OFFEN]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.raw.tbuffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 0) ret float %val @@ -26,18 +26,18 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[TBUFFER_LOAD_FORMAT_XY_OFFEN:%[0-9]+]]:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, implicit $exec :: (dereferenceable load (<2 x s32>), align 1, addrspace 7) - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XY_OFFEN]].sub0 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XY_OFFEN]].sub1 - ; CHECK-NEXT: $vgpr0 = COPY [[COPY6]] - ; CHECK-NEXT: $vgpr1 = COPY [[COPY7]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = 
PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[TBUFFER_LOAD_FORMAT_XY_OFFEN:%[0-9]+]]:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[PRED_COPY5]], 0, 78, 0, 0, implicit $exec :: (dereferenceable load (<2 x s32>), align 1, addrspace 7) + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[TBUFFER_LOAD_FORMAT_XY_OFFEN]].sub0 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[TBUFFER_LOAD_FORMAT_XY_OFFEN]].sub1 + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY7]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %val = call <2 x float> @llvm.amdgcn.raw.tbuffer.load.v2f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 0) ret <2 x float> %val @@ -48,20 +48,20 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[TBUFFER_LOAD_FORMAT_XYZ_OFFEN:%[0-9]+]]:vreg_96 = TBUFFER_LOAD_FORMAT_XYZ_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, implicit $exec :: (dereferenceable load (<3 x s32>), align 1, addrspace 7) - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZ_OFFEN]].sub0 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZ_OFFEN]].sub1 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZ_OFFEN]].sub2 - ; CHECK-NEXT: $vgpr0 = COPY [[COPY6]] - ; CHECK-NEXT: $vgpr1 = COPY [[COPY7]] - ; CHECK-NEXT: $vgpr2 = COPY [[COPY8]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[TBUFFER_LOAD_FORMAT_XYZ_OFFEN:%[0-9]+]]:vreg_96 = TBUFFER_LOAD_FORMAT_XYZ_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[PRED_COPY5]], 0, 78, 0, 0, implicit $exec :: (dereferenceable load (<3 x s32>), align 1, addrspace 7) + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[TBUFFER_LOAD_FORMAT_XYZ_OFFEN]].sub0 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[TBUFFER_LOAD_FORMAT_XYZ_OFFEN]].sub1 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[TBUFFER_LOAD_FORMAT_XYZ_OFFEN]].sub2 + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY6]] + ; 
CHECK-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[PRED_COPY8]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 %val = call <3 x float> @llvm.amdgcn.raw.tbuffer.load.v3f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 0) ret <3 x float> %val @@ -72,22 +72,22 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[TBUFFER_LOAD_FORMAT_XYZW_OFFEN:%[0-9]+]]:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 7) - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZW_OFFEN]].sub0 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZW_OFFEN]].sub1 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZW_OFFEN]].sub2 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZW_OFFEN]].sub3 - ; CHECK-NEXT: $vgpr0 = COPY [[COPY6]] - ; CHECK-NEXT: $vgpr1 = COPY [[COPY7]] - ; CHECK-NEXT: $vgpr2 = COPY [[COPY8]] - ; CHECK-NEXT: $vgpr3 = COPY [[COPY9]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[TBUFFER_LOAD_FORMAT_XYZW_OFFEN:%[0-9]+]]:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[PRED_COPY5]], 0, 78, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 7) + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[TBUFFER_LOAD_FORMAT_XYZW_OFFEN]].sub0 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[TBUFFER_LOAD_FORMAT_XYZW_OFFEN]].sub1 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[TBUFFER_LOAD_FORMAT_XYZW_OFFEN]].sub2 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[TBUFFER_LOAD_FORMAT_XYZW_OFFEN]].sub3 + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[PRED_COPY8]] + ; CHECK-NEXT: $vgpr3 = PRED_COPY [[PRED_COPY9]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 %val = call <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 0) ret <4 x float> %val @@ -99,40 +99,40 @@ ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 
= COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY4]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY4]] ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_MOV_B32 $exec_lo ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY1]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY2]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY3]], implicit $exec ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[COPY9]], [[COPY7]], implicit $exec - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[COPY10]], [[COPY8]], implicit $exec + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY9]], [[PRED_COPY7]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 
[[PRED_COPY10]], [[PRED_COPY8]], implicit $exec ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec - ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY5]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY5]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[PRED_COPY5]], implicit $exec ; CHECK-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[S_AND_B32_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; CHECK-NEXT: [[S_AND_SAVEEXEC_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_SAVEEXEC_B32 killed [[S_AND_B32_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[TBUFFER_LOAD_FORMAT_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFEN [[COPY6]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7) + ; CHECK-NEXT: [[TBUFFER_LOAD_FORMAT_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFEN [[PRED_COPY6]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7) ; CHECK-NEXT: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -142,7 +142,7 @@ ; CHECK-NEXT: $exec_lo = S_MOV_B32_term [[S_MOV_B32_]] ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.5: - ; CHECK-NEXT: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_X_OFFEN]] + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[TBUFFER_LOAD_FORMAT_X_OFFEN]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.raw.tbuffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 0) ret float %val @@ -153,15 +153,15 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[TBUFFER_LOAD_FORMAT_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 1, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7) - ; CHECK-NEXT: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_X_OFFEN]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: 
[[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[TBUFFER_LOAD_FORMAT_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[PRED_COPY5]], 0, 78, 1, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[TBUFFER_LOAD_FORMAT_X_OFFEN]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.raw.tbuffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 1) ret float %val @@ -172,15 +172,15 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[TBUFFER_LOAD_FORMAT_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 2, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7) - ; CHECK-NEXT: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_X_OFFEN]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[TBUFFER_LOAD_FORMAT_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[PRED_COPY5]], 0, 78, 2, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[TBUFFER_LOAD_FORMAT_X_OFFEN]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.raw.tbuffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 2) ret float %val @@ -191,15 +191,15 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[TBUFFER_LOAD_FORMAT_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 3, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7) - ; CHECK-NEXT: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_X_OFFEN]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: 
[[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[TBUFFER_LOAD_FORMAT_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[PRED_COPY5]], 0, 78, 3, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[TBUFFER_LOAD_FORMAT_X_OFFEN]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.raw.tbuffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 3) ret float %val @@ -210,15 +210,15 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[TBUFFER_LOAD_FORMAT_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 4, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7) - ; CHECK-NEXT: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_X_OFFEN]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[TBUFFER_LOAD_FORMAT_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[PRED_COPY5]], 0, 78, 4, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[TBUFFER_LOAD_FORMAT_X_OFFEN]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.raw.tbuffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 4) ret float %val diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.store.f16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.store.f16.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.store.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.store.f16.ll @@ -7,29 +7,29 @@ ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; UNPACKED-NEXT: 
[[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; UNPACKED-NEXT: TBUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 7) + ; UNPACKED-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; UNPACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; UNPACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; UNPACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; UNPACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; UNPACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; UNPACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; UNPACKED-NEXT: TBUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[PRED_COPY]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 78, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 7) ; UNPACKED-NEXT: S_ENDPGM 0 ; PACKED-LABEL: name: raw_tbuffer_store_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; PACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; PACKED-NEXT: TBUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 7) + ; PACKED-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; PACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; PACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; PACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; PACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; PACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; PACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; PACKED-NEXT: TBUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[PRED_COPY]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 78, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 7) ; PACKED-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.tbuffer.store.f16(half %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 0) ret void @@ -40,33 +40,33 @@ ; UNPACKED: 
bb.1 (%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; UNPACKED-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; UNPACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; UNPACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; UNPACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; UNPACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; UNPACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; UNPACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 ; UNPACKED-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 16 - ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; UNPACKED-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY7]], [[COPY]], implicit $exec - ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1 - ; UNPACKED-NEXT: TBUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>), align 1, addrspace 7) + ; UNPACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; UNPACKED-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[PRED_COPY7]], [[PRED_COPY]], implicit $exec + ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1 + ; UNPACKED-NEXT: TBUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 78, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>), align 1, addrspace 7) ; UNPACKED-NEXT: S_ENDPGM 0 ; PACKED-LABEL: name: raw_tbuffer_store_v2f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; PACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; PACKED-NEXT: TBUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>), align 1, addrspace 7) + ; PACKED-NEXT: 
[[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; PACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; PACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; PACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; PACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; PACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; PACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; PACKED-NEXT: TBUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[PRED_COPY]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 78, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>), align 1, addrspace 7) ; PACKED-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.tbuffer.store.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 0) ret void @@ -83,38 +83,38 @@ ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; UNPACKED-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; UNPACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; UNPACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; UNPACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; UNPACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; UNPACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY3]], %subreg.sub1, [[PRED_COPY4]], %subreg.sub2, [[PRED_COPY5]], %subreg.sub3 + ; UNPACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; UNPACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 ; UNPACKED-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 16 - ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; UNPACKED-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY8]], [[COPY]], implicit $exec - ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; UNPACKED-NEXT: [[V_LSHRREV_B32_e64_1:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY9]], [[COPY1]], implicit $exec - ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1, [[COPY1]], %subreg.sub2, [[V_LSHRREV_B32_e64_1]], %subreg.sub3 - ; UNPACKED-NEXT: TBUFFER_STORE_FORMAT_D16_XYZW_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 78, 0, 0, implicit $exec :: (dereferenceable store (<4 x s16>), align 1, addrspace 7) + ; UNPACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; UNPACKED-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[PRED_COPY8]], 
[[PRED_COPY]], implicit $exec + ; UNPACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; UNPACKED-NEXT: [[V_LSHRREV_B32_e64_1:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[PRED_COPY9]], [[PRED_COPY1]], implicit $exec + ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1, [[PRED_COPY1]], %subreg.sub2, [[V_LSHRREV_B32_e64_1]], %subreg.sub3 + ; UNPACKED-NEXT: TBUFFER_STORE_FORMAT_D16_XYZW_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[PRED_COPY6]], [[REG_SEQUENCE]], [[PRED_COPY7]], 0, 78, 0, 0, implicit $exec :: (dereferenceable store (<4 x s16>), align 1, addrspace 7) ; UNPACKED-NEXT: S_ENDPGM 0 ; PACKED-LABEL: name: raw_tbuffer_store_v4f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 - ; PACKED-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; PACKED-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; PACKED-NEXT: TBUFFER_STORE_FORMAT_D16_XYZW_OFFEN_exact [[REG_SEQUENCE]], [[COPY6]], [[REG_SEQUENCE1]], [[COPY7]], 0, 78, 0, 0, implicit $exec :: (dereferenceable store (<4 x s16>), align 1, addrspace 7) + ; PACKED-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; PACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; PACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; PACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; PACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; PACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY3]], %subreg.sub1, [[PRED_COPY4]], %subreg.sub2, [[PRED_COPY5]], %subreg.sub3 + ; PACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; PACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; PACKED-NEXT: TBUFFER_STORE_FORMAT_D16_XYZW_OFFEN_exact [[REG_SEQUENCE]], [[PRED_COPY6]], [[REG_SEQUENCE1]], [[PRED_COPY7]], 0, 78, 0, 0, implicit $exec :: (dereferenceable store (<4 x s16>), align 1, addrspace 7) ; PACKED-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.tbuffer.store.v4f16(<4 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 0) ret void @@ -127,37 +127,37 @@ ; UNPACKED-NEXT: successors: %bb.2(0x80000000) ; UNPACKED-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; 
UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5 - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; UNPACKED-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; UNPACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; UNPACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; UNPACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; UNPACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 + ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; UNPACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr5 + ; UNPACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 ; UNPACKED-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; UNPACKED-NEXT: {{ $}} ; UNPACKED-NEXT: bb.2: ; UNPACKED-NEXT: successors: %bb.3(0x80000000) ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY1]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY2]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY3]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY4]], implicit $exec ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; UNPACKED-NEXT: [[COPY10:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY9]], [[COPY7]], implicit $exec - ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY10]], [[COPY8]], implicit $exec + ; UNPACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; UNPACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; UNPACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; UNPACKED-NEXT: [[PRED_COPY10:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY9]], [[PRED_COPY7]], implicit $exec + ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY10]], [[PRED_COPY8]], implicit $exec ; UNPACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], 
implicit-def dead $scc ; UNPACKED-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; UNPACKED-NEXT: {{ $}} ; UNPACKED-NEXT: bb.3: ; UNPACKED-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: TBUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE1]], [[COPY6]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 7) + ; UNPACKED-NEXT: TBUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[PRED_COPY]], [[PRED_COPY5]], [[REG_SEQUENCE1]], [[PRED_COPY6]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 7) ; UNPACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; UNPACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; UNPACKED-NEXT: {{ $}} @@ -173,37 +173,37 @@ ; PACKED-NEXT: successors: %bb.2(0x80000000) ; PACKED-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5 - ; PACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; PACKED-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; PACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; PACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; PACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; PACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 + ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; PACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr5 + ; PACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 ; PACKED-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; PACKED-NEXT: {{ $}} ; PACKED-NEXT: bb.2: ; PACKED-NEXT: successors: %bb.3(0x80000000) ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY1]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY2]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY3]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY4]], implicit $exec ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], 
%subreg.sub3 - ; PACKED-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; PACKED-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; PACKED-NEXT: [[COPY9:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; PACKED-NEXT: [[COPY10:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY9]], [[COPY7]], implicit $exec - ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY10]], [[COPY8]], implicit $exec + ; PACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; PACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; PACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; PACKED-NEXT: [[PRED_COPY10:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY9]], [[PRED_COPY7]], implicit $exec + ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY10]], [[PRED_COPY8]], implicit $exec ; PACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; PACKED-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; PACKED-NEXT: {{ $}} ; PACKED-NEXT: bb.3: ; PACKED-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: TBUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE1]], [[COPY6]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 7) + ; PACKED-NEXT: TBUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[PRED_COPY]], [[PRED_COPY5]], [[REG_SEQUENCE1]], [[PRED_COPY6]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 7) ; PACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; PACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; PACKED-NEXT: {{ $}} @@ -225,40 +225,40 @@ ; UNPACKED-NEXT: successors: %bb.2(0x80000000) ; UNPACKED-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5 - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr6 + ; UNPACKED-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; UNPACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; UNPACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; UNPACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; UNPACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 + ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; UNPACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr5 + ; UNPACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = 
PRED_COPY $vgpr6 ; UNPACKED-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; UNPACKED-NEXT: {{ $}} ; UNPACKED-NEXT: bb.2: ; UNPACKED-NEXT: successors: %bb.3(0x80000000) ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY1]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY2]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY3]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY4]], implicit $exec ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; UNPACKED-NEXT: [[COPY10:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY9]], [[COPY7]], implicit $exec - ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY10]], [[COPY8]], implicit $exec + ; UNPACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; UNPACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; UNPACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; UNPACKED-NEXT: [[PRED_COPY10:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY9]], [[PRED_COPY7]], implicit $exec + ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY10]], [[PRED_COPY8]], implicit $exec ; UNPACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec - ; UNPACKED-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY6]], implicit $exec + ; UNPACKED-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[PRED_COPY6]], implicit $exec ; UNPACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; UNPACKED-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; UNPACKED-NEXT: {{ $}} ; UNPACKED-NEXT: bb.3: ; UNPACKED-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; 
UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: TBUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 7) + ; UNPACKED-NEXT: TBUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[PRED_COPY]], [[PRED_COPY5]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 7) ; UNPACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; UNPACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; UNPACKED-NEXT: {{ $}} @@ -274,40 +274,40 @@ ; PACKED-NEXT: successors: %bb.2(0x80000000) ; PACKED-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5 - ; PACKED-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr6 + ; PACKED-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; PACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; PACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; PACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; PACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 + ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; PACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr5 + ; PACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr6 ; PACKED-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; PACKED-NEXT: {{ $}} ; PACKED-NEXT: bb.2: ; PACKED-NEXT: successors: %bb.3(0x80000000) ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY1]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY2]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY3]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY4]], implicit $exec ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; PACKED-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; PACKED-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; PACKED-NEXT: [[COPY9:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; PACKED-NEXT: 
[[COPY10:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY9]], [[COPY7]], implicit $exec - ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY10]], [[COPY8]], implicit $exec + ; PACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; PACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; PACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; PACKED-NEXT: [[PRED_COPY10:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY9]], [[PRED_COPY7]], implicit $exec + ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY10]], [[PRED_COPY8]], implicit $exec ; PACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec - ; PACKED-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY6]], implicit $exec + ; PACKED-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[PRED_COPY6]], implicit $exec ; PACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; PACKED-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; PACKED-NEXT: {{ $}} ; PACKED-NEXT: bb.3: ; PACKED-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: TBUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 7) + ; PACKED-NEXT: TBUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[PRED_COPY]], [[PRED_COPY5]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 7) ; PACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; PACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; PACKED-NEXT: {{ $}} @@ -329,41 +329,41 @@ ; UNPACKED-NEXT: successors: %bb.2(0x80000000) ; UNPACKED-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr5 - ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] + ; UNPACKED-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; UNPACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; UNPACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + 
; UNPACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; UNPACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 + ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; UNPACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; UNPACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr5 + ; UNPACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY5]] ; UNPACKED-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; UNPACKED-NEXT: {{ $}} ; UNPACKED-NEXT: bb.2: ; UNPACKED-NEXT: successors: %bb.3(0x80000000) ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY1]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY2]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY3]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY4]], implicit $exec ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; UNPACKED-NEXT: [[COPY10:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; UNPACKED-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY10]], [[COPY8]], implicit $exec - ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec + ; UNPACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; UNPACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; UNPACKED-NEXT: [[PRED_COPY10:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; UNPACKED-NEXT: [[PRED_COPY11:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY10]], [[PRED_COPY8]], implicit $exec + ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY11]], [[PRED_COPY9]], implicit $exec ; UNPACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec - ; UNPACKED-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY6]], implicit $exec + ; UNPACKED-NEXT: 
[[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[PRED_COPY6]], implicit $exec ; UNPACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; UNPACKED-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; UNPACKED-NEXT: {{ $}} ; UNPACKED-NEXT: bb.3: ; UNPACKED-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: TBUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[COPY]], [[COPY7]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 7) + ; UNPACKED-NEXT: TBUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[PRED_COPY]], [[PRED_COPY7]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 7) ; UNPACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; UNPACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; UNPACKED-NEXT: {{ $}} @@ -379,41 +379,41 @@ ; PACKED-NEXT: successors: %bb.2(0x80000000) ; PACKED-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; PACKED-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr5 - ; PACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] + ; PACKED-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; PACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; PACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; PACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; PACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 + ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; PACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; PACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr5 + ; PACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY5]] ; PACKED-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; PACKED-NEXT: {{ $}} ; PACKED-NEXT: bb.2: ; PACKED-NEXT: successors: %bb.3(0x80000000) ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY1]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY2]], implicit $exec + ; PACKED-NEXT: 
[[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY3]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY4]], implicit $exec ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; PACKED-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; PACKED-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; PACKED-NEXT: [[COPY10:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; PACKED-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY10]], [[COPY8]], implicit $exec - ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec + ; PACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; PACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; PACKED-NEXT: [[PRED_COPY10:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; PACKED-NEXT: [[PRED_COPY11:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY10]], [[PRED_COPY8]], implicit $exec + ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY11]], [[PRED_COPY9]], implicit $exec ; PACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec - ; PACKED-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY6]], implicit $exec + ; PACKED-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[PRED_COPY6]], implicit $exec ; PACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; PACKED-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; PACKED-NEXT: {{ $}} ; PACKED-NEXT: bb.3: ; PACKED-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: TBUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY]], [[COPY7]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 7) + ; PACKED-NEXT: TBUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[PRED_COPY]], [[PRED_COPY7]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 7) ; PACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; PACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; PACKED-NEXT: {{ $}} @@ -433,29 +433,29 @@ ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 
- ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; UNPACKED-NEXT: TBUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 1, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 7) + ; UNPACKED-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; UNPACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; UNPACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; UNPACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; UNPACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; UNPACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; UNPACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; UNPACKED-NEXT: TBUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[PRED_COPY]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 78, 1, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 7) ; UNPACKED-NEXT: S_ENDPGM 0 ; PACKED-LABEL: name: raw_tbuffer_store_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset_glc ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; PACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; PACKED-NEXT: TBUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 1, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 7) + ; PACKED-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; PACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; PACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; PACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; PACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; PACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; PACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; PACKED-NEXT: TBUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[PRED_COPY]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 78, 1, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 7) ; PACKED-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.tbuffer.store.f16(half %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 1) ret void @@ -466,29 +466,29 @@ ; UNPACKED: bb.1 (%ir-block.0): ; 
UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; UNPACKED-NEXT: TBUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 2, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 7) + ; UNPACKED-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; UNPACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; UNPACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; UNPACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; UNPACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; UNPACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; UNPACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; UNPACKED-NEXT: TBUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[PRED_COPY]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 78, 2, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 7) ; UNPACKED-NEXT: S_ENDPGM 0 ; PACKED-LABEL: name: raw_tbuffer_store_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; PACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; PACKED-NEXT: TBUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 2, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 7) + ; PACKED-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; PACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; PACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; PACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; PACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; PACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; PACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; PACKED-NEXT: TBUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[PRED_COPY]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 78, 2, 0, implicit 
$exec :: (dereferenceable store (s16), align 1, addrspace 7) ; PACKED-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.tbuffer.store.f16(half %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 2) ret void @@ -499,29 +499,29 @@ ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; UNPACKED-NEXT: TBUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 3, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 7) + ; UNPACKED-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; UNPACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; UNPACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; UNPACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; UNPACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; UNPACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; UNPACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; UNPACKED-NEXT: TBUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[PRED_COPY]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 78, 3, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 7) ; UNPACKED-NEXT: S_ENDPGM 0 ; PACKED-LABEL: name: raw_tbuffer_store_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc_glc ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; PACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; PACKED-NEXT: TBUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 3, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 7) + ; PACKED-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; PACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; PACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; PACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; PACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; 
PACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; PACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; PACKED-NEXT: TBUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[PRED_COPY]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 78, 3, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 7) ; PACKED-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.tbuffer.store.f16(half %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 3) ret void @@ -532,29 +532,29 @@ ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; UNPACKED-NEXT: TBUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 4, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 7) + ; UNPACKED-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; UNPACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; UNPACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; UNPACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; UNPACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; UNPACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; UNPACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; UNPACKED-NEXT: TBUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[PRED_COPY]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 78, 4, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 7) ; UNPACKED-NEXT: S_ENDPGM 0 ; PACKED-LABEL: name: raw_tbuffer_store_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset_dlc ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; PACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; PACKED-NEXT: TBUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 4, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 7) + ; PACKED-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; PACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; PACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; PACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = 
PRED_COPY $sgpr4 + ; PACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; PACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; PACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; PACKED-NEXT: TBUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[PRED_COPY]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 78, 4, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 7) ; PACKED-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.tbuffer.store.f16(half %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 4) ret void diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.store.i8.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.store.i8.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.store.i8.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.store.i8.ll @@ -7,29 +7,29 @@ ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; UNPACKED-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (s8), addrspace 7) + ; UNPACKED-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; UNPACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; UNPACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; UNPACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; UNPACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; UNPACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; UNPACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; UNPACKED-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[PRED_COPY]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (s8), addrspace 7) ; UNPACKED-NEXT: S_ENDPGM 0 ; PACKED-LABEL: name: raw_tbuffer_store_i8__sgpr_rsrc__vgpr_voffset__sgpr_soffset ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; PACKED-NEXT: 
[[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; PACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; PACKED-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (s8), addrspace 7) + ; PACKED-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; PACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; PACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; PACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; PACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; PACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; PACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; PACKED-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[PRED_COPY]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (s8), addrspace 7) ; PACKED-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.tbuffer.store.i8(i8 %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 94, i32 0) ret void @@ -42,37 +42,37 @@ ; UNPACKED-NEXT: successors: %bb.2(0x80000000) ; UNPACKED-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5 - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; UNPACKED-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; UNPACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; UNPACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; UNPACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; UNPACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 + ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; UNPACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr5 + ; UNPACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 ; UNPACKED-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; UNPACKED-NEXT: {{ $}} ; UNPACKED-NEXT: bb.2: ; UNPACKED-NEXT: successors: %bb.3(0x80000000) ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY1]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY2]], implicit $exec + ; UNPACKED-NEXT: 
[[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY3]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY4]], implicit $exec ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; UNPACKED-NEXT: [[COPY10:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY9]], [[COPY7]], implicit $exec - ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY10]], [[COPY8]], implicit $exec + ; UNPACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; UNPACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; UNPACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; UNPACKED-NEXT: [[PRED_COPY10:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY9]], [[PRED_COPY7]], implicit $exec + ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY10]], [[PRED_COPY8]], implicit $exec ; UNPACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; UNPACKED-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; UNPACKED-NEXT: {{ $}} ; UNPACKED-NEXT: bb.3: ; UNPACKED-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE1]], [[COPY6]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (s8), addrspace 7) + ; UNPACKED-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[PRED_COPY]], [[PRED_COPY5]], [[REG_SEQUENCE1]], [[PRED_COPY6]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (s8), addrspace 7) ; UNPACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; UNPACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; UNPACKED-NEXT: {{ $}} @@ -88,37 +88,37 @@ ; PACKED-NEXT: successors: %bb.2(0x80000000) ; PACKED-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5 - ; PACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; PACKED-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; PACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; PACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; PACKED-NEXT: 
[[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; PACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 + ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; PACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr5 + ; PACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 ; PACKED-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; PACKED-NEXT: {{ $}} ; PACKED-NEXT: bb.2: ; PACKED-NEXT: successors: %bb.3(0x80000000) ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY1]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY2]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY3]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY4]], implicit $exec ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; PACKED-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; PACKED-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; PACKED-NEXT: [[COPY9:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; PACKED-NEXT: [[COPY10:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY9]], [[COPY7]], implicit $exec - ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY10]], [[COPY8]], implicit $exec + ; PACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; PACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; PACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; PACKED-NEXT: [[PRED_COPY10:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY9]], [[PRED_COPY7]], implicit $exec + ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY10]], [[PRED_COPY8]], implicit $exec ; PACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; PACKED-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; PACKED-NEXT: {{ $}} ; PACKED-NEXT: bb.3: ; PACKED-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE1]], [[COPY6]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (s8), addrspace 7) + ; PACKED-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[PRED_COPY]], [[PRED_COPY5]], 
[[REG_SEQUENCE1]], [[PRED_COPY6]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (s8), addrspace 7) ; PACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; PACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; PACKED-NEXT: {{ $}} @@ -140,40 +140,40 @@ ; UNPACKED-NEXT: successors: %bb.2(0x80000000) ; UNPACKED-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5 - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr6 + ; UNPACKED-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; UNPACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; UNPACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; UNPACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; UNPACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 + ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; UNPACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr5 + ; UNPACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr6 ; UNPACKED-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; UNPACKED-NEXT: {{ $}} ; UNPACKED-NEXT: bb.2: ; UNPACKED-NEXT: successors: %bb.3(0x80000000) ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY1]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY2]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY3]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY4]], implicit $exec ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; UNPACKED-NEXT: [[COPY10:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY9]], [[COPY7]], implicit $exec - ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY10]], [[COPY8]], 
implicit $exec + ; UNPACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; UNPACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; UNPACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; UNPACKED-NEXT: [[PRED_COPY10:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY9]], [[PRED_COPY7]], implicit $exec + ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY10]], [[PRED_COPY8]], implicit $exec ; UNPACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec - ; UNPACKED-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY6]], implicit $exec + ; UNPACKED-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[PRED_COPY6]], implicit $exec ; UNPACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; UNPACKED-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; UNPACKED-NEXT: {{ $}} ; UNPACKED-NEXT: bb.3: ; UNPACKED-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (s8), addrspace 7) + ; UNPACKED-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[PRED_COPY]], [[PRED_COPY5]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (s8), addrspace 7) ; UNPACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; UNPACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; UNPACKED-NEXT: {{ $}} @@ -189,40 +189,40 @@ ; PACKED-NEXT: successors: %bb.2(0x80000000) ; PACKED-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5 - ; PACKED-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr6 + ; PACKED-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; PACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; PACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; PACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; PACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 + ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; PACKED-NEXT: 
[[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr5 + ; PACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr6 ; PACKED-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; PACKED-NEXT: {{ $}} ; PACKED-NEXT: bb.2: ; PACKED-NEXT: successors: %bb.3(0x80000000) ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY1]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY2]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY3]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY4]], implicit $exec ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; PACKED-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; PACKED-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; PACKED-NEXT: [[COPY9:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; PACKED-NEXT: [[COPY10:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY9]], [[COPY7]], implicit $exec - ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY10]], [[COPY8]], implicit $exec + ; PACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; PACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; PACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; PACKED-NEXT: [[PRED_COPY10:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY9]], [[PRED_COPY7]], implicit $exec + ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY10]], [[PRED_COPY8]], implicit $exec ; PACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec - ; PACKED-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY6]], implicit $exec + ; PACKED-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[PRED_COPY6]], implicit $exec ; PACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; PACKED-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; PACKED-NEXT: {{ $}} ; PACKED-NEXT: bb.3: ; PACKED-NEXT: successors: %bb.4(0x40000000), 
%bb.2(0x40000000) ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (s8), addrspace 7) + ; PACKED-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[PRED_COPY]], [[PRED_COPY5]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (s8), addrspace 7) ; PACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; PACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; PACKED-NEXT: {{ $}} @@ -244,41 +244,41 @@ ; UNPACKED-NEXT: successors: %bb.2(0x80000000) ; UNPACKED-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr5 - ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] + ; UNPACKED-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; UNPACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; UNPACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; UNPACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; UNPACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 + ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; UNPACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; UNPACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr5 + ; UNPACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY5]] ; UNPACKED-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; UNPACKED-NEXT: {{ $}} ; UNPACKED-NEXT: bb.2: ; UNPACKED-NEXT: successors: %bb.3(0x80000000) ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY1]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY2]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY3]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY4]], implicit $exec ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; 
UNPACKED-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; UNPACKED-NEXT: [[COPY10:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; UNPACKED-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY10]], [[COPY8]], implicit $exec - ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec + ; UNPACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; UNPACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; UNPACKED-NEXT: [[PRED_COPY10:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; UNPACKED-NEXT: [[PRED_COPY11:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY10]], [[PRED_COPY8]], implicit $exec + ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY11]], [[PRED_COPY9]], implicit $exec ; UNPACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec - ; UNPACKED-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY6]], implicit $exec + ; UNPACKED-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[PRED_COPY6]], implicit $exec ; UNPACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; UNPACKED-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; UNPACKED-NEXT: {{ $}} ; UNPACKED-NEXT: bb.3: ; UNPACKED-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY7]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (s8), addrspace 7) + ; UNPACKED-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[PRED_COPY]], [[PRED_COPY7]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (s8), addrspace 7) ; UNPACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; UNPACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; UNPACKED-NEXT: {{ $}} @@ -294,41 +294,41 @@ ; PACKED-NEXT: successors: %bb.2(0x80000000) ; PACKED-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; PACKED-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr5 - ; PACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] + ; PACKED-NEXT: 
[[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; PACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; PACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; PACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; PACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 + ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; PACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; PACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr5 + ; PACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY5]] ; PACKED-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; PACKED-NEXT: {{ $}} ; PACKED-NEXT: bb.2: ; PACKED-NEXT: successors: %bb.3(0x80000000) ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY1]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY2]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY3]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY4]], implicit $exec ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; PACKED-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; PACKED-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; PACKED-NEXT: [[COPY10:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; PACKED-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY10]], [[COPY8]], implicit $exec - ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec + ; PACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; PACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; PACKED-NEXT: [[PRED_COPY10:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; PACKED-NEXT: [[PRED_COPY11:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY10]], [[PRED_COPY8]], implicit $exec + ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY11]], [[PRED_COPY9]], implicit $exec ; PACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec - ; PACKED-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec + ; PACKED-NEXT: 
[[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY6]], implicit $exec + ; PACKED-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[PRED_COPY6]], implicit $exec ; PACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; PACKED-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; PACKED-NEXT: {{ $}} ; PACKED-NEXT: bb.3: ; PACKED-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY7]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (s8), addrspace 7) + ; PACKED-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[PRED_COPY]], [[PRED_COPY7]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (s8), addrspace 7) ; PACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; PACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; PACKED-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.store.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.store.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.store.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.store.ll @@ -8,15 +8,15 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 7) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[PRED_COPY]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 78, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 7) ; CHECK-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 0) ret void @@ -28,17 +28,17 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; 
CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: TBUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE]], [[COPY6]], [[REG_SEQUENCE1]], [[COPY7]], 0, 78, 0, 0, implicit $exec :: (dereferenceable store (<2 x s32>), align 1, addrspace 7) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY3]], %subreg.sub1, [[PRED_COPY4]], %subreg.sub2, [[PRED_COPY5]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: TBUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE]], [[PRED_COPY6]], [[REG_SEQUENCE1]], [[PRED_COPY7]], 0, 78, 0, 0, implicit $exec :: (dereferenceable store (<2 x s32>), align 1, addrspace 7) ; CHECK-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.tbuffer.store.v2f32(<2 x float> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 0) ret void @@ -50,18 +50,18 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY5]], %subreg.sub2, [[COPY6]], %subreg.sub3 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: TBUFFER_STORE_FORMAT_XYZ_OFFEN_exact [[REG_SEQUENCE]], [[COPY7]], [[REG_SEQUENCE1]], [[COPY8]], 0, 78, 0, 0, implicit $exec :: (dereferenceable store (<3 x s32>), align 1, addrspace 7) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_96 = REG_SEQUENCE 
[[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY3]], %subreg.sub0, [[PRED_COPY4]], %subreg.sub1, [[PRED_COPY5]], %subreg.sub2, [[PRED_COPY6]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: TBUFFER_STORE_FORMAT_XYZ_OFFEN_exact [[REG_SEQUENCE]], [[PRED_COPY7]], [[REG_SEQUENCE1]], [[PRED_COPY8]], 0, 78, 0, 0, implicit $exec :: (dereferenceable store (<3 x s32>), align 1, addrspace 7) ; CHECK-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.tbuffer.store.v3f32(<3 x float> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 0) ret void @@ -73,19 +73,19 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2, [[COPY7]], %subreg.sub3 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: TBUFFER_STORE_FORMAT_XYZW_OFFEN_exact [[REG_SEQUENCE]], [[COPY8]], [[REG_SEQUENCE1]], [[COPY9]], 0, 78, 0, 0, implicit $exec :: (dereferenceable store (<4 x s32>), align 1, addrspace 7) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY4]], %subreg.sub0, [[PRED_COPY5]], %subreg.sub1, [[PRED_COPY6]], %subreg.sub2, [[PRED_COPY7]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: TBUFFER_STORE_FORMAT_XYZW_OFFEN_exact [[REG_SEQUENCE]], [[PRED_COPY8]], [[REG_SEQUENCE1]], [[PRED_COPY9]], 0, 78, 0, 0, implicit $exec :: (dereferenceable store (<4 x s32>), align 1, addrspace 7) ; CHECK-NEXT: 
S_ENDPGM 0 call void @llvm.amdgcn.raw.tbuffer.store.v4f32(<4 x float> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 0) ret void @@ -97,16 +97,16 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr7 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] - ; CHECK-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY7]], [[REG_SEQUENCE]], [[COPY6]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 7) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr7 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY5]] + ; CHECK-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[PRED_COPY]], [[PRED_COPY7]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 7) ; CHECK-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 94, i32 0) ret void @@ -119,37 +119,37 @@ ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr5 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 ; CHECK-NEXT: 
[[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_MOV_B32 $exec_lo ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY1]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY2]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY3]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY4]], implicit $exec ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[COPY9]], [[COPY7]], implicit $exec - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[COPY10]], [[COPY8]], implicit $exec + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY9]], [[PRED_COPY7]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY10]], [[PRED_COPY8]], implicit $exec ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; CHECK-NEXT: [[S_AND_SAVEEXEC_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_SAVEEXEC_B32 killed [[S_AND_B32_]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE1]], [[COPY6]], 0, 94, 1, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 7) + ; CHECK-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[PRED_COPY]], [[PRED_COPY5]], [[REG_SEQUENCE1]], [[PRED_COPY6]], 0, 94, 1, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 7) ; CHECK-NEXT: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -171,40 +171,40 @@ ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6 ; CHECK-NEXT: 
{{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr6 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr5 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr6 ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_MOV_B32 $exec_lo ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY1]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY2]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY3]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY4]], implicit $exec ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[COPY9]], [[COPY7]], implicit $exec - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[COPY10]], [[COPY8]], implicit $exec + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY9]], [[PRED_COPY7]], implicit $exec + ; CHECK-NEXT: 
[[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY10]], [[PRED_COPY8]], implicit $exec ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec - ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY6]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[PRED_COPY6]], implicit $exec ; CHECK-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[S_AND_B32_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; CHECK-NEXT: [[S_AND_SAVEEXEC_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_SAVEEXEC_B32 killed [[S_AND_B32_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 7) + ; CHECK-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[PRED_COPY]], [[PRED_COPY5]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 7) ; CHECK-NEXT: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -226,41 +226,41 @@ ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr5 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr5 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY5]] ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_MOV_B32 $exec_lo ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec - ; CHECK-NEXT: 
[[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY1]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY2]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY3]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY4]], implicit $exec ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[COPY10]], [[COPY8]], implicit $exec - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY10]], [[PRED_COPY8]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY11]], [[PRED_COPY9]], implicit $exec ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec - ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY6]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[PRED_COPY6]], implicit $exec ; CHECK-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[S_AND_B32_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; CHECK-NEXT: [[S_AND_SAVEEXEC_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_SAVEEXEC_B32 killed [[S_AND_B32_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY7]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 7) + ; CHECK-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[PRED_COPY]], [[PRED_COPY7]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, implicit $exec :: 
(dereferenceable store (s32), align 1, addrspace 7) ; CHECK-NEXT: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -281,15 +281,15 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 1, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 7) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[PRED_COPY]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 78, 1, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 7) ; CHECK-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 1) ret void @@ -301,15 +301,15 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 2, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 7) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = 
PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[PRED_COPY]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 78, 2, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 7) ; CHECK-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 2) ret void @@ -321,15 +321,15 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 3, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 7) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[PRED_COPY]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 78, 3, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 7) ; CHECK-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 3) ret void @@ -341,15 +341,15 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 4, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 7) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: 
[[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[PRED_COPY]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 78, 4, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 7) ; CHECK-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 4) ret void @@ -362,14 +362,14 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: TBUFFER_STORE_FORMAT_X_OFFSET_exact [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 7) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: TBUFFER_STORE_FORMAT_X_OFFSET_exact [[PRED_COPY]], [[REG_SEQUENCE]], [[PRED_COPY5]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 7) ; CHECK-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 0, i32 %soffset, i32 94, i32 0) ret void @@ -380,14 +380,14 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: TBUFFER_STORE_FORMAT_X_OFFSET_exact [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 4095, 94, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 7) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: 
[[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: TBUFFER_STORE_FORMAT_X_OFFSET_exact [[PRED_COPY]], [[REG_SEQUENCE]], [[PRED_COPY5]], 4095, 94, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 7) ; CHECK-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 4095, i32 %soffset, i32 94, i32 0) ret void @@ -398,16 +398,16 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; CHECK-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY6]], [[REG_SEQUENCE]], [[COPY5]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 7) + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; CHECK-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[PRED_COPY]], [[PRED_COPY6]], [[REG_SEQUENCE]], [[PRED_COPY5]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 7) ; CHECK-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 4096, i32 %soffset, i32 94, i32 0) ret void @@ -418,15 +418,15 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 16, 94, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 7) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: 
[[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[PRED_COPY]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[PRED_COPY6]], 16, 94, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 7) ; CHECK-NEXT: S_ENDPGM 0 %voffset = add i32 %voffset.base, 16 call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 94, i32 0) @@ -438,15 +438,15 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 94, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 7) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[PRED_COPY]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[PRED_COPY6]], 4095, 94, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 7) ; CHECK-NEXT: S_ENDPGM 0 %voffset = add i32 %voffset.base, 4095 call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 94, i32 0) @@ -458,18 +458,18 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = 
PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; CHECK-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY5]], [[COPY7]], 0, implicit $exec - ; CHECK-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[V_ADD_U32_e64_]], [[REG_SEQUENCE]], [[COPY6]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 7) + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; CHECK-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[PRED_COPY5]], [[PRED_COPY7]], 0, implicit $exec + ; CHECK-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[PRED_COPY]], [[V_ADD_U32_e64_]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 7) ; CHECK-NEXT: S_ENDPGM 0 %voffset = add i32 %voffset.base, 4096 call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 94, i32 0) @@ -481,15 +481,15 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4095 - ; CHECK-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 7) + ; CHECK-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[PRED_COPY]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 7) ; CHECK-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 4095, i32 94, i32 0) ret void @@ -500,15 +500,15 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ 
$}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 - ; CHECK-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 7) + ; CHECK-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[PRED_COPY]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 7) ; CHECK-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 4096, i32 94, i32 0) ret void @@ -519,17 +519,17 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 16 - ; CHECK-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY6]], [[S_MOV_B32_]], implicit-def $scc - ; CHECK-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 7) + ; CHECK-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[PRED_COPY6]], [[S_MOV_B32_]], implicit-def $scc + ; CHECK-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact 
[[PRED_COPY]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 7) ; CHECK-NEXT: S_ENDPGM 0 %soffset = add i32 %soffset.base, 16 call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 94, i32 0) @@ -541,17 +541,17 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4095 - ; CHECK-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY6]], [[S_MOV_B32_]], implicit-def $scc - ; CHECK-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 7) + ; CHECK-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[PRED_COPY6]], [[S_MOV_B32_]], implicit-def $scc + ; CHECK-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[PRED_COPY]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 7) ; CHECK-NEXT: S_ENDPGM 0 %soffset = add i32 %soffset.base, 4095 call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 94, i32 0) @@ -563,17 +563,17 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 
+ ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 - ; CHECK-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY6]], [[S_MOV_B32_]], implicit-def $scc - ; CHECK-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 7) + ; CHECK-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[PRED_COPY6]], [[S_MOV_B32_]], implicit-def $scc + ; CHECK-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[PRED_COPY]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 7) ; CHECK-NEXT: S_ENDPGM 0 %soffset = add i32 %soffset.base, 4096 call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 94, i32 0) @@ -587,39 +587,39 @@ ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr5 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 5000 - ; CHECK-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY6]], [[S_MOV_B32_]], implicit-def $scc + ; CHECK-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[PRED_COPY6]], [[S_MOV_B32_]], implicit-def $scc ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32_xm0_xexec = S_MOV_B32 $exec_lo ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY1]], implicit $exec + ; CHECK-NEXT: 
[[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY2]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY3]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY4]], implicit $exec ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[COPY9]], [[COPY7]], implicit $exec - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[COPY10]], [[COPY8]], implicit $exec + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY9]], [[PRED_COPY7]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY10]], [[PRED_COPY8]], implicit $exec ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; CHECK-NEXT: [[S_AND_SAVEEXEC_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_SAVEEXEC_B32 killed [[S_AND_B32_]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE1]], [[S_ADD_I32_]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 7) + ; CHECK-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[PRED_COPY]], [[PRED_COPY5]], [[REG_SEQUENCE1]], [[S_ADD_I32_]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 7) ; CHECK-NEXT: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -642,40 +642,40 @@ ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: 
[[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr5 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; CHECK-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY5]], [[COPY7]], 0, implicit $exec + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; CHECK-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[PRED_COPY5]], [[PRED_COPY7]], 0, implicit $exec ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32_xm0_xexec = S_MOV_B32 $exec_lo ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY1]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY2]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY3]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY4]], implicit $exec ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[COPY10]], [[COPY8]], implicit $exec - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY10]], [[PRED_COPY8]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY11]], [[PRED_COPY9]], implicit $exec ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; CHECK-NEXT: 
[[S_AND_SAVEEXEC_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_SAVEEXEC_B32 killed [[S_AND_B32_]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[V_ADD_U32_e64_]], [[REG_SEQUENCE1]], [[COPY6]], 904, 94, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 7) + ; CHECK-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[PRED_COPY]], [[V_ADD_U32_e64_]], [[REG_SEQUENCE1]], [[PRED_COPY6]], 904, 94, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 7) ; CHECK-NEXT: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.buffer.load.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.buffer.load.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.buffer.load.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.buffer.load.ll @@ -11,46 +11,46 @@ ; GFX6: bb.1 (%ir-block.0): ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s32)) - ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec - ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX6-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[PRED_COPY4]], 0 :: (dereferenceable invariant load (s32)) + ; GFX6-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY5]], implicit $exec + ; GFX6-NEXT: $sgpr0 = PRED_COPY [[V_READFIRSTLANE_B32_]] ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX7-LABEL: name: s_buffer_load_i32 ; GFX7: bb.1 (%ir-block.0): ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], 
%subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s32)) - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec - ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[PRED_COPY4]], 0 :: (dereferenceable invariant load (s32)) + ; GFX7-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY5]], implicit $exec + ; GFX7-NEXT: $sgpr0 = PRED_COPY [[V_READFIRSTLANE_B32_]] ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX8-LABEL: name: s_buffer_load_i32 ; GFX8: bb.1 (%ir-block.0): ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s32)) - ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec - ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[PRED_COPY4]], 0 :: (dereferenceable invariant load (s32)) + ; GFX8-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY5]], implicit $exec + ; GFX8-NEXT: $sgpr0 = PRED_COPY [[V_READFIRSTLANE_B32_]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 %val = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %rsrc, i32 
%soffset, i32 0) ret i32 %val @@ -61,46 +61,46 @@ ; GFX6: bb.1 (%ir-block.0): ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[COPY4]], 1 :: (dereferenceable invariant load (s32)) - ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec - ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX6-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[PRED_COPY4]], 1 :: (dereferenceable invariant load (s32)) + ; GFX6-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY5]], implicit $exec + ; GFX6-NEXT: $sgpr0 = PRED_COPY [[V_READFIRSTLANE_B32_]] ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX7-LABEL: name: s_buffer_load_i32_glc ; GFX7: bb.1 (%ir-block.0): ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[COPY4]], 1 :: (dereferenceable invariant load (s32)) - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec - ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = 
S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[PRED_COPY4]], 1 :: (dereferenceable invariant load (s32)) + ; GFX7-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY5]], implicit $exec + ; GFX7-NEXT: $sgpr0 = PRED_COPY [[V_READFIRSTLANE_B32_]] ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX8-LABEL: name: s_buffer_load_i32_glc ; GFX8: bb.1 (%ir-block.0): ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[COPY4]], 1 :: (dereferenceable invariant load (s32)) - ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec - ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[PRED_COPY4]], 1 :: (dereferenceable invariant load (s32)) + ; GFX8-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY5]], implicit $exec + ; GFX8-NEXT: $sgpr0 = PRED_COPY [[V_READFIRSTLANE_B32_]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 %val = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %rsrc, i32 %soffset, i32 1) ret i32 %val @@ -111,61 +111,61 @@ ; GFX6: bb.1 (%ir-block.0): ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORDX2_SGPR:%[0-9]+]]:sreg_64_xexec = S_BUFFER_LOAD_DWORDX2_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s64), align 4) - ; GFX6-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX2_SGPR]].sub0 - ; GFX6-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX2_SGPR]].sub1 - ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit 
$exec - ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY6]] - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec - ; GFX6-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX6-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORDX2_SGPR:%[0-9]+]]:sreg_64_xexec = S_BUFFER_LOAD_DWORDX2_SGPR [[REG_SEQUENCE]], [[PRED_COPY4]], 0 :: (dereferenceable invariant load (s64), align 4) + ; GFX6-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX2_SGPR]].sub0 + ; GFX6-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX2_SGPR]].sub1 + ; GFX6-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY5]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY7]], implicit $exec + ; GFX6-NEXT: $sgpr0 = PRED_COPY [[V_READFIRSTLANE_B32_]] + ; GFX6-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY6]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY8]], implicit $exec + ; GFX6-NEXT: $sgpr1 = PRED_COPY [[V_READFIRSTLANE_B32_1]] ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 ; GFX7-LABEL: name: s_buffer_load_v2i32 ; GFX7: bb.1 (%ir-block.0): ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORDX2_SGPR:%[0-9]+]]:sreg_64_xexec = S_BUFFER_LOAD_DWORDX2_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s64), align 4) - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX2_SGPR]].sub0 - ; GFX7-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX2_SGPR]].sub1 - ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY6]] - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec - ; GFX7-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX7-NEXT: 
[[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORDX2_SGPR:%[0-9]+]]:sreg_64_xexec = S_BUFFER_LOAD_DWORDX2_SGPR [[REG_SEQUENCE]], [[PRED_COPY4]], 0 :: (dereferenceable invariant load (s64), align 4) + ; GFX7-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX2_SGPR]].sub0 + ; GFX7-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX2_SGPR]].sub1 + ; GFX7-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY5]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY7]], implicit $exec + ; GFX7-NEXT: $sgpr0 = PRED_COPY [[V_READFIRSTLANE_B32_]] + ; GFX7-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY6]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY8]], implicit $exec + ; GFX7-NEXT: $sgpr1 = PRED_COPY [[V_READFIRSTLANE_B32_1]] ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 ; GFX8-LABEL: name: s_buffer_load_v2i32 ; GFX8: bb.1 (%ir-block.0): ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORDX2_SGPR:%[0-9]+]]:sreg_64_xexec = S_BUFFER_LOAD_DWORDX2_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s64), align 4) - ; GFX8-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX2_SGPR]].sub0 - ; GFX8-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX2_SGPR]].sub1 - ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY6]] - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec - ; GFX8-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORDX2_SGPR:%[0-9]+]]:sreg_64_xexec = S_BUFFER_LOAD_DWORDX2_SGPR [[REG_SEQUENCE]], [[PRED_COPY4]], 0 :: (dereferenceable invariant load (s64), align 4) + ; GFX8-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX2_SGPR]].sub0 + ; GFX8-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX2_SGPR]].sub1 + ; GFX8-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY5]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY7]], implicit $exec + ; GFX8-NEXT: $sgpr0 = PRED_COPY [[V_READFIRSTLANE_B32_]] + ; GFX8-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY6]] + ; GFX8-NEXT: 
[[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY8]], implicit $exec + ; GFX8-NEXT: $sgpr1 = PRED_COPY [[V_READFIRSTLANE_B32_1]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 %val = call <2 x i32> @llvm.amdgcn.s.buffer.load.v2i32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret <2 x i32> %val @@ -176,76 +176,76 @@ ; GFX6: bb.1 (%ir-block.0): ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORDX4_SGPR:%[0-9]+]]:sgpr_128 = S_BUFFER_LOAD_DWORDX4_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s96), align 4) - ; GFX6-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX4_SGPR]].sub0 - ; GFX6-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX4_SGPR]].sub1 - ; GFX6-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX4_SGPR]].sub2 - ; GFX6-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX4_SGPR]].sub3 - ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec - ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX6-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY6]] - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec - ; GFX6-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] - ; GFX6-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[COPY7]] - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec - ; GFX6-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX6-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORDX4_SGPR:%[0-9]+]]:sgpr_128 = S_BUFFER_LOAD_DWORDX4_SGPR [[REG_SEQUENCE]], [[PRED_COPY4]], 0 :: (dereferenceable invariant load (s96), align 4) + ; GFX6-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX4_SGPR]].sub0 + ; GFX6-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX4_SGPR]].sub1 + ; GFX6-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX4_SGPR]].sub2 + ; GFX6-NEXT: [[PRED_COPY8:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX4_SGPR]].sub3 + ; GFX6-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY5]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY9]], implicit $exec + ; GFX6-NEXT: $sgpr0 = PRED_COPY [[V_READFIRSTLANE_B32_]] + ; GFX6-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY6]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY10]], implicit $exec + 
; GFX6-NEXT: $sgpr1 = PRED_COPY [[V_READFIRSTLANE_B32_1]] + ; GFX6-NEXT: [[PRED_COPY11:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY7]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY11]], implicit $exec + ; GFX6-NEXT: $sgpr2 = PRED_COPY [[V_READFIRSTLANE_B32_2]] ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2 ; GFX7-LABEL: name: s_buffer_load_v3i32 ; GFX7: bb.1 (%ir-block.0): ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORDX4_SGPR:%[0-9]+]]:sgpr_128 = S_BUFFER_LOAD_DWORDX4_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s96), align 4) - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX4_SGPR]].sub0 - ; GFX7-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX4_SGPR]].sub1 - ; GFX7-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX4_SGPR]].sub2 - ; GFX7-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX4_SGPR]].sub3 - ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec - ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY6]] - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec - ; GFX7-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] - ; GFX7-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[COPY7]] - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec - ; GFX7-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORDX4_SGPR:%[0-9]+]]:sgpr_128 = S_BUFFER_LOAD_DWORDX4_SGPR [[REG_SEQUENCE]], [[PRED_COPY4]], 0 :: (dereferenceable invariant load (s96), align 4) + ; GFX7-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX4_SGPR]].sub0 + ; GFX7-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX4_SGPR]].sub1 + ; GFX7-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX4_SGPR]].sub2 + ; GFX7-NEXT: [[PRED_COPY8:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX4_SGPR]].sub3 + ; GFX7-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY5]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY9]], implicit $exec + ; GFX7-NEXT: $sgpr0 = PRED_COPY [[V_READFIRSTLANE_B32_]] + ; GFX7-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY6]] + ; GFX7-NEXT: 
[[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY10]], implicit $exec + ; GFX7-NEXT: $sgpr1 = PRED_COPY [[V_READFIRSTLANE_B32_1]] + ; GFX7-NEXT: [[PRED_COPY11:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY7]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY11]], implicit $exec + ; GFX7-NEXT: $sgpr2 = PRED_COPY [[V_READFIRSTLANE_B32_2]] ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2 ; GFX8-LABEL: name: s_buffer_load_v3i32 ; GFX8: bb.1 (%ir-block.0): ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORDX4_SGPR:%[0-9]+]]:sgpr_128 = S_BUFFER_LOAD_DWORDX4_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s96), align 4) - ; GFX8-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX4_SGPR]].sub0 - ; GFX8-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX4_SGPR]].sub1 - ; GFX8-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX4_SGPR]].sub2 - ; GFX8-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX4_SGPR]].sub3 - ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec - ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY6]] - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec - ; GFX8-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] - ; GFX8-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[COPY7]] - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec - ; GFX8-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORDX4_SGPR:%[0-9]+]]:sgpr_128 = S_BUFFER_LOAD_DWORDX4_SGPR [[REG_SEQUENCE]], [[PRED_COPY4]], 0 :: (dereferenceable invariant load (s96), align 4) + ; GFX8-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX4_SGPR]].sub0 + ; GFX8-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX4_SGPR]].sub1 + ; GFX8-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX4_SGPR]].sub2 + ; GFX8-NEXT: [[PRED_COPY8:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX4_SGPR]].sub3 + ; GFX8-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY5]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY9]], implicit $exec + ; GFX8-NEXT: $sgpr0 = PRED_COPY [[V_READFIRSTLANE_B32_]] + ; GFX8-NEXT: 
[[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY6]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY10]], implicit $exec + ; GFX8-NEXT: $sgpr1 = PRED_COPY [[V_READFIRSTLANE_B32_1]] + ; GFX8-NEXT: [[PRED_COPY11:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY7]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY11]], implicit $exec + ; GFX8-NEXT: $sgpr2 = PRED_COPY [[V_READFIRSTLANE_B32_2]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2 %val = call <3 x i32> @llvm.amdgcn.s.buffer.load.v3i32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret <3 x i32> %val @@ -256,133 +256,133 @@ ; GFX6: bb.1 (%ir-block.0): ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORDX8_SGPR:%[0-9]+]]:sgpr_256 = S_BUFFER_LOAD_DWORDX8_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s256), align 4) - ; GFX6-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub0 - ; GFX6-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub1 - ; GFX6-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub2 - ; GFX6-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub3 - ; GFX6-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub4 - ; GFX6-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub5 - ; GFX6-NEXT: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub6 - ; GFX6-NEXT: [[COPY12:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub7 - ; GFX6-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY13]], implicit $exec - ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX6-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[COPY6]] - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY14]], implicit $exec - ; GFX6-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] - ; GFX6-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[COPY7]] - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY15]], implicit $exec - ; GFX6-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] - ; GFX6-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[COPY8]] - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY16]], implicit $exec - ; GFX6-NEXT: $sgpr3 = COPY [[V_READFIRSTLANE_B32_3]] - ; GFX6-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[COPY9]] - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY17]], implicit $exec - ; GFX6-NEXT: $sgpr4 = COPY [[V_READFIRSTLANE_B32_4]] - ; GFX6-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[COPY10]] - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY18]], implicit $exec - ; GFX6-NEXT: $sgpr5 = COPY [[V_READFIRSTLANE_B32_5]] - ; GFX6-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[COPY11]] - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 
[[COPY19]], implicit $exec - ; GFX6-NEXT: $sgpr6 = COPY [[V_READFIRSTLANE_B32_6]] - ; GFX6-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[COPY12]] - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY20]], implicit $exec - ; GFX6-NEXT: $sgpr7 = COPY [[V_READFIRSTLANE_B32_7]] + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX6-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORDX8_SGPR:%[0-9]+]]:sgpr_256 = S_BUFFER_LOAD_DWORDX8_SGPR [[REG_SEQUENCE]], [[PRED_COPY4]], 0 :: (dereferenceable invariant load (s256), align 4) + ; GFX6-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub0 + ; GFX6-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub1 + ; GFX6-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub2 + ; GFX6-NEXT: [[PRED_COPY8:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub3 + ; GFX6-NEXT: [[PRED_COPY9:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub4 + ; GFX6-NEXT: [[PRED_COPY10:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub5 + ; GFX6-NEXT: [[PRED_COPY11:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub6 + ; GFX6-NEXT: [[PRED_COPY12:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub7 + ; GFX6-NEXT: [[PRED_COPY13:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY5]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY13]], implicit $exec + ; GFX6-NEXT: $sgpr0 = PRED_COPY [[V_READFIRSTLANE_B32_]] + ; GFX6-NEXT: [[PRED_COPY14:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY6]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY14]], implicit $exec + ; GFX6-NEXT: $sgpr1 = PRED_COPY [[V_READFIRSTLANE_B32_1]] + ; GFX6-NEXT: [[PRED_COPY15:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY7]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY15]], implicit $exec + ; GFX6-NEXT: $sgpr2 = PRED_COPY [[V_READFIRSTLANE_B32_2]] + ; GFX6-NEXT: [[PRED_COPY16:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY8]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY16]], implicit $exec + ; GFX6-NEXT: $sgpr3 = PRED_COPY [[V_READFIRSTLANE_B32_3]] + ; GFX6-NEXT: [[PRED_COPY17:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY9]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY17]], implicit $exec + ; GFX6-NEXT: $sgpr4 = PRED_COPY [[V_READFIRSTLANE_B32_4]] + ; GFX6-NEXT: [[PRED_COPY18:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY10]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY18]], implicit $exec + ; GFX6-NEXT: $sgpr5 = PRED_COPY [[V_READFIRSTLANE_B32_5]] + ; GFX6-NEXT: [[PRED_COPY19:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY11]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY19]], implicit $exec + ; GFX6-NEXT: $sgpr6 = PRED_COPY [[V_READFIRSTLANE_B32_6]] + ; GFX6-NEXT: [[PRED_COPY20:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY12]] + ; GFX6-NEXT: 
[[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY20]], implicit $exec + ; GFX6-NEXT: $sgpr7 = PRED_COPY [[V_READFIRSTLANE_B32_7]] ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7 ; GFX7-LABEL: name: s_buffer_load_v8i32 ; GFX7: bb.1 (%ir-block.0): ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORDX8_SGPR:%[0-9]+]]:sgpr_256 = S_BUFFER_LOAD_DWORDX8_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s256), align 4) - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub0 - ; GFX7-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub1 - ; GFX7-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub2 - ; GFX7-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub3 - ; GFX7-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub4 - ; GFX7-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub5 - ; GFX7-NEXT: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub6 - ; GFX7-NEXT: [[COPY12:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub7 - ; GFX7-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY13]], implicit $exec - ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX7-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[COPY6]] - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY14]], implicit $exec - ; GFX7-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] - ; GFX7-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[COPY7]] - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY15]], implicit $exec - ; GFX7-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] - ; GFX7-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[COPY8]] - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY16]], implicit $exec - ; GFX7-NEXT: $sgpr3 = COPY [[V_READFIRSTLANE_B32_3]] - ; GFX7-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[COPY9]] - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY17]], implicit $exec - ; GFX7-NEXT: $sgpr4 = COPY [[V_READFIRSTLANE_B32_4]] - ; GFX7-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[COPY10]] - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY18]], implicit $exec - ; GFX7-NEXT: $sgpr5 = COPY [[V_READFIRSTLANE_B32_5]] - ; GFX7-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[COPY11]] - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY19]], implicit $exec - ; GFX7-NEXT: $sgpr6 = COPY [[V_READFIRSTLANE_B32_6]] - ; GFX7-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[COPY12]] - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY20]], implicit $exec - ; GFX7-NEXT: $sgpr7 = COPY [[V_READFIRSTLANE_B32_7]] + ; GFX7-NEXT: 
[[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORDX8_SGPR:%[0-9]+]]:sgpr_256 = S_BUFFER_LOAD_DWORDX8_SGPR [[REG_SEQUENCE]], [[PRED_COPY4]], 0 :: (dereferenceable invariant load (s256), align 4) + ; GFX7-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub0 + ; GFX7-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub1 + ; GFX7-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub2 + ; GFX7-NEXT: [[PRED_COPY8:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub3 + ; GFX7-NEXT: [[PRED_COPY9:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub4 + ; GFX7-NEXT: [[PRED_COPY10:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub5 + ; GFX7-NEXT: [[PRED_COPY11:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub6 + ; GFX7-NEXT: [[PRED_COPY12:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub7 + ; GFX7-NEXT: [[PRED_COPY13:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY5]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY13]], implicit $exec + ; GFX7-NEXT: $sgpr0 = PRED_COPY [[V_READFIRSTLANE_B32_]] + ; GFX7-NEXT: [[PRED_COPY14:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY6]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY14]], implicit $exec + ; GFX7-NEXT: $sgpr1 = PRED_COPY [[V_READFIRSTLANE_B32_1]] + ; GFX7-NEXT: [[PRED_COPY15:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY7]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY15]], implicit $exec + ; GFX7-NEXT: $sgpr2 = PRED_COPY [[V_READFIRSTLANE_B32_2]] + ; GFX7-NEXT: [[PRED_COPY16:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY8]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY16]], implicit $exec + ; GFX7-NEXT: $sgpr3 = PRED_COPY [[V_READFIRSTLANE_B32_3]] + ; GFX7-NEXT: [[PRED_COPY17:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY9]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY17]], implicit $exec + ; GFX7-NEXT: $sgpr4 = PRED_COPY [[V_READFIRSTLANE_B32_4]] + ; GFX7-NEXT: [[PRED_COPY18:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY10]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY18]], implicit $exec + ; GFX7-NEXT: $sgpr5 = PRED_COPY [[V_READFIRSTLANE_B32_5]] + ; GFX7-NEXT: [[PRED_COPY19:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY11]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY19]], implicit $exec + ; GFX7-NEXT: $sgpr6 = PRED_COPY [[V_READFIRSTLANE_B32_6]] + ; GFX7-NEXT: [[PRED_COPY20:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY12]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY20]], implicit $exec + ; GFX7-NEXT: $sgpr7 = PRED_COPY [[V_READFIRSTLANE_B32_7]] ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7 
; GFX8-LABEL: name: s_buffer_load_v8i32 ; GFX8: bb.1 (%ir-block.0): ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORDX8_SGPR:%[0-9]+]]:sgpr_256 = S_BUFFER_LOAD_DWORDX8_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s256), align 4) - ; GFX8-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub0 - ; GFX8-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub1 - ; GFX8-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub2 - ; GFX8-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub3 - ; GFX8-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub4 - ; GFX8-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub5 - ; GFX8-NEXT: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub6 - ; GFX8-NEXT: [[COPY12:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub7 - ; GFX8-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY13]], implicit $exec - ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX8-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[COPY6]] - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY14]], implicit $exec - ; GFX8-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] - ; GFX8-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[COPY7]] - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY15]], implicit $exec - ; GFX8-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] - ; GFX8-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[COPY8]] - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY16]], implicit $exec - ; GFX8-NEXT: $sgpr3 = COPY [[V_READFIRSTLANE_B32_3]] - ; GFX8-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[COPY9]] - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY17]], implicit $exec - ; GFX8-NEXT: $sgpr4 = COPY [[V_READFIRSTLANE_B32_4]] - ; GFX8-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[COPY10]] - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY18]], implicit $exec - ; GFX8-NEXT: $sgpr5 = COPY [[V_READFIRSTLANE_B32_5]] - ; GFX8-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[COPY11]] - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY19]], implicit $exec - ; GFX8-NEXT: $sgpr6 = COPY [[V_READFIRSTLANE_B32_6]] - ; GFX8-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[COPY12]] - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY20]], implicit $exec - ; GFX8-NEXT: $sgpr7 = COPY [[V_READFIRSTLANE_B32_7]] + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, 
[[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORDX8_SGPR:%[0-9]+]]:sgpr_256 = S_BUFFER_LOAD_DWORDX8_SGPR [[REG_SEQUENCE]], [[PRED_COPY4]], 0 :: (dereferenceable invariant load (s256), align 4) + ; GFX8-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub0 + ; GFX8-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub1 + ; GFX8-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub2 + ; GFX8-NEXT: [[PRED_COPY8:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub3 + ; GFX8-NEXT: [[PRED_COPY9:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub4 + ; GFX8-NEXT: [[PRED_COPY10:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub5 + ; GFX8-NEXT: [[PRED_COPY11:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub6 + ; GFX8-NEXT: [[PRED_COPY12:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub7 + ; GFX8-NEXT: [[PRED_COPY13:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY5]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY13]], implicit $exec + ; GFX8-NEXT: $sgpr0 = PRED_COPY [[V_READFIRSTLANE_B32_]] + ; GFX8-NEXT: [[PRED_COPY14:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY6]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY14]], implicit $exec + ; GFX8-NEXT: $sgpr1 = PRED_COPY [[V_READFIRSTLANE_B32_1]] + ; GFX8-NEXT: [[PRED_COPY15:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY7]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY15]], implicit $exec + ; GFX8-NEXT: $sgpr2 = PRED_COPY [[V_READFIRSTLANE_B32_2]] + ; GFX8-NEXT: [[PRED_COPY16:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY8]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY16]], implicit $exec + ; GFX8-NEXT: $sgpr3 = PRED_COPY [[V_READFIRSTLANE_B32_3]] + ; GFX8-NEXT: [[PRED_COPY17:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY9]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY17]], implicit $exec + ; GFX8-NEXT: $sgpr4 = PRED_COPY [[V_READFIRSTLANE_B32_4]] + ; GFX8-NEXT: [[PRED_COPY18:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY10]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY18]], implicit $exec + ; GFX8-NEXT: $sgpr5 = PRED_COPY [[V_READFIRSTLANE_B32_5]] + ; GFX8-NEXT: [[PRED_COPY19:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY11]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY19]], implicit $exec + ; GFX8-NEXT: $sgpr6 = PRED_COPY [[V_READFIRSTLANE_B32_6]] + ; GFX8-NEXT: [[PRED_COPY20:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY12]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY20]], implicit $exec + ; GFX8-NEXT: $sgpr7 = PRED_COPY [[V_READFIRSTLANE_B32_7]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7 %val = call <8 x i32> @llvm.amdgcn.s.buffer.load.v8i32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret <8 x i32> %val @@ -393,229 +393,229 @@ ; GFX6: bb.1 (%ir-block.0): ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6-NEXT: 
[[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORDX16_SGPR:%[0-9]+]]:sgpr_512 = S_BUFFER_LOAD_DWORDX16_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s512), align 4) - ; GFX6-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub0 - ; GFX6-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub1 - ; GFX6-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub2 - ; GFX6-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub3 - ; GFX6-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub4 - ; GFX6-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub5 - ; GFX6-NEXT: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub6 - ; GFX6-NEXT: [[COPY12:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub7 - ; GFX6-NEXT: [[COPY13:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub8 - ; GFX6-NEXT: [[COPY14:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub9 - ; GFX6-NEXT: [[COPY15:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub10 - ; GFX6-NEXT: [[COPY16:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub11 - ; GFX6-NEXT: [[COPY17:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub12 - ; GFX6-NEXT: [[COPY18:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub13 - ; GFX6-NEXT: [[COPY19:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub14 - ; GFX6-NEXT: [[COPY20:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub15 - ; GFX6-NEXT: [[COPY21:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY21]], implicit $exec - ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX6-NEXT: [[COPY22:%[0-9]+]]:vgpr_32 = COPY [[COPY6]] - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY22]], implicit $exec - ; GFX6-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] - ; GFX6-NEXT: [[COPY23:%[0-9]+]]:vgpr_32 = COPY [[COPY7]] - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY23]], implicit $exec - ; GFX6-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] - ; GFX6-NEXT: [[COPY24:%[0-9]+]]:vgpr_32 = COPY [[COPY8]] - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY24]], implicit $exec - ; GFX6-NEXT: $sgpr3 = COPY [[V_READFIRSTLANE_B32_3]] - ; GFX6-NEXT: [[COPY25:%[0-9]+]]:vgpr_32 = COPY [[COPY9]] - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY25]], implicit $exec - ; GFX6-NEXT: $sgpr4 = COPY [[V_READFIRSTLANE_B32_4]] - ; GFX6-NEXT: [[COPY26:%[0-9]+]]:vgpr_32 = COPY [[COPY10]] - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY26]], implicit $exec - ; GFX6-NEXT: $sgpr5 = COPY [[V_READFIRSTLANE_B32_5]] - ; GFX6-NEXT: [[COPY27:%[0-9]+]]:vgpr_32 = COPY [[COPY11]] - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY27]], implicit $exec - ; GFX6-NEXT: $sgpr6 = COPY [[V_READFIRSTLANE_B32_6]] - ; GFX6-NEXT: [[COPY28:%[0-9]+]]:vgpr_32 = COPY [[COPY12]] - ; GFX6-NEXT: 
[[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY28]], implicit $exec - ; GFX6-NEXT: $sgpr7 = COPY [[V_READFIRSTLANE_B32_7]] - ; GFX6-NEXT: [[COPY29:%[0-9]+]]:vgpr_32 = COPY [[COPY13]] - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_8:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY29]], implicit $exec - ; GFX6-NEXT: $sgpr8 = COPY [[V_READFIRSTLANE_B32_8]] - ; GFX6-NEXT: [[COPY30:%[0-9]+]]:vgpr_32 = COPY [[COPY14]] - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_9:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY30]], implicit $exec - ; GFX6-NEXT: $sgpr9 = COPY [[V_READFIRSTLANE_B32_9]] - ; GFX6-NEXT: [[COPY31:%[0-9]+]]:vgpr_32 = COPY [[COPY15]] - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_10:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY31]], implicit $exec - ; GFX6-NEXT: $sgpr10 = COPY [[V_READFIRSTLANE_B32_10]] - ; GFX6-NEXT: [[COPY32:%[0-9]+]]:vgpr_32 = COPY [[COPY16]] - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_11:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY32]], implicit $exec - ; GFX6-NEXT: $sgpr11 = COPY [[V_READFIRSTLANE_B32_11]] - ; GFX6-NEXT: [[COPY33:%[0-9]+]]:vgpr_32 = COPY [[COPY17]] - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_12:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY33]], implicit $exec - ; GFX6-NEXT: $sgpr12 = COPY [[V_READFIRSTLANE_B32_12]] - ; GFX6-NEXT: [[COPY34:%[0-9]+]]:vgpr_32 = COPY [[COPY18]] - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_13:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY34]], implicit $exec - ; GFX6-NEXT: $sgpr13 = COPY [[V_READFIRSTLANE_B32_13]] - ; GFX6-NEXT: [[COPY35:%[0-9]+]]:vgpr_32 = COPY [[COPY19]] - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_14:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY35]], implicit $exec - ; GFX6-NEXT: $sgpr14 = COPY [[V_READFIRSTLANE_B32_14]] - ; GFX6-NEXT: [[COPY36:%[0-9]+]]:vgpr_32 = COPY [[COPY20]] - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_15:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY36]], implicit $exec - ; GFX6-NEXT: $sgpr15 = COPY [[V_READFIRSTLANE_B32_15]] + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX6-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORDX16_SGPR:%[0-9]+]]:sgpr_512 = S_BUFFER_LOAD_DWORDX16_SGPR [[REG_SEQUENCE]], [[PRED_COPY4]], 0 :: (dereferenceable invariant load (s512), align 4) + ; GFX6-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub0 + ; GFX6-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub1 + ; GFX6-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub2 + ; GFX6-NEXT: [[PRED_COPY8:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub3 + ; GFX6-NEXT: [[PRED_COPY9:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub4 + ; GFX6-NEXT: [[PRED_COPY10:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub5 + ; GFX6-NEXT: [[PRED_COPY11:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub6 + ; GFX6-NEXT: [[PRED_COPY12:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub7 + ; GFX6-NEXT: [[PRED_COPY13:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub8 + ; GFX6-NEXT: [[PRED_COPY14:%[0-9]+]]:sreg_32 = PRED_COPY 
[[S_BUFFER_LOAD_DWORDX16_SGPR]].sub9 + ; GFX6-NEXT: [[PRED_COPY15:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub10 + ; GFX6-NEXT: [[PRED_COPY16:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub11 + ; GFX6-NEXT: [[PRED_COPY17:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub12 + ; GFX6-NEXT: [[PRED_COPY18:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub13 + ; GFX6-NEXT: [[PRED_COPY19:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub14 + ; GFX6-NEXT: [[PRED_COPY20:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub15 + ; GFX6-NEXT: [[PRED_COPY21:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY5]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY21]], implicit $exec + ; GFX6-NEXT: $sgpr0 = PRED_COPY [[V_READFIRSTLANE_B32_]] + ; GFX6-NEXT: [[PRED_COPY22:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY6]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY22]], implicit $exec + ; GFX6-NEXT: $sgpr1 = PRED_COPY [[V_READFIRSTLANE_B32_1]] + ; GFX6-NEXT: [[PRED_COPY23:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY7]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY23]], implicit $exec + ; GFX6-NEXT: $sgpr2 = PRED_COPY [[V_READFIRSTLANE_B32_2]] + ; GFX6-NEXT: [[PRED_COPY24:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY8]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY24]], implicit $exec + ; GFX6-NEXT: $sgpr3 = PRED_COPY [[V_READFIRSTLANE_B32_3]] + ; GFX6-NEXT: [[PRED_COPY25:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY9]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY25]], implicit $exec + ; GFX6-NEXT: $sgpr4 = PRED_COPY [[V_READFIRSTLANE_B32_4]] + ; GFX6-NEXT: [[PRED_COPY26:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY10]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY26]], implicit $exec + ; GFX6-NEXT: $sgpr5 = PRED_COPY [[V_READFIRSTLANE_B32_5]] + ; GFX6-NEXT: [[PRED_COPY27:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY11]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY27]], implicit $exec + ; GFX6-NEXT: $sgpr6 = PRED_COPY [[V_READFIRSTLANE_B32_6]] + ; GFX6-NEXT: [[PRED_COPY28:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY12]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY28]], implicit $exec + ; GFX6-NEXT: $sgpr7 = PRED_COPY [[V_READFIRSTLANE_B32_7]] + ; GFX6-NEXT: [[PRED_COPY29:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY13]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_8:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY29]], implicit $exec + ; GFX6-NEXT: $sgpr8 = PRED_COPY [[V_READFIRSTLANE_B32_8]] + ; GFX6-NEXT: [[PRED_COPY30:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY14]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_9:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY30]], implicit $exec + ; GFX6-NEXT: $sgpr9 = PRED_COPY [[V_READFIRSTLANE_B32_9]] + ; GFX6-NEXT: [[PRED_COPY31:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY15]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_10:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY31]], implicit $exec + ; GFX6-NEXT: $sgpr10 = PRED_COPY [[V_READFIRSTLANE_B32_10]] + ; GFX6-NEXT: [[PRED_COPY32:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY16]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_11:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY32]], implicit $exec + ; GFX6-NEXT: $sgpr11 = PRED_COPY 
[[V_READFIRSTLANE_B32_11]] + ; GFX6-NEXT: [[PRED_COPY33:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY17]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_12:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY33]], implicit $exec + ; GFX6-NEXT: $sgpr12 = PRED_COPY [[V_READFIRSTLANE_B32_12]] + ; GFX6-NEXT: [[PRED_COPY34:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY18]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_13:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY34]], implicit $exec + ; GFX6-NEXT: $sgpr13 = PRED_COPY [[V_READFIRSTLANE_B32_13]] + ; GFX6-NEXT: [[PRED_COPY35:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY19]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_14:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY35]], implicit $exec + ; GFX6-NEXT: $sgpr14 = PRED_COPY [[V_READFIRSTLANE_B32_14]] + ; GFX6-NEXT: [[PRED_COPY36:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY20]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_15:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY36]], implicit $exec + ; GFX6-NEXT: $sgpr15 = PRED_COPY [[V_READFIRSTLANE_B32_15]] ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15 ; GFX7-LABEL: name: s_buffer_load_v16i32 ; GFX7: bb.1 (%ir-block.0): ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORDX16_SGPR:%[0-9]+]]:sgpr_512 = S_BUFFER_LOAD_DWORDX16_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s512), align 4) - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub0 - ; GFX7-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub1 - ; GFX7-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub2 - ; GFX7-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub3 - ; GFX7-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub4 - ; GFX7-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub5 - ; GFX7-NEXT: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub6 - ; GFX7-NEXT: [[COPY12:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub7 - ; GFX7-NEXT: [[COPY13:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub8 - ; GFX7-NEXT: [[COPY14:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub9 - ; GFX7-NEXT: [[COPY15:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub10 - ; GFX7-NEXT: [[COPY16:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub11 - ; GFX7-NEXT: [[COPY17:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub12 - ; GFX7-NEXT: [[COPY18:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub13 - ; GFX7-NEXT: [[COPY19:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub14 - ; GFX7-NEXT: [[COPY20:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub15 - ; GFX7-NEXT: [[COPY21:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] - ; GFX7-NEXT: 
[[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY21]], implicit $exec - ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX7-NEXT: [[COPY22:%[0-9]+]]:vgpr_32 = COPY [[COPY6]] - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY22]], implicit $exec - ; GFX7-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] - ; GFX7-NEXT: [[COPY23:%[0-9]+]]:vgpr_32 = COPY [[COPY7]] - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY23]], implicit $exec - ; GFX7-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] - ; GFX7-NEXT: [[COPY24:%[0-9]+]]:vgpr_32 = COPY [[COPY8]] - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY24]], implicit $exec - ; GFX7-NEXT: $sgpr3 = COPY [[V_READFIRSTLANE_B32_3]] - ; GFX7-NEXT: [[COPY25:%[0-9]+]]:vgpr_32 = COPY [[COPY9]] - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY25]], implicit $exec - ; GFX7-NEXT: $sgpr4 = COPY [[V_READFIRSTLANE_B32_4]] - ; GFX7-NEXT: [[COPY26:%[0-9]+]]:vgpr_32 = COPY [[COPY10]] - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY26]], implicit $exec - ; GFX7-NEXT: $sgpr5 = COPY [[V_READFIRSTLANE_B32_5]] - ; GFX7-NEXT: [[COPY27:%[0-9]+]]:vgpr_32 = COPY [[COPY11]] - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY27]], implicit $exec - ; GFX7-NEXT: $sgpr6 = COPY [[V_READFIRSTLANE_B32_6]] - ; GFX7-NEXT: [[COPY28:%[0-9]+]]:vgpr_32 = COPY [[COPY12]] - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY28]], implicit $exec - ; GFX7-NEXT: $sgpr7 = COPY [[V_READFIRSTLANE_B32_7]] - ; GFX7-NEXT: [[COPY29:%[0-9]+]]:vgpr_32 = COPY [[COPY13]] - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_8:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY29]], implicit $exec - ; GFX7-NEXT: $sgpr8 = COPY [[V_READFIRSTLANE_B32_8]] - ; GFX7-NEXT: [[COPY30:%[0-9]+]]:vgpr_32 = COPY [[COPY14]] - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_9:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY30]], implicit $exec - ; GFX7-NEXT: $sgpr9 = COPY [[V_READFIRSTLANE_B32_9]] - ; GFX7-NEXT: [[COPY31:%[0-9]+]]:vgpr_32 = COPY [[COPY15]] - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_10:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY31]], implicit $exec - ; GFX7-NEXT: $sgpr10 = COPY [[V_READFIRSTLANE_B32_10]] - ; GFX7-NEXT: [[COPY32:%[0-9]+]]:vgpr_32 = COPY [[COPY16]] - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_11:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY32]], implicit $exec - ; GFX7-NEXT: $sgpr11 = COPY [[V_READFIRSTLANE_B32_11]] - ; GFX7-NEXT: [[COPY33:%[0-9]+]]:vgpr_32 = COPY [[COPY17]] - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_12:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY33]], implicit $exec - ; GFX7-NEXT: $sgpr12 = COPY [[V_READFIRSTLANE_B32_12]] - ; GFX7-NEXT: [[COPY34:%[0-9]+]]:vgpr_32 = COPY [[COPY18]] - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_13:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY34]], implicit $exec - ; GFX7-NEXT: $sgpr13 = COPY [[V_READFIRSTLANE_B32_13]] - ; GFX7-NEXT: [[COPY35:%[0-9]+]]:vgpr_32 = COPY [[COPY19]] - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_14:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY35]], implicit $exec - ; GFX7-NEXT: $sgpr14 = COPY [[V_READFIRSTLANE_B32_14]] - ; GFX7-NEXT: [[COPY36:%[0-9]+]]:vgpr_32 = COPY [[COPY20]] - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_15:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY36]], implicit $exec - ; GFX7-NEXT: $sgpr15 = COPY [[V_READFIRSTLANE_B32_15]] + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX7-NEXT: 
[[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORDX16_SGPR:%[0-9]+]]:sgpr_512 = S_BUFFER_LOAD_DWORDX16_SGPR [[REG_SEQUENCE]], [[PRED_COPY4]], 0 :: (dereferenceable invariant load (s512), align 4) + ; GFX7-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub0 + ; GFX7-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub1 + ; GFX7-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub2 + ; GFX7-NEXT: [[PRED_COPY8:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub3 + ; GFX7-NEXT: [[PRED_COPY9:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub4 + ; GFX7-NEXT: [[PRED_COPY10:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub5 + ; GFX7-NEXT: [[PRED_COPY11:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub6 + ; GFX7-NEXT: [[PRED_COPY12:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub7 + ; GFX7-NEXT: [[PRED_COPY13:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub8 + ; GFX7-NEXT: [[PRED_COPY14:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub9 + ; GFX7-NEXT: [[PRED_COPY15:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub10 + ; GFX7-NEXT: [[PRED_COPY16:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub11 + ; GFX7-NEXT: [[PRED_COPY17:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub12 + ; GFX7-NEXT: [[PRED_COPY18:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub13 + ; GFX7-NEXT: [[PRED_COPY19:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub14 + ; GFX7-NEXT: [[PRED_COPY20:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub15 + ; GFX7-NEXT: [[PRED_COPY21:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY5]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY21]], implicit $exec + ; GFX7-NEXT: $sgpr0 = PRED_COPY [[V_READFIRSTLANE_B32_]] + ; GFX7-NEXT: [[PRED_COPY22:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY6]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY22]], implicit $exec + ; GFX7-NEXT: $sgpr1 = PRED_COPY [[V_READFIRSTLANE_B32_1]] + ; GFX7-NEXT: [[PRED_COPY23:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY7]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY23]], implicit $exec + ; GFX7-NEXT: $sgpr2 = PRED_COPY [[V_READFIRSTLANE_B32_2]] + ; GFX7-NEXT: [[PRED_COPY24:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY8]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY24]], implicit $exec + ; GFX7-NEXT: $sgpr3 = PRED_COPY [[V_READFIRSTLANE_B32_3]] + ; GFX7-NEXT: [[PRED_COPY25:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY9]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY25]], implicit $exec + ; GFX7-NEXT: $sgpr4 = PRED_COPY [[V_READFIRSTLANE_B32_4]] + ; GFX7-NEXT: [[PRED_COPY26:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY10]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY26]], implicit $exec 
+ ; GFX7-NEXT: $sgpr5 = PRED_COPY [[V_READFIRSTLANE_B32_5]] + ; GFX7-NEXT: [[PRED_COPY27:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY11]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY27]], implicit $exec + ; GFX7-NEXT: $sgpr6 = PRED_COPY [[V_READFIRSTLANE_B32_6]] + ; GFX7-NEXT: [[PRED_COPY28:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY12]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY28]], implicit $exec + ; GFX7-NEXT: $sgpr7 = PRED_COPY [[V_READFIRSTLANE_B32_7]] + ; GFX7-NEXT: [[PRED_COPY29:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY13]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_8:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY29]], implicit $exec + ; GFX7-NEXT: $sgpr8 = PRED_COPY [[V_READFIRSTLANE_B32_8]] + ; GFX7-NEXT: [[PRED_COPY30:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY14]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_9:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY30]], implicit $exec + ; GFX7-NEXT: $sgpr9 = PRED_COPY [[V_READFIRSTLANE_B32_9]] + ; GFX7-NEXT: [[PRED_COPY31:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY15]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_10:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY31]], implicit $exec + ; GFX7-NEXT: $sgpr10 = PRED_COPY [[V_READFIRSTLANE_B32_10]] + ; GFX7-NEXT: [[PRED_COPY32:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY16]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_11:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY32]], implicit $exec + ; GFX7-NEXT: $sgpr11 = PRED_COPY [[V_READFIRSTLANE_B32_11]] + ; GFX7-NEXT: [[PRED_COPY33:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY17]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_12:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY33]], implicit $exec + ; GFX7-NEXT: $sgpr12 = PRED_COPY [[V_READFIRSTLANE_B32_12]] + ; GFX7-NEXT: [[PRED_COPY34:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY18]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_13:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY34]], implicit $exec + ; GFX7-NEXT: $sgpr13 = PRED_COPY [[V_READFIRSTLANE_B32_13]] + ; GFX7-NEXT: [[PRED_COPY35:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY19]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_14:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY35]], implicit $exec + ; GFX7-NEXT: $sgpr14 = PRED_COPY [[V_READFIRSTLANE_B32_14]] + ; GFX7-NEXT: [[PRED_COPY36:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY20]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_15:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY36]], implicit $exec + ; GFX7-NEXT: $sgpr15 = PRED_COPY [[V_READFIRSTLANE_B32_15]] ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15 ; GFX8-LABEL: name: s_buffer_load_v16i32 ; GFX8: bb.1 (%ir-block.0): ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORDX16_SGPR:%[0-9]+]]:sgpr_512 = S_BUFFER_LOAD_DWORDX16_SGPR [[REG_SEQUENCE]], 
[[COPY4]], 0 :: (dereferenceable invariant load (s512), align 4) - ; GFX8-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub0 - ; GFX8-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub1 - ; GFX8-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub2 - ; GFX8-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub3 - ; GFX8-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub4 - ; GFX8-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub5 - ; GFX8-NEXT: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub6 - ; GFX8-NEXT: [[COPY12:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub7 - ; GFX8-NEXT: [[COPY13:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub8 - ; GFX8-NEXT: [[COPY14:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub9 - ; GFX8-NEXT: [[COPY15:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub10 - ; GFX8-NEXT: [[COPY16:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub11 - ; GFX8-NEXT: [[COPY17:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub12 - ; GFX8-NEXT: [[COPY18:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub13 - ; GFX8-NEXT: [[COPY19:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub14 - ; GFX8-NEXT: [[COPY20:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub15 - ; GFX8-NEXT: [[COPY21:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY21]], implicit $exec - ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX8-NEXT: [[COPY22:%[0-9]+]]:vgpr_32 = COPY [[COPY6]] - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY22]], implicit $exec - ; GFX8-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] - ; GFX8-NEXT: [[COPY23:%[0-9]+]]:vgpr_32 = COPY [[COPY7]] - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY23]], implicit $exec - ; GFX8-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] - ; GFX8-NEXT: [[COPY24:%[0-9]+]]:vgpr_32 = COPY [[COPY8]] - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY24]], implicit $exec - ; GFX8-NEXT: $sgpr3 = COPY [[V_READFIRSTLANE_B32_3]] - ; GFX8-NEXT: [[COPY25:%[0-9]+]]:vgpr_32 = COPY [[COPY9]] - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY25]], implicit $exec - ; GFX8-NEXT: $sgpr4 = COPY [[V_READFIRSTLANE_B32_4]] - ; GFX8-NEXT: [[COPY26:%[0-9]+]]:vgpr_32 = COPY [[COPY10]] - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY26]], implicit $exec - ; GFX8-NEXT: $sgpr5 = COPY [[V_READFIRSTLANE_B32_5]] - ; GFX8-NEXT: [[COPY27:%[0-9]+]]:vgpr_32 = COPY [[COPY11]] - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY27]], implicit $exec - ; GFX8-NEXT: $sgpr6 = COPY [[V_READFIRSTLANE_B32_6]] - ; GFX8-NEXT: [[COPY28:%[0-9]+]]:vgpr_32 = COPY [[COPY12]] - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY28]], implicit $exec - ; GFX8-NEXT: $sgpr7 = COPY [[V_READFIRSTLANE_B32_7]] - ; GFX8-NEXT: [[COPY29:%[0-9]+]]:vgpr_32 = COPY [[COPY13]] - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_8:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY29]], implicit $exec - ; GFX8-NEXT: $sgpr8 = COPY [[V_READFIRSTLANE_B32_8]] - ; GFX8-NEXT: [[COPY30:%[0-9]+]]:vgpr_32 = COPY [[COPY14]] - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_9:%[0-9]+]]:sreg_32 = 
V_READFIRSTLANE_B32 [[COPY30]], implicit $exec - ; GFX8-NEXT: $sgpr9 = COPY [[V_READFIRSTLANE_B32_9]] - ; GFX8-NEXT: [[COPY31:%[0-9]+]]:vgpr_32 = COPY [[COPY15]] - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_10:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY31]], implicit $exec - ; GFX8-NEXT: $sgpr10 = COPY [[V_READFIRSTLANE_B32_10]] - ; GFX8-NEXT: [[COPY32:%[0-9]+]]:vgpr_32 = COPY [[COPY16]] - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_11:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY32]], implicit $exec - ; GFX8-NEXT: $sgpr11 = COPY [[V_READFIRSTLANE_B32_11]] - ; GFX8-NEXT: [[COPY33:%[0-9]+]]:vgpr_32 = COPY [[COPY17]] - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_12:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY33]], implicit $exec - ; GFX8-NEXT: $sgpr12 = COPY [[V_READFIRSTLANE_B32_12]] - ; GFX8-NEXT: [[COPY34:%[0-9]+]]:vgpr_32 = COPY [[COPY18]] - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_13:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY34]], implicit $exec - ; GFX8-NEXT: $sgpr13 = COPY [[V_READFIRSTLANE_B32_13]] - ; GFX8-NEXT: [[COPY35:%[0-9]+]]:vgpr_32 = COPY [[COPY19]] - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_14:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY35]], implicit $exec - ; GFX8-NEXT: $sgpr14 = COPY [[V_READFIRSTLANE_B32_14]] - ; GFX8-NEXT: [[COPY36:%[0-9]+]]:vgpr_32 = COPY [[COPY20]] - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_15:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY36]], implicit $exec - ; GFX8-NEXT: $sgpr15 = COPY [[V_READFIRSTLANE_B32_15]] + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORDX16_SGPR:%[0-9]+]]:sgpr_512 = S_BUFFER_LOAD_DWORDX16_SGPR [[REG_SEQUENCE]], [[PRED_COPY4]], 0 :: (dereferenceable invariant load (s512), align 4) + ; GFX8-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub0 + ; GFX8-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub1 + ; GFX8-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub2 + ; GFX8-NEXT: [[PRED_COPY8:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub3 + ; GFX8-NEXT: [[PRED_COPY9:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub4 + ; GFX8-NEXT: [[PRED_COPY10:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub5 + ; GFX8-NEXT: [[PRED_COPY11:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub6 + ; GFX8-NEXT: [[PRED_COPY12:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub7 + ; GFX8-NEXT: [[PRED_COPY13:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub8 + ; GFX8-NEXT: [[PRED_COPY14:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub9 + ; GFX8-NEXT: [[PRED_COPY15:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub10 + ; GFX8-NEXT: [[PRED_COPY16:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub11 + ; GFX8-NEXT: [[PRED_COPY17:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub12 + ; GFX8-NEXT: [[PRED_COPY18:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub13 + ; GFX8-NEXT: [[PRED_COPY19:%[0-9]+]]:sreg_32 = PRED_COPY 
[[S_BUFFER_LOAD_DWORDX16_SGPR]].sub14 + ; GFX8-NEXT: [[PRED_COPY20:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub15 + ; GFX8-NEXT: [[PRED_COPY21:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY5]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY21]], implicit $exec + ; GFX8-NEXT: $sgpr0 = PRED_COPY [[V_READFIRSTLANE_B32_]] + ; GFX8-NEXT: [[PRED_COPY22:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY6]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY22]], implicit $exec + ; GFX8-NEXT: $sgpr1 = PRED_COPY [[V_READFIRSTLANE_B32_1]] + ; GFX8-NEXT: [[PRED_COPY23:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY7]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY23]], implicit $exec + ; GFX8-NEXT: $sgpr2 = PRED_COPY [[V_READFIRSTLANE_B32_2]] + ; GFX8-NEXT: [[PRED_COPY24:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY8]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY24]], implicit $exec + ; GFX8-NEXT: $sgpr3 = PRED_COPY [[V_READFIRSTLANE_B32_3]] + ; GFX8-NEXT: [[PRED_COPY25:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY9]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY25]], implicit $exec + ; GFX8-NEXT: $sgpr4 = PRED_COPY [[V_READFIRSTLANE_B32_4]] + ; GFX8-NEXT: [[PRED_COPY26:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY10]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY26]], implicit $exec + ; GFX8-NEXT: $sgpr5 = PRED_COPY [[V_READFIRSTLANE_B32_5]] + ; GFX8-NEXT: [[PRED_COPY27:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY11]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY27]], implicit $exec + ; GFX8-NEXT: $sgpr6 = PRED_COPY [[V_READFIRSTLANE_B32_6]] + ; GFX8-NEXT: [[PRED_COPY28:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY12]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY28]], implicit $exec + ; GFX8-NEXT: $sgpr7 = PRED_COPY [[V_READFIRSTLANE_B32_7]] + ; GFX8-NEXT: [[PRED_COPY29:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY13]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_8:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY29]], implicit $exec + ; GFX8-NEXT: $sgpr8 = PRED_COPY [[V_READFIRSTLANE_B32_8]] + ; GFX8-NEXT: [[PRED_COPY30:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY14]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_9:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY30]], implicit $exec + ; GFX8-NEXT: $sgpr9 = PRED_COPY [[V_READFIRSTLANE_B32_9]] + ; GFX8-NEXT: [[PRED_COPY31:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY15]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_10:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY31]], implicit $exec + ; GFX8-NEXT: $sgpr10 = PRED_COPY [[V_READFIRSTLANE_B32_10]] + ; GFX8-NEXT: [[PRED_COPY32:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY16]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_11:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY32]], implicit $exec + ; GFX8-NEXT: $sgpr11 = PRED_COPY [[V_READFIRSTLANE_B32_11]] + ; GFX8-NEXT: [[PRED_COPY33:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY17]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_12:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY33]], implicit $exec + ; GFX8-NEXT: $sgpr12 = PRED_COPY [[V_READFIRSTLANE_B32_12]] + ; GFX8-NEXT: [[PRED_COPY34:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY18]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_13:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY34]], implicit $exec + ; GFX8-NEXT: $sgpr13 = 
PRED_COPY [[V_READFIRSTLANE_B32_13]] + ; GFX8-NEXT: [[PRED_COPY35:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY19]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_14:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY35]], implicit $exec + ; GFX8-NEXT: $sgpr14 = PRED_COPY [[V_READFIRSTLANE_B32_14]] + ; GFX8-NEXT: [[PRED_COPY36:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY20]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_15:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY36]], implicit $exec + ; GFX8-NEXT: $sgpr15 = PRED_COPY [[V_READFIRSTLANE_B32_15]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15 %val = call <16 x i32> @llvm.amdgcn.s.buffer.load.v16i32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret <16 x i32> %val @@ -626,45 +626,45 @@ ; GFX6: bb.1 (%ir-block.0): ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX6-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1 ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX6-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY4]], implicit $exec + ; GFX6-NEXT: $sgpr0 = PRED_COPY [[V_READFIRSTLANE_B32_]] ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX7-LABEL: name: s_buffer_load_i32_offset_1 ; GFX7: bb.1 (%ir-block.0): ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], 
%subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1 ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY4]], implicit $exec + ; GFX7-NEXT: $sgpr0 = PRED_COPY [[V_READFIRSTLANE_B32_]] ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX8-LABEL: name: s_buffer_load_i32_offset_1 ; GFX8: bb.1 (%ir-block.0): ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 1, 0 :: (dereferenceable invariant load (s32)) - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]] - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_BUFFER_LOAD_DWORD_IMM]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY4]], implicit $exec + ; GFX8-NEXT: $sgpr0 = PRED_COPY [[V_READFIRSTLANE_B32_]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 %val = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %rsrc, i32 1, i32 0) ret i32 %val @@ -675,43 +675,43 @@ ; GFX6: bb.1 (%ir-block.0): ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX6-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, 
[[PRED_COPY3]], %subreg.sub3 ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 1, 1 :: (dereferenceable invariant load (s32)) - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]] - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX6-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_BUFFER_LOAD_DWORD_IMM]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY4]], implicit $exec + ; GFX6-NEXT: $sgpr0 = PRED_COPY [[V_READFIRSTLANE_B32_]] ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX7-LABEL: name: s_buffer_load_i32_offset_glc_4 ; GFX7: bb.1 (%ir-block.0): ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 1, 1 :: (dereferenceable invariant load (s32)) - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]] - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_BUFFER_LOAD_DWORD_IMM]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY4]], implicit $exec + ; GFX7-NEXT: $sgpr0 = PRED_COPY [[V_READFIRSTLANE_B32_]] ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX8-LABEL: name: s_buffer_load_i32_offset_glc_4 ; GFX8: bb.1 (%ir-block.0): ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 4, 1 :: (dereferenceable invariant load (s32)) - ; GFX8-NEXT: 
[[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]] - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_BUFFER_LOAD_DWORD_IMM]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY4]], implicit $exec + ; GFX8-NEXT: $sgpr0 = PRED_COPY [[V_READFIRSTLANE_B32_]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 %val = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %rsrc, i32 4, i32 1) ret i32 %val @@ -722,45 +722,45 @@ ; GFX6: bb.1 (%ir-block.0): ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX6-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 255 ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX6-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY4]], implicit $exec + ; GFX6-NEXT: $sgpr0 = PRED_COPY [[V_READFIRSTLANE_B32_]] ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX7-LABEL: name: s_buffer_load_i32_offset_255 ; GFX7: bb.1 (%ir-block.0): ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 255 ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) - ; GFX7-NEXT: 
[[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY4]], implicit $exec + ; GFX7-NEXT: $sgpr0 = PRED_COPY [[V_READFIRSTLANE_B32_]] ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX8-LABEL: name: s_buffer_load_i32_offset_255 ; GFX8: bb.1 (%ir-block.0): ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 255, 0 :: (dereferenceable invariant load (s32)) - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]] - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_BUFFER_LOAD_DWORD_IMM]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY4]], implicit $exec + ; GFX8-NEXT: $sgpr0 = PRED_COPY [[V_READFIRSTLANE_B32_]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 %val = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %rsrc, i32 255, i32 0) ret i32 %val @@ -771,43 +771,43 @@ ; GFX6: bb.1 (%ir-block.0): ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX6-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 64, 0 :: (dereferenceable invariant load (s32)) - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]] - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], 
implicit $exec - ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX6-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_BUFFER_LOAD_DWORD_IMM]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY4]], implicit $exec + ; GFX6-NEXT: $sgpr0 = PRED_COPY [[V_READFIRSTLANE_B32_]] ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX7-LABEL: name: s_buffer_load_i32_offset_256 ; GFX7: bb.1 (%ir-block.0): ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 64, 0 :: (dereferenceable invariant load (s32)) - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]] - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_BUFFER_LOAD_DWORD_IMM]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY4]], implicit $exec + ; GFX7-NEXT: $sgpr0 = PRED_COPY [[V_READFIRSTLANE_B32_]] ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX8-LABEL: name: s_buffer_load_i32_offset_256 ; GFX8: bb.1 (%ir-block.0): ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 256, 0 :: (dereferenceable invariant load (s32)) - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]] - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_BUFFER_LOAD_DWORD_IMM]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = 
V_READFIRSTLANE_B32 [[PRED_COPY4]], implicit $exec + ; GFX8-NEXT: $sgpr0 = PRED_COPY [[V_READFIRSTLANE_B32_]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 %val = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %rsrc, i32 256, i32 0) ret i32 %val @@ -818,43 +818,43 @@ ; GFX6: bb.1 (%ir-block.0): ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX6-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 255, 0 :: (dereferenceable invariant load (s32)) - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]] - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX6-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_BUFFER_LOAD_DWORD_IMM]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY4]], implicit $exec + ; GFX6-NEXT: $sgpr0 = PRED_COPY [[V_READFIRSTLANE_B32_]] ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX7-LABEL: name: s_buffer_load_i32_offset_1020 ; GFX7: bb.1 (%ir-block.0): ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 255, 0 :: (dereferenceable invariant load (s32)) - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]] - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_BUFFER_LOAD_DWORD_IMM]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY4]], implicit $exec + ; GFX7-NEXT: $sgpr0 = PRED_COPY [[V_READFIRSTLANE_B32_]] ; GFX7-NEXT: SI_RETURN_TO_EPILOG 
implicit $sgpr0 ; GFX8-LABEL: name: s_buffer_load_i32_offset_1020 ; GFX8: bb.1 (%ir-block.0): ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 1020, 0 :: (dereferenceable invariant load (s32)) - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]] - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_BUFFER_LOAD_DWORD_IMM]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY4]], implicit $exec + ; GFX8-NEXT: $sgpr0 = PRED_COPY [[V_READFIRSTLANE_B32_]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 %val = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %rsrc, i32 1020, i32 0) ret i32 %val @@ -865,45 +865,45 @@ ; GFX6: bb.1 (%ir-block.0): ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX6-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1023 ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX6-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY4]], implicit $exec + ; GFX6-NEXT: $sgpr0 = PRED_COPY [[V_READFIRSTLANE_B32_]] ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX7-LABEL: name: s_buffer_load_i32_offset_1023 ; 
GFX7: bb.1 (%ir-block.0): ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1023 ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY4]], implicit $exec + ; GFX7-NEXT: $sgpr0 = PRED_COPY [[V_READFIRSTLANE_B32_]] ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX8-LABEL: name: s_buffer_load_i32_offset_1023 ; GFX8: bb.1 (%ir-block.0): ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 1023, 0 :: (dereferenceable invariant load (s32)) - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]] - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_BUFFER_LOAD_DWORD_IMM]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY4]], implicit $exec + ; GFX8-NEXT: $sgpr0 = PRED_COPY [[V_READFIRSTLANE_B32_]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 %val = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %rsrc, i32 1023, i32 0) ret i32 %val @@ -914,44 +914,44 @@ ; GFX6: bb.1 (%ir-block.0): ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, 
$sgpr5 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX6-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1024 ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX6-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY4]], implicit $exec + ; GFX6-NEXT: $sgpr0 = PRED_COPY [[V_READFIRSTLANE_B32_]] ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX7-LABEL: name: s_buffer_load_i32_offset_1024 ; GFX7: bb.1 (%ir-block.0): ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 256, 0 :: (dereferenceable invariant load (s32)) - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]] - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY4]], implicit $exec + ; GFX7-NEXT: $sgpr0 = PRED_COPY [[V_READFIRSTLANE_B32_]] ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX8-LABEL: name: s_buffer_load_i32_offset_1024 ; GFX8: bb.1 (%ir-block.0): ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = 
COPY $sgpr3 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 1024, 0 :: (dereferenceable invariant load (s32)) - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]] - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_BUFFER_LOAD_DWORD_IMM]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY4]], implicit $exec + ; GFX8-NEXT: $sgpr0 = PRED_COPY [[V_READFIRSTLANE_B32_]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 %val = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %rsrc, i32 1024, i32 0) ret i32 %val @@ -962,45 +962,45 @@ ; GFX6: bb.1 (%ir-block.0): ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX6-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1025 ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX6-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY4]], implicit $exec + ; GFX6-NEXT: $sgpr0 = PRED_COPY [[V_READFIRSTLANE_B32_]] ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX7-LABEL: name: s_buffer_load_i32_offset_1025 ; GFX7: bb.1 (%ir-block.0): ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; 
GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1025 ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY4]], implicit $exec + ; GFX7-NEXT: $sgpr0 = PRED_COPY [[V_READFIRSTLANE_B32_]] ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX8-LABEL: name: s_buffer_load_i32_offset_1025 ; GFX8: bb.1 (%ir-block.0): ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 1025, 0 :: (dereferenceable invariant load (s32)) - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]] - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_BUFFER_LOAD_DWORD_IMM]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY4]], implicit $exec + ; GFX8-NEXT: $sgpr0 = PRED_COPY [[V_READFIRSTLANE_B32_]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 %val = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %rsrc, i32 1025, i32 0) ret i32 %val @@ -1011,46 +1011,46 @@ ; GFX6: bb.1 (%ir-block.0): ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6-NEXT: 
[[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX6-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX6-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY4]], implicit $exec + ; GFX6-NEXT: $sgpr0 = PRED_COPY [[V_READFIRSTLANE_B32_]] ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX7-LABEL: name: s_buffer_load_i32_offset_neg1 ; GFX7: bb.1 (%ir-block.0): ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY4]], implicit $exec + ; GFX7-NEXT: $sgpr0 = PRED_COPY [[V_READFIRSTLANE_B32_]] ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX8-LABEL: name: s_buffer_load_i32_offset_neg1 ; GFX8: bb.1 (%ir-block.0): ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], 
%subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY4]], implicit $exec + ; GFX8-NEXT: $sgpr0 = PRED_COPY [[V_READFIRSTLANE_B32_]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 -1, i32 0) ret i32 %load @@ -1061,45 +1061,45 @@ ; GFX6: bb.1 (%ir-block.0): ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX6-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -4 ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX6-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY4]], implicit $exec + ; GFX6-NEXT: $sgpr0 = PRED_COPY [[V_READFIRSTLANE_B32_]] ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX7-LABEL: name: s_buffer_load_i32_offset_neg4 ; GFX7: bb.1 (%ir-block.0): ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE 
[[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 1073741823, 0 :: (dereferenceable invariant load (s32)) - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]] - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY4]], implicit $exec + ; GFX7-NEXT: $sgpr0 = PRED_COPY [[V_READFIRSTLANE_B32_]] ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX8-LABEL: name: s_buffer_load_i32_offset_neg4 ; GFX8: bb.1 (%ir-block.0): ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -4 ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY4]], implicit $exec + ; GFX8-NEXT: $sgpr0 = PRED_COPY [[V_READFIRSTLANE_B32_]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 -4, i32 0) ret i32 %load @@ -1110,45 +1110,45 @@ ; GFX6: bb.1 (%ir-block.0): ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, 
[[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX6-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -8 ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX6-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY4]], implicit $exec + ; GFX6-NEXT: $sgpr0 = PRED_COPY [[V_READFIRSTLANE_B32_]] ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX7-LABEL: name: s_buffer_load_i32_offset_neg8 ; GFX7: bb.1 (%ir-block.0): ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 1073741822, 0 :: (dereferenceable invariant load (s32)) - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]] - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY4]], implicit $exec + ; GFX7-NEXT: $sgpr0 = PRED_COPY [[V_READFIRSTLANE_B32_]] ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX8-LABEL: name: s_buffer_load_i32_offset_neg8 ; GFX8: bb.1 (%ir-block.0): ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; 
GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -8 ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY4]], implicit $exec + ; GFX8-NEXT: $sgpr0 = PRED_COPY [[V_READFIRSTLANE_B32_]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 -8, i32 0) ret i32 %load @@ -1159,45 +1159,45 @@ ; GFX6: bb.1 (%ir-block.0): ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX6-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648 ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX6-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY4]], implicit $exec + ; GFX6-NEXT: $sgpr0 = PRED_COPY [[V_READFIRSTLANE_B32_]] ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX7-LABEL: name: s_buffer_load_i32_offset_bit31 ; GFX7: bb.1 (%ir-block.0): ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY 
$sgpr2 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 536870912, 0 :: (dereferenceable invariant load (s32)) - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]] - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY4]], implicit $exec + ; GFX7-NEXT: $sgpr0 = PRED_COPY [[V_READFIRSTLANE_B32_]] ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX8-LABEL: name: s_buffer_load_i32_offset_bit31 ; GFX8: bb.1 (%ir-block.0): ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648 ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY4]], implicit $exec + ; GFX8-NEXT: $sgpr0 = PRED_COPY [[V_READFIRSTLANE_B32_]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 -2147483648, i32 0) ret i32 %load @@ -1208,45 +1208,45 @@ ; GFX6: bb.1 (%ir-block.0): ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX6-NEXT: 
[[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX6-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1073741824 ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 1 :: (dereferenceable invariant load (s32)) - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX6-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY4]], implicit $exec + ; GFX6-NEXT: $sgpr0 = PRED_COPY [[V_READFIRSTLANE_B32_]] ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX7-LABEL: name: s_buffer_load_i32_offset_glc_bit30 ; GFX7: bb.1 (%ir-block.0): ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 268435456, 1 :: (dereferenceable invariant load (s32)) - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]] - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY4]], implicit $exec + ; GFX7-NEXT: $sgpr0 = PRED_COPY [[V_READFIRSTLANE_B32_]] ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX8-LABEL: name: s_buffer_load_i32_offset_glc_bit30 ; GFX8: bb.1 (%ir-block.0): ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = 
PRED_COPY $sgpr4 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1073741824 ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 1 :: (dereferenceable invariant load (s32)) - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY4]], implicit $exec + ; GFX8-NEXT: $sgpr0 = PRED_COPY [[V_READFIRSTLANE_B32_]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 1073741824, i32 1) ret i32 %load @@ -1257,45 +1257,45 @@ ; GFX6: bb.1 (%ir-block.0): ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX6-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 536870912 ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX6-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY4]], implicit $exec + ; GFX6-NEXT: $sgpr0 = PRED_COPY [[V_READFIRSTLANE_B32_]] ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX7-LABEL: name: s_buffer_load_i32_offset_bit29 ; GFX7: bb.1 (%ir-block.0): ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX7-NEXT: 
[[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 134217728, 0 :: (dereferenceable invariant load (s32)) - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]] - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY4]], implicit $exec + ; GFX7-NEXT: $sgpr0 = PRED_COPY [[V_READFIRSTLANE_B32_]] ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX8-LABEL: name: s_buffer_load_i32_offset_bit29 ; GFX8: bb.1 (%ir-block.0): ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 536870912 ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY4]], implicit $exec + ; GFX8-NEXT: $sgpr0 = PRED_COPY [[V_READFIRSTLANE_B32_]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 536870912, i32 0) ret i32 %load @@ -1306,45 +1306,45 @@ ; GFX6: bb.1 (%ir-block.0): ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = 
PRED_COPY $sgpr4 + ; GFX6-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2097152 ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX6-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY4]], implicit $exec + ; GFX6-NEXT: $sgpr0 = PRED_COPY [[V_READFIRSTLANE_B32_]] ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX7-LABEL: name: s_buffer_load_i32_offset_bit21 ; GFX7: bb.1 (%ir-block.0): ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 524288, 0 :: (dereferenceable invariant load (s32)) - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]] - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY4]], implicit $exec + ; GFX7-NEXT: $sgpr0 = PRED_COPY [[V_READFIRSTLANE_B32_]] ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX8-LABEL: name: s_buffer_load_i32_offset_bit21 ; GFX8: bb.1 (%ir-block.0): ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX8-NEXT: 
[[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2097152 ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY4]], implicit $exec + ; GFX8-NEXT: $sgpr0 = PRED_COPY [[V_READFIRSTLANE_B32_]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 2097152, i32 0) ret i32 %load @@ -1355,45 +1355,45 @@ ; GFX6: bb.1 (%ir-block.0): ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX6-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1048576 ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX6-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY4]], implicit $exec + ; GFX6-NEXT: $sgpr0 = PRED_COPY [[V_READFIRSTLANE_B32_]] ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX7-LABEL: name: s_buffer_load_i32_offset_bit20 ; GFX7: bb.1 (%ir-block.0): ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY 
$sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 262144, 0 :: (dereferenceable invariant load (s32)) - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]] - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY4]], implicit $exec + ; GFX7-NEXT: $sgpr0 = PRED_COPY [[V_READFIRSTLANE_B32_]] ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX8-LABEL: name: s_buffer_load_i32_offset_bit20 ; GFX8: bb.1 (%ir-block.0): ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1048576 ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY4]], implicit $exec + ; GFX8-NEXT: $sgpr0 = PRED_COPY [[V_READFIRSTLANE_B32_]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 1048576, i32 0) ret i32 %load @@ -1404,45 +1404,45 @@ ; GFX6: bb.1 (%ir-block.0): ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX6-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX6-NEXT: 
[[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1048576 ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX6-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY4]], implicit $exec + ; GFX6-NEXT: $sgpr0 = PRED_COPY [[V_READFIRSTLANE_B32_]] ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX7-LABEL: name: s_buffer_load_i32_offset_neg_bit20 ; GFX7: bb.1 (%ir-block.0): ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 1073479680, 0 :: (dereferenceable invariant load (s32)) - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]] - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY4]], implicit $exec + ; GFX7-NEXT: $sgpr0 = PRED_COPY [[V_READFIRSTLANE_B32_]] ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX8-LABEL: name: s_buffer_load_i32_offset_neg_bit20 ; GFX8: bb.1 (%ir-block.0): ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], 
%subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1048576 ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY4]], implicit $exec + ; GFX8-NEXT: $sgpr0 = PRED_COPY [[V_READFIRSTLANE_B32_]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 -1048576, i32 0) ret i32 %load @@ -1453,44 +1453,44 @@ ; GFX6: bb.1 (%ir-block.0): ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX6-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 524288 ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX6-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY4]], implicit $exec + ; GFX6-NEXT: $sgpr0 = PRED_COPY [[V_READFIRSTLANE_B32_]] ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX7-LABEL: name: s_buffer_load_i32_offset_bit19 ; GFX7: bb.1 (%ir-block.0): ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], 
%subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 131072, 0 :: (dereferenceable invariant load (s32)) - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]] - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY4]], implicit $exec + ; GFX7-NEXT: $sgpr0 = PRED_COPY [[V_READFIRSTLANE_B32_]] ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX8-LABEL: name: s_buffer_load_i32_offset_bit19 ; GFX8: bb.1 (%ir-block.0): ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 524288, 0 :: (dereferenceable invariant load (s32)) - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]] - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_BUFFER_LOAD_DWORD_IMM]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY4]], implicit $exec + ; GFX8-NEXT: $sgpr0 = PRED_COPY [[V_READFIRSTLANE_B32_]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 524288, i32 0) ret i32 %load @@ -1501,45 +1501,45 @@ ; GFX6: bb.1 (%ir-block.0): ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX6-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 ; GFX6-NEXT: 
[[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -524288 ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX6-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY4]], implicit $exec + ; GFX6-NEXT: $sgpr0 = PRED_COPY [[V_READFIRSTLANE_B32_]] ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX7-LABEL: name: s_buffer_load_i32_offset_neg_bit19 ; GFX7: bb.1 (%ir-block.0): ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 1073610752, 0 :: (dereferenceable invariant load (s32)) - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]] - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY4]], implicit $exec + ; GFX7-NEXT: $sgpr0 = PRED_COPY [[V_READFIRSTLANE_B32_]] ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX8-LABEL: name: s_buffer_load_i32_offset_neg_bit19 ; GFX8: bb.1 (%ir-block.0): ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -524288 ; GFX8-NEXT: 
[[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY4]], implicit $exec + ; GFX8-NEXT: $sgpr0 = PRED_COPY [[V_READFIRSTLANE_B32_]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 -524288, i32 0) ret i32 %load @@ -1551,43 +1551,43 @@ ; GFX6: bb.1 (%ir-block.0): ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX6-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) - ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX7-LABEL: name: s_buffer_load_f32_vgpr_offset ; GFX7: bb.1 (%ir-block.0): ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX7-NEXT: 
[[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) - ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX8-LABEL: name: s_buffer_load_f32_vgpr_offset ; GFX8: bb.1 (%ir-block.0): ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) - ; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX8-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret float %val @@ -1598,52 +1598,52 @@ ; GFX6: bb.1 (%ir-block.0): ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX6-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 
= PRED_COPY $vgpr0 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX2_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s64), align 4) - ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub0 - ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub1 - ; GFX6-NEXT: $vgpr0 = COPY [[COPY5]] - ; GFX6-NEXT: $vgpr1 = COPY [[COPY6]] + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX2_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s64), align 4) + ; GFX6-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub0 + ; GFX6-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub1 + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY5]] + ; GFX6-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY6]] ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 ; GFX7-LABEL: name: s_buffer_load_v2f32_vgpr_offset ; GFX7: bb.1 (%ir-block.0): ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX2_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s64), align 4) - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub0 - ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub1 - ; GFX7-NEXT: $vgpr0 = COPY [[COPY5]] - ; GFX7-NEXT: $vgpr1 = COPY [[COPY6]] + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX2_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s64), align 4) + ; GFX7-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub0 + ; GFX7-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub1 + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY5]] + ; GFX7-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY6]] ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 ; GFX8-LABEL: name: s_buffer_load_v2f32_vgpr_offset ; GFX8: bb.1 (%ir-block.0): ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; 
GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX2_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s64), align 4) - ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub0 - ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub1 - ; GFX8-NEXT: $vgpr0 = COPY [[COPY5]] - ; GFX8-NEXT: $vgpr1 = COPY [[COPY6]] + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX2_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s64), align 4) + ; GFX8-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub0 + ; GFX8-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub1 + ; GFX8-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY5]] + ; GFX8-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY6]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %val = call <2 x float> @llvm.amdgcn.s.buffer.load.v2f32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret <2 x float> %val @@ -1654,61 +1654,61 @@ ; GFX6: bb.1 (%ir-block.0): ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX6-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub0 - ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub1 - ; GFX6-NEXT: 
[[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub2 - ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub3 - ; GFX6-NEXT: $vgpr0 = COPY [[COPY5]] - ; GFX6-NEXT: $vgpr1 = COPY [[COPY6]] - ; GFX6-NEXT: $vgpr2 = COPY [[COPY7]] + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub0 + ; GFX6-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub1 + ; GFX6-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub2 + ; GFX6-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub3 + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY5]] + ; GFX6-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY6]] + ; GFX6-NEXT: $vgpr2 = PRED_COPY [[PRED_COPY7]] ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 ; GFX7-LABEL: name: s_buffer_load_v3f32_vgpr_offset ; GFX7: bb.1 (%ir-block.0): ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub0 - ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub1 - ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub2 - ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub3 - ; GFX7-NEXT: $vgpr0 = COPY [[COPY5]] - ; GFX7-NEXT: $vgpr1 = COPY [[COPY6]] - ; GFX7-NEXT: $vgpr2 = COPY [[COPY7]] + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub0 + ; GFX7-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub1 + ; GFX7-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub2 + ; GFX7-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub3 + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY5]] + ; 
GFX7-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY6]] + ; GFX7-NEXT: $vgpr2 = PRED_COPY [[PRED_COPY7]] ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 ; GFX8-LABEL: name: s_buffer_load_v3f32_vgpr_offset ; GFX8: bb.1 (%ir-block.0): ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub0 - ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub1 - ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub2 - ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub3 - ; GFX8-NEXT: $vgpr0 = COPY [[COPY5]] - ; GFX8-NEXT: $vgpr1 = COPY [[COPY6]] - ; GFX8-NEXT: $vgpr2 = COPY [[COPY7]] + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub0 + ; GFX8-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub1 + ; GFX8-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub2 + ; GFX8-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub3 + ; GFX8-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY5]] + ; GFX8-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY6]] + ; GFX8-NEXT: $vgpr2 = PRED_COPY [[PRED_COPY7]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 %val = call <3 x float> @llvm.amdgcn.s.buffer.load.v3f32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret <3 x float> %val @@ -1719,64 +1719,64 @@ ; GFX6: bb.1 (%ir-block.0): ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = 
PRED_COPY $sgpr2 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX6-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub0 - ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub1 - ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub2 - ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub3 - ; GFX6-NEXT: $vgpr0 = COPY [[COPY5]] - ; GFX6-NEXT: $vgpr1 = COPY [[COPY6]] - ; GFX6-NEXT: $vgpr2 = COPY [[COPY7]] - ; GFX6-NEXT: $vgpr3 = COPY [[COPY8]] + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub0 + ; GFX6-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub1 + ; GFX6-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub2 + ; GFX6-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub3 + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY5]] + ; GFX6-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY6]] + ; GFX6-NEXT: $vgpr2 = PRED_COPY [[PRED_COPY7]] + ; GFX6-NEXT: $vgpr3 = PRED_COPY [[PRED_COPY8]] ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX7-LABEL: name: s_buffer_load_v4f32_vgpr_offset ; GFX7: bb.1 (%ir-block.0): ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub0 
- ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub1 - ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub2 - ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub3 - ; GFX7-NEXT: $vgpr0 = COPY [[COPY5]] - ; GFX7-NEXT: $vgpr1 = COPY [[COPY6]] - ; GFX7-NEXT: $vgpr2 = COPY [[COPY7]] - ; GFX7-NEXT: $vgpr3 = COPY [[COPY8]] + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub0 + ; GFX7-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub1 + ; GFX7-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub2 + ; GFX7-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub3 + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY5]] + ; GFX7-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY6]] + ; GFX7-NEXT: $vgpr2 = PRED_COPY [[PRED_COPY7]] + ; GFX7-NEXT: $vgpr3 = PRED_COPY [[PRED_COPY8]] ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX8-LABEL: name: s_buffer_load_v4f32_vgpr_offset ; GFX8: bb.1 (%ir-block.0): ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub0 - ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub1 - ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub2 - ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub3 - ; GFX8-NEXT: $vgpr0 = COPY [[COPY5]] - ; GFX8-NEXT: $vgpr1 = COPY [[COPY6]] - ; GFX8-NEXT: $vgpr2 = COPY [[COPY7]] - ; GFX8-NEXT: $vgpr3 = COPY [[COPY8]] + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub0 + ; GFX8-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub1 + ; 
GFX8-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub2 + ; GFX8-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub3 + ; GFX8-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY5]] + ; GFX8-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY6]] + ; GFX8-NEXT: $vgpr2 = PRED_COPY [[PRED_COPY7]] + ; GFX8-NEXT: $vgpr3 = PRED_COPY [[PRED_COPY8]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 %val = call <4 x float> @llvm.amdgcn.s.buffer.load.v4f32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret <4 x float> %val @@ -1787,94 +1787,94 @@ ; GFX6: bb.1 (%ir-block.0): ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX6-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 - ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2 - ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3 - ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4 - ; GFX6-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5 - ; GFX6-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6 - ; GFX6-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7 - ; GFX6-NEXT: $vgpr0 = COPY [[COPY5]] - ; GFX6-NEXT: $vgpr1 = COPY [[COPY6]] - ; GFX6-NEXT: $vgpr2 = COPY [[COPY7]] - ; GFX6-NEXT: $vgpr3 = COPY [[COPY8]] - ; GFX6-NEXT: $vgpr4 = COPY [[COPY9]] - ; GFX6-NEXT: 
$vgpr5 = COPY [[COPY10]] - ; GFX6-NEXT: $vgpr6 = COPY [[COPY11]] - ; GFX6-NEXT: $vgpr7 = COPY [[COPY12]] + ; GFX6-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub0 + ; GFX6-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub1 + ; GFX6-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub2 + ; GFX6-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub3 + ; GFX6-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub4 + ; GFX6-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub5 + ; GFX6-NEXT: [[PRED_COPY11:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub6 + ; GFX6-NEXT: [[PRED_COPY12:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub7 + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY5]] + ; GFX6-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY6]] + ; GFX6-NEXT: $vgpr2 = PRED_COPY [[PRED_COPY7]] + ; GFX6-NEXT: $vgpr3 = PRED_COPY [[PRED_COPY8]] + ; GFX6-NEXT: $vgpr4 = PRED_COPY [[PRED_COPY9]] + ; GFX6-NEXT: $vgpr5 = PRED_COPY [[PRED_COPY10]] + ; GFX6-NEXT: $vgpr6 = PRED_COPY [[PRED_COPY11]] + ; GFX6-NEXT: $vgpr7 = PRED_COPY [[PRED_COPY12]] ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GFX7-LABEL: name: s_buffer_load_v8f32_vgpr_offset ; GFX7: bb.1 (%ir-block.0): ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX7-NEXT: 
[[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 - ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2 - ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3 - ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4 - ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5 - ; GFX7-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6 - ; GFX7-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7 - ; GFX7-NEXT: $vgpr0 = COPY [[COPY5]] - ; GFX7-NEXT: $vgpr1 = COPY [[COPY6]] - ; GFX7-NEXT: $vgpr2 = COPY [[COPY7]] - ; GFX7-NEXT: $vgpr3 = COPY [[COPY8]] - ; GFX7-NEXT: $vgpr4 = COPY [[COPY9]] - ; GFX7-NEXT: $vgpr5 = COPY [[COPY10]] - ; GFX7-NEXT: $vgpr6 = COPY [[COPY11]] - ; GFX7-NEXT: $vgpr7 = COPY [[COPY12]] + ; GFX7-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub0 + ; GFX7-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub1 + ; GFX7-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub2 + ; GFX7-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub3 + ; GFX7-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub4 + ; GFX7-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub5 + ; GFX7-NEXT: [[PRED_COPY11:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub6 + ; GFX7-NEXT: [[PRED_COPY12:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub7 + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY5]] + ; GFX7-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY6]] + ; GFX7-NEXT: $vgpr2 = PRED_COPY [[PRED_COPY7]] + ; GFX7-NEXT: $vgpr3 = PRED_COPY [[PRED_COPY8]] + ; GFX7-NEXT: $vgpr4 = PRED_COPY [[PRED_COPY9]] + ; GFX7-NEXT: $vgpr5 = PRED_COPY [[PRED_COPY10]] + ; GFX7-NEXT: $vgpr6 = PRED_COPY [[PRED_COPY11]] + ; GFX7-NEXT: $vgpr7 = PRED_COPY [[PRED_COPY12]] ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GFX8-LABEL: name: s_buffer_load_v8f32_vgpr_offset ; GFX8: bb.1 (%ir-block.0): ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN 
[[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 - ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2 - ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3 - ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4 - ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5 - ; GFX8-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6 - ; GFX8-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7 - ; GFX8-NEXT: $vgpr0 = COPY [[COPY5]] - ; GFX8-NEXT: $vgpr1 = COPY [[COPY6]] - ; GFX8-NEXT: $vgpr2 = COPY [[COPY7]] - ; GFX8-NEXT: $vgpr3 = COPY [[COPY8]] - ; GFX8-NEXT: $vgpr4 = COPY [[COPY9]] - ; GFX8-NEXT: $vgpr5 = COPY [[COPY10]] - ; GFX8-NEXT: $vgpr6 = COPY [[COPY11]] - ; GFX8-NEXT: $vgpr7 = COPY [[COPY12]] + ; GFX8-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub0 + ; GFX8-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub1 + ; GFX8-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub2 + ; GFX8-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub3 + ; GFX8-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub4 + ; GFX8-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub5 + ; GFX8-NEXT: [[PRED_COPY11:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub6 + ; GFX8-NEXT: [[PRED_COPY12:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub7 + ; GFX8-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY5]] + ; GFX8-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY6]] + ; GFX8-NEXT: $vgpr2 = PRED_COPY [[PRED_COPY7]] + ; GFX8-NEXT: $vgpr3 = PRED_COPY [[PRED_COPY8]] + ; GFX8-NEXT: $vgpr4 = PRED_COPY [[PRED_COPY9]] + ; GFX8-NEXT: $vgpr5 = PRED_COPY [[PRED_COPY10]] + ; GFX8-NEXT: $vgpr6 = PRED_COPY [[PRED_COPY11]] + ; GFX8-NEXT: $vgpr7 = PRED_COPY [[PRED_COPY12]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret <8 x float> %val @@ -1885,148 +1885,148 @@ ; GFX6: bb.1 (%ir-block.0): ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6-NEXT: 
[[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX6-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 32, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) - ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 48, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 32, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 48, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15 - ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 - ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2 - ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3 - ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4 - ; GFX6-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5 - ; GFX6-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6 - ; GFX6-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7 - ; GFX6-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub8 - ; GFX6-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub9 - ; GFX6-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub10 - ; GFX6-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub11 - ; 
GFX6-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub12 - ; GFX6-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub13 - ; GFX6-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub14 - ; GFX6-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub15 - ; GFX6-NEXT: $vgpr0 = COPY [[COPY5]] - ; GFX6-NEXT: $vgpr1 = COPY [[COPY6]] - ; GFX6-NEXT: $vgpr2 = COPY [[COPY7]] - ; GFX6-NEXT: $vgpr3 = COPY [[COPY8]] - ; GFX6-NEXT: $vgpr4 = COPY [[COPY9]] - ; GFX6-NEXT: $vgpr5 = COPY [[COPY10]] - ; GFX6-NEXT: $vgpr6 = COPY [[COPY11]] - ; GFX6-NEXT: $vgpr7 = COPY [[COPY12]] - ; GFX6-NEXT: $vgpr8 = COPY [[COPY13]] - ; GFX6-NEXT: $vgpr9 = COPY [[COPY14]] - ; GFX6-NEXT: $vgpr10 = COPY [[COPY15]] - ; GFX6-NEXT: $vgpr11 = COPY [[COPY16]] - ; GFX6-NEXT: $vgpr12 = COPY [[COPY17]] - ; GFX6-NEXT: $vgpr13 = COPY [[COPY18]] - ; GFX6-NEXT: $vgpr14 = COPY [[COPY19]] - ; GFX6-NEXT: $vgpr15 = COPY [[COPY20]] + ; GFX6-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub0 + ; GFX6-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub1 + ; GFX6-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub2 + ; GFX6-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub3 + ; GFX6-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub4 + ; GFX6-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub5 + ; GFX6-NEXT: [[PRED_COPY11:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub6 + ; GFX6-NEXT: [[PRED_COPY12:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub7 + ; GFX6-NEXT: [[PRED_COPY13:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub8 + ; GFX6-NEXT: [[PRED_COPY14:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub9 + ; GFX6-NEXT: [[PRED_COPY15:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub10 + ; GFX6-NEXT: [[PRED_COPY16:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub11 + ; GFX6-NEXT: [[PRED_COPY17:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub12 + ; GFX6-NEXT: [[PRED_COPY18:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub13 + ; GFX6-NEXT: [[PRED_COPY19:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub14 + ; GFX6-NEXT: [[PRED_COPY20:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub15 + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY5]] + ; GFX6-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY6]] + ; GFX6-NEXT: $vgpr2 = PRED_COPY [[PRED_COPY7]] + ; GFX6-NEXT: $vgpr3 = PRED_COPY [[PRED_COPY8]] + ; GFX6-NEXT: $vgpr4 = PRED_COPY [[PRED_COPY9]] + ; GFX6-NEXT: $vgpr5 = PRED_COPY [[PRED_COPY10]] + ; GFX6-NEXT: $vgpr6 = PRED_COPY [[PRED_COPY11]] + ; GFX6-NEXT: $vgpr7 = PRED_COPY [[PRED_COPY12]] + ; GFX6-NEXT: $vgpr8 = PRED_COPY [[PRED_COPY13]] + ; GFX6-NEXT: $vgpr9 = PRED_COPY [[PRED_COPY14]] + ; GFX6-NEXT: $vgpr10 = PRED_COPY [[PRED_COPY15]] + ; GFX6-NEXT: $vgpr11 = PRED_COPY [[PRED_COPY16]] + ; GFX6-NEXT: $vgpr12 = PRED_COPY [[PRED_COPY17]] + ; GFX6-NEXT: $vgpr13 = PRED_COPY [[PRED_COPY18]] + ; GFX6-NEXT: $vgpr14 = PRED_COPY [[PRED_COPY19]] + ; GFX6-NEXT: $vgpr15 = PRED_COPY [[PRED_COPY20]] ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 ; GFX7-LABEL: name: s_buffer_load_v16f32_vgpr_offset ; GFX7: bb.1 (%ir-block.0): ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 
= COPY $sgpr2 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 32, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) - ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 48, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 32, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 48, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15 - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 - ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2 - ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3 - ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4 - ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5 - ; GFX7-NEXT: 
[[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6 - ; GFX7-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7 - ; GFX7-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub8 - ; GFX7-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub9 - ; GFX7-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub10 - ; GFX7-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub11 - ; GFX7-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub12 - ; GFX7-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub13 - ; GFX7-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub14 - ; GFX7-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub15 - ; GFX7-NEXT: $vgpr0 = COPY [[COPY5]] - ; GFX7-NEXT: $vgpr1 = COPY [[COPY6]] - ; GFX7-NEXT: $vgpr2 = COPY [[COPY7]] - ; GFX7-NEXT: $vgpr3 = COPY [[COPY8]] - ; GFX7-NEXT: $vgpr4 = COPY [[COPY9]] - ; GFX7-NEXT: $vgpr5 = COPY [[COPY10]] - ; GFX7-NEXT: $vgpr6 = COPY [[COPY11]] - ; GFX7-NEXT: $vgpr7 = COPY [[COPY12]] - ; GFX7-NEXT: $vgpr8 = COPY [[COPY13]] - ; GFX7-NEXT: $vgpr9 = COPY [[COPY14]] - ; GFX7-NEXT: $vgpr10 = COPY [[COPY15]] - ; GFX7-NEXT: $vgpr11 = COPY [[COPY16]] - ; GFX7-NEXT: $vgpr12 = COPY [[COPY17]] - ; GFX7-NEXT: $vgpr13 = COPY [[COPY18]] - ; GFX7-NEXT: $vgpr14 = COPY [[COPY19]] - ; GFX7-NEXT: $vgpr15 = COPY [[COPY20]] + ; GFX7-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub0 + ; GFX7-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub1 + ; GFX7-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub2 + ; GFX7-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub3 + ; GFX7-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub4 + ; GFX7-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub5 + ; GFX7-NEXT: [[PRED_COPY11:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub6 + ; GFX7-NEXT: [[PRED_COPY12:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub7 + ; GFX7-NEXT: [[PRED_COPY13:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub8 + ; GFX7-NEXT: [[PRED_COPY14:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub9 + ; GFX7-NEXT: [[PRED_COPY15:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub10 + ; GFX7-NEXT: [[PRED_COPY16:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub11 + ; GFX7-NEXT: [[PRED_COPY17:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub12 + ; GFX7-NEXT: [[PRED_COPY18:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub13 + ; GFX7-NEXT: [[PRED_COPY19:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub14 + ; GFX7-NEXT: [[PRED_COPY20:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub15 + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY5]] + ; GFX7-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY6]] + ; GFX7-NEXT: $vgpr2 = PRED_COPY [[PRED_COPY7]] + ; GFX7-NEXT: $vgpr3 = PRED_COPY [[PRED_COPY8]] + ; GFX7-NEXT: $vgpr4 = PRED_COPY [[PRED_COPY9]] + ; GFX7-NEXT: $vgpr5 = PRED_COPY [[PRED_COPY10]] + ; GFX7-NEXT: $vgpr6 = PRED_COPY [[PRED_COPY11]] + ; GFX7-NEXT: $vgpr7 = PRED_COPY [[PRED_COPY12]] + ; GFX7-NEXT: $vgpr8 = PRED_COPY [[PRED_COPY13]] + ; GFX7-NEXT: $vgpr9 = PRED_COPY [[PRED_COPY14]] + ; GFX7-NEXT: $vgpr10 = PRED_COPY [[PRED_COPY15]] + ; GFX7-NEXT: $vgpr11 = PRED_COPY [[PRED_COPY16]] + ; GFX7-NEXT: $vgpr12 = PRED_COPY [[PRED_COPY17]] + ; GFX7-NEXT: $vgpr13 = PRED_COPY [[PRED_COPY18]] + ; GFX7-NEXT: $vgpr14 = PRED_COPY [[PRED_COPY19]] + ; GFX7-NEXT: $vgpr15 = PRED_COPY [[PRED_COPY20]] ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, 
implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 ; GFX8-LABEL: name: s_buffer_load_v16f32_vgpr_offset ; GFX8: bb.1 (%ir-block.0): ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 32, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) - ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 48, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 32, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 48, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15 - ; GFX8-NEXT: 
[[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 - ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2 - ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3 - ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4 - ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5 - ; GFX8-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6 - ; GFX8-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7 - ; GFX8-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub8 - ; GFX8-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub9 - ; GFX8-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub10 - ; GFX8-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub11 - ; GFX8-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub12 - ; GFX8-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub13 - ; GFX8-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub14 - ; GFX8-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub15 - ; GFX8-NEXT: $vgpr0 = COPY [[COPY5]] - ; GFX8-NEXT: $vgpr1 = COPY [[COPY6]] - ; GFX8-NEXT: $vgpr2 = COPY [[COPY7]] - ; GFX8-NEXT: $vgpr3 = COPY [[COPY8]] - ; GFX8-NEXT: $vgpr4 = COPY [[COPY9]] - ; GFX8-NEXT: $vgpr5 = COPY [[COPY10]] - ; GFX8-NEXT: $vgpr6 = COPY [[COPY11]] - ; GFX8-NEXT: $vgpr7 = COPY [[COPY12]] - ; GFX8-NEXT: $vgpr8 = COPY [[COPY13]] - ; GFX8-NEXT: $vgpr9 = COPY [[COPY14]] - ; GFX8-NEXT: $vgpr10 = COPY [[COPY15]] - ; GFX8-NEXT: $vgpr11 = COPY [[COPY16]] - ; GFX8-NEXT: $vgpr12 = COPY [[COPY17]] - ; GFX8-NEXT: $vgpr13 = COPY [[COPY18]] - ; GFX8-NEXT: $vgpr14 = COPY [[COPY19]] - ; GFX8-NEXT: $vgpr15 = COPY [[COPY20]] + ; GFX8-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub0 + ; GFX8-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub1 + ; GFX8-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub2 + ; GFX8-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub3 + ; GFX8-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub4 + ; GFX8-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub5 + ; GFX8-NEXT: [[PRED_COPY11:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub6 + ; GFX8-NEXT: [[PRED_COPY12:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub7 + ; GFX8-NEXT: [[PRED_COPY13:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub8 + ; GFX8-NEXT: [[PRED_COPY14:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub9 + ; GFX8-NEXT: [[PRED_COPY15:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub10 + ; GFX8-NEXT: [[PRED_COPY16:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub11 + ; GFX8-NEXT: [[PRED_COPY17:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub12 + ; GFX8-NEXT: [[PRED_COPY18:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub13 + ; GFX8-NEXT: [[PRED_COPY19:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub14 + ; GFX8-NEXT: [[PRED_COPY20:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub15 + ; GFX8-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY5]] + ; GFX8-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY6]] + ; GFX8-NEXT: $vgpr2 = PRED_COPY [[PRED_COPY7]] + ; GFX8-NEXT: $vgpr3 = PRED_COPY [[PRED_COPY8]] + ; GFX8-NEXT: $vgpr4 = PRED_COPY [[PRED_COPY9]] + ; GFX8-NEXT: $vgpr5 = PRED_COPY [[PRED_COPY10]] + ; GFX8-NEXT: $vgpr6 = PRED_COPY [[PRED_COPY11]] + ; GFX8-NEXT: $vgpr7 = PRED_COPY [[PRED_COPY12]] + ; GFX8-NEXT: $vgpr8 = PRED_COPY [[PRED_COPY13]] + ; GFX8-NEXT: 
$vgpr9 = PRED_COPY [[PRED_COPY14]] + ; GFX8-NEXT: $vgpr10 = PRED_COPY [[PRED_COPY15]] + ; GFX8-NEXT: $vgpr11 = PRED_COPY [[PRED_COPY16]] + ; GFX8-NEXT: $vgpr12 = PRED_COPY [[PRED_COPY17]] + ; GFX8-NEXT: $vgpr13 = PRED_COPY [[PRED_COPY18]] + ; GFX8-NEXT: $vgpr14 = PRED_COPY [[PRED_COPY19]] + ; GFX8-NEXT: $vgpr15 = PRED_COPY [[PRED_COPY20]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 %val = call <16 x float> @llvm.amdgcn.s.buffer.load.v16f32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret <16 x float> %val @@ -2037,43 +2037,43 @@ ; GFX6: bb.1 (%ir-block.0): ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX6-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4092, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) - ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4092, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX7-LABEL: name: s_buffer_load_f32_vgpr_offset_add_4092 ; GFX7: bb.1 (%ir-block.0): ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX7-NEXT: 
[[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4092, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) - ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4092, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX8-LABEL: name: s_buffer_load_f32_vgpr_offset_add_4092 ; GFX8: bb.1 (%ir-block.0): ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4092, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) - ; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4092, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX8-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %soffset = add i32 %soffset.base, 4092 %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %soffset, i32 0) @@ -2085,43 +2085,43 @@ ; GFX6: bb.1 (%ir-block.0): ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX6-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 
+ ; GFX6-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4095, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) - ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4095, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX7-LABEL: name: s_buffer_load_f32_vgpr_offset_add_4095 ; GFX7: bb.1 (%ir-block.0): ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4095, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) - ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4095, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX8-LABEL: name: s_buffer_load_f32_vgpr_offset_add_4095 ; GFX8: bb.1 (%ir-block.0): ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; 
GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4095, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) - ; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4095, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX8-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %soffset = add i32 %soffset.base, 4095 %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %soffset, i32 0) @@ -2133,43 +2133,43 @@ ; GFX6: bb.1 (%ir-block.0): ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX6-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 - ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) - ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX7-LABEL: name: s_buffer_load_f32_vgpr_offset_add_4096 ; GFX7: bb.1 (%ir-block.0): ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = 
PRED_COPY $vgpr0 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 - ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) - ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX8-LABEL: name: s_buffer_load_f32_vgpr_offset_add_4096 ; GFX8: bb.1 (%ir-block.0): ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1 - ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4095, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) - ; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4095, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX8-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %soffset = add i32 %soffset.base, 4096 %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %soffset, i32 0) @@ -2182,94 +2182,94 @@ ; GFX6: bb.1 (%ir-block.0): ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX6-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX6-NEXT: 
[[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 - ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2 - ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3 - ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4 - ; GFX6-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5 - ; GFX6-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6 - ; GFX6-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7 - ; GFX6-NEXT: $vgpr0 = COPY [[COPY5]] - ; GFX6-NEXT: $vgpr1 = COPY [[COPY6]] - ; GFX6-NEXT: $vgpr2 = COPY [[COPY7]] - ; GFX6-NEXT: $vgpr3 = COPY [[COPY8]] - ; GFX6-NEXT: $vgpr4 = COPY [[COPY9]] - ; GFX6-NEXT: $vgpr5 = COPY [[COPY10]] - ; GFX6-NEXT: $vgpr6 = COPY [[COPY11]] - ; GFX6-NEXT: $vgpr7 = COPY [[COPY12]] + ; GFX6-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub0 + ; GFX6-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub1 + ; GFX6-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub2 + ; GFX6-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub3 + ; GFX6-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub4 + ; GFX6-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub5 + ; GFX6-NEXT: [[PRED_COPY11:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub6 + ; GFX6-NEXT: [[PRED_COPY12:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub7 + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY5]] + ; GFX6-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY6]] + ; GFX6-NEXT: $vgpr2 = PRED_COPY [[PRED_COPY7]] + ; GFX6-NEXT: $vgpr3 = PRED_COPY [[PRED_COPY8]] + ; GFX6-NEXT: $vgpr4 = PRED_COPY [[PRED_COPY9]] + ; GFX6-NEXT: $vgpr5 = PRED_COPY [[PRED_COPY10]] + ; GFX6-NEXT: $vgpr6 = PRED_COPY [[PRED_COPY11]] + ; GFX6-NEXT: $vgpr7 = PRED_COPY [[PRED_COPY12]] ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GFX7-LABEL: name: s_buffer_load_v8f32_vgpr_offset_add_4064 ; GFX7: bb.1 (%ir-block.0): ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7-NEXT: 
[[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 - ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2 - ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3 - ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4 - ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5 - ; GFX7-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6 - ; GFX7-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7 - ; GFX7-NEXT: $vgpr0 = COPY [[COPY5]] - ; GFX7-NEXT: $vgpr1 = COPY [[COPY6]] - ; GFX7-NEXT: $vgpr2 = COPY [[COPY7]] - ; GFX7-NEXT: $vgpr3 = COPY [[COPY8]] - ; GFX7-NEXT: $vgpr4 = COPY [[COPY9]] - ; GFX7-NEXT: $vgpr5 = COPY [[COPY10]] - ; GFX7-NEXT: $vgpr6 = COPY [[COPY11]] - ; GFX7-NEXT: $vgpr7 = COPY [[COPY12]] + ; GFX7-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub0 + ; GFX7-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub1 + ; GFX7-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub2 + ; GFX7-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub3 + ; GFX7-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub4 + ; GFX7-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub5 + ; GFX7-NEXT: [[PRED_COPY11:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub6 + ; GFX7-NEXT: [[PRED_COPY12:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub7 + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY5]] + ; GFX7-NEXT: $vgpr1 = PRED_COPY 
[[PRED_COPY6]] + ; GFX7-NEXT: $vgpr2 = PRED_COPY [[PRED_COPY7]] + ; GFX7-NEXT: $vgpr3 = PRED_COPY [[PRED_COPY8]] + ; GFX7-NEXT: $vgpr4 = PRED_COPY [[PRED_COPY9]] + ; GFX7-NEXT: $vgpr5 = PRED_COPY [[PRED_COPY10]] + ; GFX7-NEXT: $vgpr6 = PRED_COPY [[PRED_COPY11]] + ; GFX7-NEXT: $vgpr7 = PRED_COPY [[PRED_COPY12]] ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GFX8-LABEL: name: s_buffer_load_v8f32_vgpr_offset_add_4064 ; GFX8: bb.1 (%ir-block.0): ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 - ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2 - ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3 - ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4 - ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5 - ; GFX8-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6 - ; GFX8-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7 - ; GFX8-NEXT: $vgpr0 = COPY [[COPY5]] - ; GFX8-NEXT: $vgpr1 = COPY [[COPY6]] - ; GFX8-NEXT: $vgpr2 = COPY [[COPY7]] - ; GFX8-NEXT: $vgpr3 = COPY [[COPY8]] - ; GFX8-NEXT: $vgpr4 = COPY [[COPY9]] - ; GFX8-NEXT: $vgpr5 = COPY [[COPY10]] - ; GFX8-NEXT: $vgpr6 = COPY [[COPY11]] - ; 
GFX8-NEXT: $vgpr7 = COPY [[COPY12]] + ; GFX8-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub0 + ; GFX8-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub1 + ; GFX8-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub2 + ; GFX8-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub3 + ; GFX8-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub4 + ; GFX8-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub5 + ; GFX8-NEXT: [[PRED_COPY11:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub6 + ; GFX8-NEXT: [[PRED_COPY12:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub7 + ; GFX8-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY5]] + ; GFX8-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY6]] + ; GFX8-NEXT: $vgpr2 = PRED_COPY [[PRED_COPY7]] + ; GFX8-NEXT: $vgpr3 = PRED_COPY [[PRED_COPY8]] + ; GFX8-NEXT: $vgpr4 = PRED_COPY [[PRED_COPY9]] + ; GFX8-NEXT: $vgpr5 = PRED_COPY [[PRED_COPY10]] + ; GFX8-NEXT: $vgpr6 = PRED_COPY [[PRED_COPY11]] + ; GFX8-NEXT: $vgpr7 = PRED_COPY [[PRED_COPY12]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 %soffset = add i32 %soffset.base, 4064 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0) @@ -2282,94 +2282,94 @@ ; GFX6: bb.1 (%ir-block.0): ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX6-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4068 - ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], 
%subreg.sub4_sub5_sub6_sub7 - ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 - ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2 - ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3 - ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4 - ; GFX6-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5 - ; GFX6-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6 - ; GFX6-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7 - ; GFX6-NEXT: $vgpr0 = COPY [[COPY5]] - ; GFX6-NEXT: $vgpr1 = COPY [[COPY6]] - ; GFX6-NEXT: $vgpr2 = COPY [[COPY7]] - ; GFX6-NEXT: $vgpr3 = COPY [[COPY8]] - ; GFX6-NEXT: $vgpr4 = COPY [[COPY9]] - ; GFX6-NEXT: $vgpr5 = COPY [[COPY10]] - ; GFX6-NEXT: $vgpr6 = COPY [[COPY11]] - ; GFX6-NEXT: $vgpr7 = COPY [[COPY12]] + ; GFX6-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub0 + ; GFX6-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub1 + ; GFX6-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub2 + ; GFX6-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub3 + ; GFX6-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub4 + ; GFX6-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub5 + ; GFX6-NEXT: [[PRED_COPY11:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub6 + ; GFX6-NEXT: [[PRED_COPY12:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub7 + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY5]] + ; GFX6-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY6]] + ; GFX6-NEXT: $vgpr2 = PRED_COPY [[PRED_COPY7]] + ; GFX6-NEXT: $vgpr3 = PRED_COPY [[PRED_COPY8]] + ; GFX6-NEXT: $vgpr4 = PRED_COPY [[PRED_COPY9]] + ; GFX6-NEXT: $vgpr5 = PRED_COPY [[PRED_COPY10]] + ; GFX6-NEXT: $vgpr6 = PRED_COPY [[PRED_COPY11]] + ; GFX6-NEXT: $vgpr7 = PRED_COPY [[PRED_COPY12]] ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GFX7-LABEL: name: s_buffer_load_v8f32_vgpr_offset_add_4068 ; GFX7: bb.1 (%ir-block.0): ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4068 - ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX7-NEXT: 
[[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 - ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2 - ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3 - ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4 - ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5 - ; GFX7-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6 - ; GFX7-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7 - ; GFX7-NEXT: $vgpr0 = COPY [[COPY5]] - ; GFX7-NEXT: $vgpr1 = COPY [[COPY6]] - ; GFX7-NEXT: $vgpr2 = COPY [[COPY7]] - ; GFX7-NEXT: $vgpr3 = COPY [[COPY8]] - ; GFX7-NEXT: $vgpr4 = COPY [[COPY9]] - ; GFX7-NEXT: $vgpr5 = COPY [[COPY10]] - ; GFX7-NEXT: $vgpr6 = COPY [[COPY11]] - ; GFX7-NEXT: $vgpr7 = COPY [[COPY12]] + ; GFX7-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub0 + ; GFX7-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub1 + ; GFX7-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub2 + ; GFX7-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub3 + ; GFX7-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub4 + ; GFX7-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub5 + ; GFX7-NEXT: [[PRED_COPY11:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub6 + ; GFX7-NEXT: [[PRED_COPY12:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub7 + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY5]] + ; GFX7-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY6]] + ; GFX7-NEXT: $vgpr2 = PRED_COPY [[PRED_COPY7]] + ; GFX7-NEXT: $vgpr3 = PRED_COPY [[PRED_COPY8]] + ; GFX7-NEXT: $vgpr4 = PRED_COPY [[PRED_COPY9]] + ; GFX7-NEXT: $vgpr5 = PRED_COPY [[PRED_COPY10]] + ; GFX7-NEXT: $vgpr6 = PRED_COPY [[PRED_COPY11]] + ; GFX7-NEXT: $vgpr7 = PRED_COPY [[PRED_COPY12]] ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GFX8-LABEL: name: s_buffer_load_v8f32_vgpr_offset_add_4068 ; GFX8: bb.1 (%ir-block.0): ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX8-NEXT: 
[[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4 - ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 - ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2 - ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3 - ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4 - ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5 - ; GFX8-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6 - ; GFX8-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7 - ; GFX8-NEXT: $vgpr0 = COPY [[COPY5]] - ; GFX8-NEXT: $vgpr1 = COPY [[COPY6]] - ; GFX8-NEXT: $vgpr2 = COPY [[COPY7]] - ; GFX8-NEXT: $vgpr3 = COPY [[COPY8]] - ; GFX8-NEXT: $vgpr4 = COPY [[COPY9]] - ; GFX8-NEXT: $vgpr5 = COPY [[COPY10]] - ; GFX8-NEXT: $vgpr6 = COPY [[COPY11]] - ; GFX8-NEXT: $vgpr7 = COPY [[COPY12]] + ; GFX8-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub0 + ; GFX8-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub1 + ; GFX8-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub2 + ; GFX8-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub3 + ; GFX8-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub4 + ; GFX8-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub5 + ; GFX8-NEXT: [[PRED_COPY11:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub6 + ; GFX8-NEXT: [[PRED_COPY12:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub7 + ; GFX8-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY5]] + ; GFX8-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY6]] + ; GFX8-NEXT: $vgpr2 = PRED_COPY [[PRED_COPY7]] + ; GFX8-NEXT: $vgpr3 = PRED_COPY [[PRED_COPY8]] + ; GFX8-NEXT: $vgpr4 = PRED_COPY [[PRED_COPY9]] + ; GFX8-NEXT: $vgpr5 = PRED_COPY [[PRED_COPY10]] + ; GFX8-NEXT: $vgpr6 = PRED_COPY [[PRED_COPY11]] + ; GFX8-NEXT: $vgpr7 = PRED_COPY [[PRED_COPY12]] ; GFX8-NEXT: 
SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 %soffset = add i32 %soffset.base, 4068 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0) @@ -2381,148 +2381,148 @@ ; GFX6: bb.1 (%ir-block.0): ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX6-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4032, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4048, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) - ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4032, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4048, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], 
%subreg.sub12_sub13_sub14_sub15 - ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 - ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2 - ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3 - ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4 - ; GFX6-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5 - ; GFX6-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6 - ; GFX6-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7 - ; GFX6-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub8 - ; GFX6-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub9 - ; GFX6-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub10 - ; GFX6-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub11 - ; GFX6-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub12 - ; GFX6-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub13 - ; GFX6-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub14 - ; GFX6-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub15 - ; GFX6-NEXT: $vgpr0 = COPY [[COPY5]] - ; GFX6-NEXT: $vgpr1 = COPY [[COPY6]] - ; GFX6-NEXT: $vgpr2 = COPY [[COPY7]] - ; GFX6-NEXT: $vgpr3 = COPY [[COPY8]] - ; GFX6-NEXT: $vgpr4 = COPY [[COPY9]] - ; GFX6-NEXT: $vgpr5 = COPY [[COPY10]] - ; GFX6-NEXT: $vgpr6 = COPY [[COPY11]] - ; GFX6-NEXT: $vgpr7 = COPY [[COPY12]] - ; GFX6-NEXT: $vgpr8 = COPY [[COPY13]] - ; GFX6-NEXT: $vgpr9 = COPY [[COPY14]] - ; GFX6-NEXT: $vgpr10 = COPY [[COPY15]] - ; GFX6-NEXT: $vgpr11 = COPY [[COPY16]] - ; GFX6-NEXT: $vgpr12 = COPY [[COPY17]] - ; GFX6-NEXT: $vgpr13 = COPY [[COPY18]] - ; GFX6-NEXT: $vgpr14 = COPY [[COPY19]] - ; GFX6-NEXT: $vgpr15 = COPY [[COPY20]] + ; GFX6-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub0 + ; GFX6-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub1 + ; GFX6-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub2 + ; GFX6-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub3 + ; GFX6-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub4 + ; GFX6-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub5 + ; GFX6-NEXT: [[PRED_COPY11:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub6 + ; GFX6-NEXT: [[PRED_COPY12:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub7 + ; GFX6-NEXT: [[PRED_COPY13:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub8 + ; GFX6-NEXT: [[PRED_COPY14:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub9 + ; GFX6-NEXT: [[PRED_COPY15:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub10 + ; GFX6-NEXT: [[PRED_COPY16:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub11 + ; GFX6-NEXT: [[PRED_COPY17:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub12 + ; GFX6-NEXT: [[PRED_COPY18:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub13 + ; GFX6-NEXT: [[PRED_COPY19:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub14 + ; GFX6-NEXT: [[PRED_COPY20:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub15 + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY5]] + ; GFX6-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY6]] + ; GFX6-NEXT: $vgpr2 = PRED_COPY [[PRED_COPY7]] + ; GFX6-NEXT: $vgpr3 = PRED_COPY [[PRED_COPY8]] + ; GFX6-NEXT: $vgpr4 = PRED_COPY [[PRED_COPY9]] + ; GFX6-NEXT: $vgpr5 = PRED_COPY [[PRED_COPY10]] + ; GFX6-NEXT: $vgpr6 = PRED_COPY [[PRED_COPY11]] + ; GFX6-NEXT: $vgpr7 = PRED_COPY [[PRED_COPY12]] + ; GFX6-NEXT: 
$vgpr8 = PRED_COPY [[PRED_COPY13]] + ; GFX6-NEXT: $vgpr9 = PRED_COPY [[PRED_COPY14]] + ; GFX6-NEXT: $vgpr10 = PRED_COPY [[PRED_COPY15]] + ; GFX6-NEXT: $vgpr11 = PRED_COPY [[PRED_COPY16]] + ; GFX6-NEXT: $vgpr12 = PRED_COPY [[PRED_COPY17]] + ; GFX6-NEXT: $vgpr13 = PRED_COPY [[PRED_COPY18]] + ; GFX6-NEXT: $vgpr14 = PRED_COPY [[PRED_COPY19]] + ; GFX6-NEXT: $vgpr15 = PRED_COPY [[PRED_COPY20]] ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 ; GFX7-LABEL: name: s_buffer_load_v16f32_vgpr_offset_add_4032 ; GFX7: bb.1 (%ir-block.0): ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4032, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4048, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) - ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4032, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4048, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = 
BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15 - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 - ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2 - ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3 - ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4 - ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5 - ; GFX7-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6 - ; GFX7-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7 - ; GFX7-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub8 - ; GFX7-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub9 - ; GFX7-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub10 - ; GFX7-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub11 - ; GFX7-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub12 - ; GFX7-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub13 - ; GFX7-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub14 - ; GFX7-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub15 - ; GFX7-NEXT: $vgpr0 = COPY [[COPY5]] - ; GFX7-NEXT: $vgpr1 = COPY [[COPY6]] - ; GFX7-NEXT: $vgpr2 = COPY [[COPY7]] - ; GFX7-NEXT: $vgpr3 = COPY [[COPY8]] - ; GFX7-NEXT: $vgpr4 = COPY [[COPY9]] - ; GFX7-NEXT: $vgpr5 = COPY [[COPY10]] - ; GFX7-NEXT: $vgpr6 = COPY [[COPY11]] - ; GFX7-NEXT: $vgpr7 = COPY [[COPY12]] - ; GFX7-NEXT: $vgpr8 = COPY [[COPY13]] - ; GFX7-NEXT: $vgpr9 = COPY [[COPY14]] - ; GFX7-NEXT: $vgpr10 = COPY [[COPY15]] - ; GFX7-NEXT: $vgpr11 = COPY [[COPY16]] - ; GFX7-NEXT: $vgpr12 = COPY [[COPY17]] - ; GFX7-NEXT: $vgpr13 = COPY [[COPY18]] - ; GFX7-NEXT: $vgpr14 = COPY [[COPY19]] - ; GFX7-NEXT: $vgpr15 = COPY [[COPY20]] + ; GFX7-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub0 + ; GFX7-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub1 + ; GFX7-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub2 + ; GFX7-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub3 + ; GFX7-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub4 + ; GFX7-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub5 + ; GFX7-NEXT: [[PRED_COPY11:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub6 + ; GFX7-NEXT: [[PRED_COPY12:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub7 + ; GFX7-NEXT: [[PRED_COPY13:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub8 + ; GFX7-NEXT: [[PRED_COPY14:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub9 + ; GFX7-NEXT: [[PRED_COPY15:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub10 + ; GFX7-NEXT: [[PRED_COPY16:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub11 + ; GFX7-NEXT: [[PRED_COPY17:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub12 + ; GFX7-NEXT: [[PRED_COPY18:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub13 + ; GFX7-NEXT: [[PRED_COPY19:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub14 + ; GFX7-NEXT: 
[[PRED_COPY20:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub15 + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY5]] + ; GFX7-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY6]] + ; GFX7-NEXT: $vgpr2 = PRED_COPY [[PRED_COPY7]] + ; GFX7-NEXT: $vgpr3 = PRED_COPY [[PRED_COPY8]] + ; GFX7-NEXT: $vgpr4 = PRED_COPY [[PRED_COPY9]] + ; GFX7-NEXT: $vgpr5 = PRED_COPY [[PRED_COPY10]] + ; GFX7-NEXT: $vgpr6 = PRED_COPY [[PRED_COPY11]] + ; GFX7-NEXT: $vgpr7 = PRED_COPY [[PRED_COPY12]] + ; GFX7-NEXT: $vgpr8 = PRED_COPY [[PRED_COPY13]] + ; GFX7-NEXT: $vgpr9 = PRED_COPY [[PRED_COPY14]] + ; GFX7-NEXT: $vgpr10 = PRED_COPY [[PRED_COPY15]] + ; GFX7-NEXT: $vgpr11 = PRED_COPY [[PRED_COPY16]] + ; GFX7-NEXT: $vgpr12 = PRED_COPY [[PRED_COPY17]] + ; GFX7-NEXT: $vgpr13 = PRED_COPY [[PRED_COPY18]] + ; GFX7-NEXT: $vgpr14 = PRED_COPY [[PRED_COPY19]] + ; GFX7-NEXT: $vgpr15 = PRED_COPY [[PRED_COPY20]] ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 ; GFX8-LABEL: name: s_buffer_load_v16f32_vgpr_offset_add_4032 ; GFX8: bb.1 (%ir-block.0): ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4032, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4048, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) - ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4032, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = 
BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4048, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15 - ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 - ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2 - ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3 - ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4 - ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5 - ; GFX8-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6 - ; GFX8-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7 - ; GFX8-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub8 - ; GFX8-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub9 - ; GFX8-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub10 - ; GFX8-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub11 - ; GFX8-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub12 - ; GFX8-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub13 - ; GFX8-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub14 - ; GFX8-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub15 - ; GFX8-NEXT: $vgpr0 = COPY [[COPY5]] - ; GFX8-NEXT: $vgpr1 = COPY [[COPY6]] - ; GFX8-NEXT: $vgpr2 = COPY [[COPY7]] - ; GFX8-NEXT: $vgpr3 = COPY [[COPY8]] - ; GFX8-NEXT: $vgpr4 = COPY [[COPY9]] - ; GFX8-NEXT: $vgpr5 = COPY [[COPY10]] - ; GFX8-NEXT: $vgpr6 = COPY [[COPY11]] - ; GFX8-NEXT: $vgpr7 = COPY [[COPY12]] - ; GFX8-NEXT: $vgpr8 = COPY [[COPY13]] - ; GFX8-NEXT: $vgpr9 = COPY [[COPY14]] - ; GFX8-NEXT: $vgpr10 = COPY [[COPY15]] - ; GFX8-NEXT: $vgpr11 = COPY [[COPY16]] - ; GFX8-NEXT: $vgpr12 = COPY [[COPY17]] - ; GFX8-NEXT: $vgpr13 = COPY [[COPY18]] - ; GFX8-NEXT: $vgpr14 = COPY [[COPY19]] - ; GFX8-NEXT: $vgpr15 = COPY [[COPY20]] + ; GFX8-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub0 + ; GFX8-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub1 + ; GFX8-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub2 + ; GFX8-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub3 + ; GFX8-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub4 + ; GFX8-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub5 + ; GFX8-NEXT: [[PRED_COPY11:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub6 + ; GFX8-NEXT: [[PRED_COPY12:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub7 + ; GFX8-NEXT: [[PRED_COPY13:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub8 + ; GFX8-NEXT: [[PRED_COPY14:%[0-9]+]]:vgpr_32 = PRED_COPY 
[[REG_SEQUENCE1]].sub9 + ; GFX8-NEXT: [[PRED_COPY15:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub10 + ; GFX8-NEXT: [[PRED_COPY16:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub11 + ; GFX8-NEXT: [[PRED_COPY17:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub12 + ; GFX8-NEXT: [[PRED_COPY18:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub13 + ; GFX8-NEXT: [[PRED_COPY19:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub14 + ; GFX8-NEXT: [[PRED_COPY20:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub15 + ; GFX8-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY5]] + ; GFX8-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY6]] + ; GFX8-NEXT: $vgpr2 = PRED_COPY [[PRED_COPY7]] + ; GFX8-NEXT: $vgpr3 = PRED_COPY [[PRED_COPY8]] + ; GFX8-NEXT: $vgpr4 = PRED_COPY [[PRED_COPY9]] + ; GFX8-NEXT: $vgpr5 = PRED_COPY [[PRED_COPY10]] + ; GFX8-NEXT: $vgpr6 = PRED_COPY [[PRED_COPY11]] + ; GFX8-NEXT: $vgpr7 = PRED_COPY [[PRED_COPY12]] + ; GFX8-NEXT: $vgpr8 = PRED_COPY [[PRED_COPY13]] + ; GFX8-NEXT: $vgpr9 = PRED_COPY [[PRED_COPY14]] + ; GFX8-NEXT: $vgpr10 = PRED_COPY [[PRED_COPY15]] + ; GFX8-NEXT: $vgpr11 = PRED_COPY [[PRED_COPY16]] + ; GFX8-NEXT: $vgpr12 = PRED_COPY [[PRED_COPY17]] + ; GFX8-NEXT: $vgpr13 = PRED_COPY [[PRED_COPY18]] + ; GFX8-NEXT: $vgpr14 = PRED_COPY [[PRED_COPY19]] + ; GFX8-NEXT: $vgpr15 = PRED_COPY [[PRED_COPY20]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 %soffset = add i32 %soffset.base, 4032 %val = call <16 x float> @llvm.amdgcn.s.buffer.load.v16f32(<4 x i32> %rsrc, i32 %soffset, i32 0) @@ -2534,148 +2534,148 @@ ; GFX6: bb.1 (%ir-block.0): ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX6-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4036 - ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 32, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address 
+ 16, align 4) - ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 48, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 32, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 48, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15 - ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 - ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2 - ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3 - ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4 - ; GFX6-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5 - ; GFX6-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6 - ; GFX6-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7 - ; GFX6-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub8 - ; GFX6-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub9 - ; GFX6-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub10 - ; GFX6-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub11 - ; GFX6-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub12 - ; GFX6-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub13 - ; GFX6-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub14 - ; GFX6-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub15 - ; GFX6-NEXT: $vgpr0 = COPY [[COPY5]] - ; GFX6-NEXT: $vgpr1 = COPY [[COPY6]] - ; GFX6-NEXT: $vgpr2 = COPY [[COPY7]] - ; GFX6-NEXT: $vgpr3 = COPY [[COPY8]] - ; GFX6-NEXT: $vgpr4 = COPY [[COPY9]] - ; GFX6-NEXT: $vgpr5 = COPY [[COPY10]] - ; GFX6-NEXT: $vgpr6 = COPY [[COPY11]] - ; GFX6-NEXT: $vgpr7 = COPY [[COPY12]] - ; GFX6-NEXT: $vgpr8 = COPY [[COPY13]] - ; GFX6-NEXT: $vgpr9 = COPY [[COPY14]] - ; GFX6-NEXT: $vgpr10 = COPY [[COPY15]] - ; GFX6-NEXT: $vgpr11 = COPY [[COPY16]] - ; GFX6-NEXT: $vgpr12 = COPY [[COPY17]] - ; GFX6-NEXT: $vgpr13 = COPY [[COPY18]] - ; GFX6-NEXT: $vgpr14 = COPY [[COPY19]] - ; GFX6-NEXT: $vgpr15 = COPY [[COPY20]] + ; GFX6-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub0 + ; GFX6-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub1 + ; GFX6-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub2 + ; GFX6-NEXT: 
[[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub3 + ; GFX6-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub4 + ; GFX6-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub5 + ; GFX6-NEXT: [[PRED_COPY11:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub6 + ; GFX6-NEXT: [[PRED_COPY12:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub7 + ; GFX6-NEXT: [[PRED_COPY13:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub8 + ; GFX6-NEXT: [[PRED_COPY14:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub9 + ; GFX6-NEXT: [[PRED_COPY15:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub10 + ; GFX6-NEXT: [[PRED_COPY16:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub11 + ; GFX6-NEXT: [[PRED_COPY17:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub12 + ; GFX6-NEXT: [[PRED_COPY18:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub13 + ; GFX6-NEXT: [[PRED_COPY19:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub14 + ; GFX6-NEXT: [[PRED_COPY20:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub15 + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY5]] + ; GFX6-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY6]] + ; GFX6-NEXT: $vgpr2 = PRED_COPY [[PRED_COPY7]] + ; GFX6-NEXT: $vgpr3 = PRED_COPY [[PRED_COPY8]] + ; GFX6-NEXT: $vgpr4 = PRED_COPY [[PRED_COPY9]] + ; GFX6-NEXT: $vgpr5 = PRED_COPY [[PRED_COPY10]] + ; GFX6-NEXT: $vgpr6 = PRED_COPY [[PRED_COPY11]] + ; GFX6-NEXT: $vgpr7 = PRED_COPY [[PRED_COPY12]] + ; GFX6-NEXT: $vgpr8 = PRED_COPY [[PRED_COPY13]] + ; GFX6-NEXT: $vgpr9 = PRED_COPY [[PRED_COPY14]] + ; GFX6-NEXT: $vgpr10 = PRED_COPY [[PRED_COPY15]] + ; GFX6-NEXT: $vgpr11 = PRED_COPY [[PRED_COPY16]] + ; GFX6-NEXT: $vgpr12 = PRED_COPY [[PRED_COPY17]] + ; GFX6-NEXT: $vgpr13 = PRED_COPY [[PRED_COPY18]] + ; GFX6-NEXT: $vgpr14 = PRED_COPY [[PRED_COPY19]] + ; GFX6-NEXT: $vgpr15 = PRED_COPY [[PRED_COPY20]] ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 ; GFX7-LABEL: name: s_buffer_load_v16f32_vgpr_offset_add_4036 ; GFX7: bb.1 (%ir-block.0): ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4036 - ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; 
GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 32, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) - ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 48, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 32, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 48, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15 - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 - ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2 - ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3 - ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4 - ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5 - ; GFX7-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6 - ; GFX7-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7 - ; GFX7-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub8 - ; GFX7-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub9 - ; GFX7-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub10 - ; GFX7-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub11 - ; GFX7-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub12 - ; GFX7-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub13 - ; GFX7-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub14 - ; GFX7-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub15 - ; GFX7-NEXT: $vgpr0 = COPY [[COPY5]] - ; GFX7-NEXT: $vgpr1 = COPY [[COPY6]] - ; GFX7-NEXT: $vgpr2 = COPY [[COPY7]] - ; GFX7-NEXT: $vgpr3 = COPY [[COPY8]] - ; GFX7-NEXT: $vgpr4 = COPY [[COPY9]] - ; GFX7-NEXT: $vgpr5 = COPY [[COPY10]] - ; GFX7-NEXT: $vgpr6 = COPY [[COPY11]] - ; GFX7-NEXT: $vgpr7 = COPY [[COPY12]] - ; GFX7-NEXT: $vgpr8 = COPY [[COPY13]] - ; GFX7-NEXT: $vgpr9 = COPY [[COPY14]] - ; GFX7-NEXT: $vgpr10 = COPY [[COPY15]] - ; GFX7-NEXT: $vgpr11 = COPY [[COPY16]] - ; 
GFX7-NEXT: $vgpr12 = COPY [[COPY17]] - ; GFX7-NEXT: $vgpr13 = COPY [[COPY18]] - ; GFX7-NEXT: $vgpr14 = COPY [[COPY19]] - ; GFX7-NEXT: $vgpr15 = COPY [[COPY20]] + ; GFX7-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub0 + ; GFX7-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub1 + ; GFX7-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub2 + ; GFX7-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub3 + ; GFX7-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub4 + ; GFX7-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub5 + ; GFX7-NEXT: [[PRED_COPY11:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub6 + ; GFX7-NEXT: [[PRED_COPY12:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub7 + ; GFX7-NEXT: [[PRED_COPY13:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub8 + ; GFX7-NEXT: [[PRED_COPY14:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub9 + ; GFX7-NEXT: [[PRED_COPY15:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub10 + ; GFX7-NEXT: [[PRED_COPY16:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub11 + ; GFX7-NEXT: [[PRED_COPY17:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub12 + ; GFX7-NEXT: [[PRED_COPY18:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub13 + ; GFX7-NEXT: [[PRED_COPY19:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub14 + ; GFX7-NEXT: [[PRED_COPY20:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub15 + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY5]] + ; GFX7-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY6]] + ; GFX7-NEXT: $vgpr2 = PRED_COPY [[PRED_COPY7]] + ; GFX7-NEXT: $vgpr3 = PRED_COPY [[PRED_COPY8]] + ; GFX7-NEXT: $vgpr4 = PRED_COPY [[PRED_COPY9]] + ; GFX7-NEXT: $vgpr5 = PRED_COPY [[PRED_COPY10]] + ; GFX7-NEXT: $vgpr6 = PRED_COPY [[PRED_COPY11]] + ; GFX7-NEXT: $vgpr7 = PRED_COPY [[PRED_COPY12]] + ; GFX7-NEXT: $vgpr8 = PRED_COPY [[PRED_COPY13]] + ; GFX7-NEXT: $vgpr9 = PRED_COPY [[PRED_COPY14]] + ; GFX7-NEXT: $vgpr10 = PRED_COPY [[PRED_COPY15]] + ; GFX7-NEXT: $vgpr11 = PRED_COPY [[PRED_COPY16]] + ; GFX7-NEXT: $vgpr12 = PRED_COPY [[PRED_COPY17]] + ; GFX7-NEXT: $vgpr13 = PRED_COPY [[PRED_COPY18]] + ; GFX7-NEXT: $vgpr14 = PRED_COPY [[PRED_COPY19]] + ; GFX7-NEXT: $vgpr15 = PRED_COPY [[PRED_COPY20]] ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 ; GFX8-LABEL: name: s_buffer_load_v16f32_vgpr_offset_add_4036 ; GFX8: bb.1 (%ir-block.0): ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, 
[[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4 - ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4032, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4048, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) - ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4032, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4048, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15 - ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 - ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2 - ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3 - ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4 - ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5 - ; GFX8-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6 - ; GFX8-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7 - ; GFX8-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub8 - ; GFX8-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub9 - ; GFX8-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub10 - ; GFX8-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub11 - ; GFX8-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub12 - ; GFX8-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub13 - ; GFX8-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub14 - ; GFX8-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub15 - ; GFX8-NEXT: $vgpr0 = COPY 
[[COPY5]] - ; GFX8-NEXT: $vgpr1 = COPY [[COPY6]] - ; GFX8-NEXT: $vgpr2 = COPY [[COPY7]] - ; GFX8-NEXT: $vgpr3 = COPY [[COPY8]] - ; GFX8-NEXT: $vgpr4 = COPY [[COPY9]] - ; GFX8-NEXT: $vgpr5 = COPY [[COPY10]] - ; GFX8-NEXT: $vgpr6 = COPY [[COPY11]] - ; GFX8-NEXT: $vgpr7 = COPY [[COPY12]] - ; GFX8-NEXT: $vgpr8 = COPY [[COPY13]] - ; GFX8-NEXT: $vgpr9 = COPY [[COPY14]] - ; GFX8-NEXT: $vgpr10 = COPY [[COPY15]] - ; GFX8-NEXT: $vgpr11 = COPY [[COPY16]] - ; GFX8-NEXT: $vgpr12 = COPY [[COPY17]] - ; GFX8-NEXT: $vgpr13 = COPY [[COPY18]] - ; GFX8-NEXT: $vgpr14 = COPY [[COPY19]] - ; GFX8-NEXT: $vgpr15 = COPY [[COPY20]] + ; GFX8-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub0 + ; GFX8-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub1 + ; GFX8-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub2 + ; GFX8-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub3 + ; GFX8-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub4 + ; GFX8-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub5 + ; GFX8-NEXT: [[PRED_COPY11:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub6 + ; GFX8-NEXT: [[PRED_COPY12:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub7 + ; GFX8-NEXT: [[PRED_COPY13:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub8 + ; GFX8-NEXT: [[PRED_COPY14:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub9 + ; GFX8-NEXT: [[PRED_COPY15:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub10 + ; GFX8-NEXT: [[PRED_COPY16:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub11 + ; GFX8-NEXT: [[PRED_COPY17:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub12 + ; GFX8-NEXT: [[PRED_COPY18:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub13 + ; GFX8-NEXT: [[PRED_COPY19:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub14 + ; GFX8-NEXT: [[PRED_COPY20:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub15 + ; GFX8-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY5]] + ; GFX8-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY6]] + ; GFX8-NEXT: $vgpr2 = PRED_COPY [[PRED_COPY7]] + ; GFX8-NEXT: $vgpr3 = PRED_COPY [[PRED_COPY8]] + ; GFX8-NEXT: $vgpr4 = PRED_COPY [[PRED_COPY9]] + ; GFX8-NEXT: $vgpr5 = PRED_COPY [[PRED_COPY10]] + ; GFX8-NEXT: $vgpr6 = PRED_COPY [[PRED_COPY11]] + ; GFX8-NEXT: $vgpr7 = PRED_COPY [[PRED_COPY12]] + ; GFX8-NEXT: $vgpr8 = PRED_COPY [[PRED_COPY13]] + ; GFX8-NEXT: $vgpr9 = PRED_COPY [[PRED_COPY14]] + ; GFX8-NEXT: $vgpr10 = PRED_COPY [[PRED_COPY15]] + ; GFX8-NEXT: $vgpr11 = PRED_COPY [[PRED_COPY16]] + ; GFX8-NEXT: $vgpr12 = PRED_COPY [[PRED_COPY17]] + ; GFX8-NEXT: $vgpr13 = PRED_COPY [[PRED_COPY18]] + ; GFX8-NEXT: $vgpr14 = PRED_COPY [[PRED_COPY19]] + ; GFX8-NEXT: $vgpr15 = PRED_COPY [[PRED_COPY20]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 %soffset = add i32 %soffset.base, 4036 %val = call <16 x float> @llvm.amdgcn.s.buffer.load.v16f32(<4 x i32> %rsrc, i32 %soffset, i32 0) @@ -2689,37 +2689,37 @@ ; GFX6-NEXT: successors: %bb.2(0x80000000) ; GFX6-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX6-NEXT: 
[[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY4]] + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX6-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX6-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY4]] ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.2: ; GFX6-NEXT: successors: %bb.3(0x80000000) ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY1]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY2]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY3]], implicit $exec ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX6-NEXT: [[COPY8:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; GFX6-NEXT: [[COPY9:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY8]], [[COPY6]], implicit $exec - ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY9]], [[COPY7]], implicit $exec + ; GFX6-NEXT: [[PRED_COPY6:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX6-NEXT: [[PRED_COPY7:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX6-NEXT: [[PRED_COPY8:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX6-NEXT: [[PRED_COPY9:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY8]], [[PRED_COPY6]], implicit $exec + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY9]], [[PRED_COPY7]], implicit $exec ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; GFX6-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, 
implicit $exec ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.3: ; GFX6-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[PRED_COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) ; GFX6-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX6-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX6-NEXT: {{ $}} @@ -2729,44 +2729,44 @@ ; GFX6-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.5: - ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX7-LABEL: name: s_buffer_load_f32_vgpr_rsrc ; GFX7: bb.1 (%ir-block.0): ; GFX7-NEXT: successors: %bb.2(0x80000000) ; GFX7-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY4]] + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX7-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY4]] ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.2: ; GFX7-NEXT: successors: %bb.3(0x80000000) ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY1]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY2]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY3]], implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], 
%subreg.sub3 - ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX7-NEXT: [[COPY8:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; GFX7-NEXT: [[COPY9:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY8]], [[COPY6]], implicit $exec - ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY9]], [[COPY7]], implicit $exec + ; GFX7-NEXT: [[PRED_COPY6:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX7-NEXT: [[PRED_COPY7:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX7-NEXT: [[PRED_COPY8:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX7-NEXT: [[PRED_COPY9:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY8]], [[PRED_COPY6]], implicit $exec + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY9]], [[PRED_COPY7]], implicit $exec ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.3: ; GFX7-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[PRED_COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) ; GFX7-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX7-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX7-NEXT: {{ $}} @@ -2776,44 +2776,44 @@ ; GFX7-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.5: - ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX8-LABEL: name: s_buffer_load_f32_vgpr_rsrc ; GFX8: bb.1 (%ir-block.0): ; GFX8-NEXT: successors: %bb.2(0x80000000) ; GFX8-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY4]] + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX8-NEXT: 
[[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX8-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY4]] ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX8-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.2: ; GFX8-NEXT: successors: %bb.3(0x80000000) ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY1]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY2]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY3]], implicit $exec ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX8-NEXT: [[COPY8:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; GFX8-NEXT: [[COPY9:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY8]], [[COPY6]], implicit $exec - ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY9]], [[COPY7]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY6:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX8-NEXT: [[PRED_COPY7:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX8-NEXT: [[PRED_COPY8:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX8-NEXT: [[PRED_COPY9:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY8]], [[PRED_COPY6]], implicit $exec + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY9]], [[PRED_COPY7]], implicit $exec ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; GFX8-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.3: ; GFX8-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[PRED_COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) ; GFX8-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX8-NEXT: {{ $}} @@ -2823,7 +2823,7 @@ ; 
GFX8-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.5: - ; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX8-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret float %val @@ -2836,35 +2836,35 @@ ; GFX6-NEXT: successors: %bb.2(0x80000000) ; GFX6-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX6-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.2: ; GFX6-NEXT: successors: %bb.3(0x80000000) ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY1]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY2]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY3]], implicit $exec ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX6-NEXT: [[COPY7:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; GFX6-NEXT: [[COPY8:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY7]], [[COPY5]], implicit $exec - ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY8]], [[COPY6]], implicit $exec + ; GFX6-NEXT: [[PRED_COPY5:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX6-NEXT: [[PRED_COPY6:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX6-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX6-NEXT: [[PRED_COPY8:%[0-9]+]]:sreg_64 = PRED_COPY 
[[REG_SEQUENCE1]].sub2_sub3 + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY7]], [[PRED_COPY5]], implicit $exec + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY8]], [[PRED_COPY6]], implicit $exec ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; GFX6-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.3: ; GFX6-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE1]], [[COPY4]], 4092, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE1]], [[PRED_COPY4]], 4092, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) ; GFX6-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX6-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX6-NEXT: {{ $}} @@ -2874,42 +2874,42 @@ ; GFX6-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.5: - ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]] + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_OFFSET]] ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX7-LABEL: name: s_buffer_load_f32_vgpr_rsrc_soffset_add_4092 ; GFX7: bb.1 (%ir-block.0): ; GFX7-NEXT: successors: %bb.2(0x80000000) ; GFX7-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.2: ; GFX7-NEXT: successors: %bb.3(0x80000000) ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY1]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 
[[PRED_COPY2]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY3]], implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX7-NEXT: [[COPY7:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; GFX7-NEXT: [[COPY8:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY7]], [[COPY5]], implicit $exec - ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY8]], [[COPY6]], implicit $exec + ; GFX7-NEXT: [[PRED_COPY5:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX7-NEXT: [[PRED_COPY6:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX7-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX7-NEXT: [[PRED_COPY8:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY7]], [[PRED_COPY5]], implicit $exec + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY8]], [[PRED_COPY6]], implicit $exec ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.3: ; GFX7-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE1]], [[COPY4]], 4092, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE1]], [[PRED_COPY4]], 4092, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) ; GFX7-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX7-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX7-NEXT: {{ $}} @@ -2919,42 +2919,42 @@ ; GFX7-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.5: - ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]] + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_OFFSET]] ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX8-LABEL: name: s_buffer_load_f32_vgpr_rsrc_soffset_add_4092 ; GFX8: bb.1 (%ir-block.0): ; GFX8-NEXT: successors: %bb.2(0x80000000) ; GFX8-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = 
PRED_COPY $vgpr2 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 ; GFX8-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.2: ; GFX8-NEXT: successors: %bb.3(0x80000000) ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY1]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY2]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY3]], implicit $exec ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX8-NEXT: [[COPY7:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; GFX8-NEXT: [[COPY8:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY7]], [[COPY5]], implicit $exec - ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY8]], [[COPY6]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY5:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX8-NEXT: [[PRED_COPY6:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX8-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX8-NEXT: [[PRED_COPY8:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY7]], [[PRED_COPY5]], implicit $exec + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY8]], [[PRED_COPY6]], implicit $exec ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; GFX8-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.3: ; GFX8-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE1]], [[COPY4]], 4092, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE1]], [[PRED_COPY4]], 4092, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) ; GFX8-NEXT: $exec = S_XOR_B64_term $exec, 
[[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX8-NEXT: {{ $}} @@ -2964,7 +2964,7 @@ ; GFX8-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.5: - ; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]] + ; GFX8-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_OFFSET]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %soffset = add i32 %soffset.base, 4092 %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %soffset, i32 0) @@ -2978,39 +2978,39 @@ ; GFX6-NEXT: successors: %bb.2(0x80000000) ; GFX6-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX6-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 - ; GFX6-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc - ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]] + ; GFX6-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[PRED_COPY4]], [[S_MOV_B32_]], implicit-def $scc + ; GFX6-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_ADD_I32_]] ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.2: ; GFX6-NEXT: successors: %bb.3(0x80000000) ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY1]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY2]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY3]], implicit $exec ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX6-NEXT: [[COPY8:%[0-9]+]]:sreg_64 = COPY 
[[REG_SEQUENCE1]].sub0_sub1 - ; GFX6-NEXT: [[COPY9:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY8]], [[COPY6]], implicit $exec - ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY9]], [[COPY7]], implicit $exec + ; GFX6-NEXT: [[PRED_COPY6:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX6-NEXT: [[PRED_COPY7:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX6-NEXT: [[PRED_COPY8:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX6-NEXT: [[PRED_COPY9:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY8]], [[PRED_COPY6]], implicit $exec + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY9]], [[PRED_COPY7]], implicit $exec ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; GFX6-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.3: ; GFX6-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[PRED_COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) ; GFX6-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX6-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX6-NEXT: {{ $}} @@ -3020,46 +3020,46 @@ ; GFX6-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.5: - ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX7-LABEL: name: s_buffer_load_f32_vgpr_rsrc_soffset_add_4096 ; GFX7: bb.1 (%ir-block.0): ; GFX7-NEXT: successors: %bb.2(0x80000000) ; GFX7-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 - ; GFX7-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY 
[[S_ADD_I32_]] + ; GFX7-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[PRED_COPY4]], [[S_MOV_B32_]], implicit-def $scc + ; GFX7-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_ADD_I32_]] ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.2: ; GFX7-NEXT: successors: %bb.3(0x80000000) ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY1]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY2]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY3]], implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX7-NEXT: [[COPY8:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; GFX7-NEXT: [[COPY9:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY8]], [[COPY6]], implicit $exec - ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY9]], [[COPY7]], implicit $exec + ; GFX7-NEXT: [[PRED_COPY6:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX7-NEXT: [[PRED_COPY7:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX7-NEXT: [[PRED_COPY8:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX7-NEXT: [[PRED_COPY9:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY8]], [[PRED_COPY6]], implicit $exec + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY9]], [[PRED_COPY7]], implicit $exec ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.3: ; GFX7-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[PRED_COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) ; GFX7-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX7-NEXT: 
SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX7-NEXT: {{ $}} @@ -3069,46 +3069,46 @@ ; GFX7-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.5: - ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX8-LABEL: name: s_buffer_load_f32_vgpr_rsrc_soffset_add_4096 ; GFX8: bb.1 (%ir-block.0): ; GFX8-NEXT: successors: %bb.2(0x80000000) ; GFX8-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 - ; GFX8-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc - ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]] + ; GFX8-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[PRED_COPY4]], [[S_MOV_B32_]], implicit-def $scc + ; GFX8-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_ADD_I32_]] ; GFX8-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX8-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.2: ; GFX8-NEXT: successors: %bb.3(0x80000000) ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY1]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY2]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY3]], implicit $exec ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX8-NEXT: [[COPY8:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; GFX8-NEXT: [[COPY9:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; GFX8-NEXT: 
[[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY8]], [[COPY6]], implicit $exec - ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY9]], [[COPY7]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY6:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX8-NEXT: [[PRED_COPY7:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX8-NEXT: [[PRED_COPY8:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX8-NEXT: [[PRED_COPY9:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY8]], [[PRED_COPY6]], implicit $exec + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY9]], [[PRED_COPY7]], implicit $exec ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; GFX8-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.3: ; GFX8-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[PRED_COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) ; GFX8-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX8-NEXT: {{ $}} @@ -3118,7 +3118,7 @@ ; GFX8-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.5: - ; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX8-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %soffset = add i32 %soffset.base, 4096 %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %soffset, i32 0) @@ -3132,28 +3132,28 @@ ; GFX6-NEXT: successors: %bb.2(0x80000000) ; GFX6-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX6-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.2: ; GFX6-NEXT: successors: %bb.3(0x80000000) ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = 
V_READFIRSTLANE_B32 [[COPY1]], implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY1]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY2]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY3]], implicit $exec ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX6-NEXT: [[COPY6:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; GFX6-NEXT: [[COPY7:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY6]], [[COPY4]], implicit $exec - ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY7]], [[COPY5]], implicit $exec + ; GFX6-NEXT: [[PRED_COPY4:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX6-NEXT: [[PRED_COPY5:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX6-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX6-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY6]], [[PRED_COPY4]], implicit $exec + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY7]], [[PRED_COPY5]], implicit $exec ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; GFX6-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX6-NEXT: {{ $}} @@ -3170,35 +3170,35 @@ ; GFX6-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.5: - ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]] + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_OFFSET]] ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX7-LABEL: name: s_buffer_load_f32_vgpr_rsrc_offset_4095 ; GFX7: bb.1 (%ir-block.0): ; GFX7-NEXT: successors: %bb.2(0x80000000) ; GFX7-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE 
[[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.2: ; GFX7-NEXT: successors: %bb.3(0x80000000) ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY1]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY2]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY3]], implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX7-NEXT: [[COPY6:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; GFX7-NEXT: [[COPY7:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY6]], [[COPY4]], implicit $exec - ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY7]], [[COPY5]], implicit $exec + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX7-NEXT: [[PRED_COPY5:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX7-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX7-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY6]], [[PRED_COPY4]], implicit $exec + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY7]], [[PRED_COPY5]], implicit $exec ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX7-NEXT: {{ $}} @@ -3215,35 +3215,35 @@ ; GFX7-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.5: - ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]] + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_OFFSET]] ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX8-LABEL: name: s_buffer_load_f32_vgpr_rsrc_offset_4095 ; GFX8: bb.1 (%ir-block.0): ; GFX8-NEXT: successors: %bb.2(0x80000000) ; GFX8-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 
= COPY $vgpr3 - ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX8-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.2: ; GFX8-NEXT: successors: %bb.3(0x80000000) ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY1]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY2]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY3]], implicit $exec ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX8-NEXT: [[COPY6:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; GFX8-NEXT: [[COPY7:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY6]], [[COPY4]], implicit $exec - ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY7]], [[COPY5]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX8-NEXT: [[PRED_COPY5:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX8-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX8-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY6]], [[PRED_COPY4]], implicit $exec + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY7]], [[PRED_COPY5]], implicit $exec ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; GFX8-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX8-NEXT: {{ $}} @@ -3260,7 +3260,7 @@ ; GFX8-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.5: - ; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]] + ; GFX8-NEXT: $vgpr0 = 
PRED_COPY [[BUFFER_LOAD_DWORD_OFFSET]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 4095, i32 0) ret float %val @@ -3273,37 +3273,37 @@ ; GFX6-NEXT: successors: %bb.2(0x80000000) ; GFX6-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX6-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] + ; GFX6-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.2: ; GFX6-NEXT: successors: %bb.3(0x80000000) ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY1]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY2]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY3]], implicit $exec ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX6-NEXT: [[COPY7:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; GFX6-NEXT: [[COPY8:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY7]], [[COPY5]], implicit $exec - ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY8]], [[COPY6]], implicit $exec + ; GFX6-NEXT: [[PRED_COPY5:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX6-NEXT: [[PRED_COPY6:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX6-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX6-NEXT: [[PRED_COPY8:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX6-NEXT: 
[[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY7]], [[PRED_COPY5]], implicit $exec + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY8]], [[PRED_COPY6]], implicit $exec ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; GFX6-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.3: ; GFX6-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) ; GFX6-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX6-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX6-NEXT: {{ $}} @@ -3313,44 +3313,44 @@ ; GFX6-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.5: - ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX7-LABEL: name: s_buffer_load_f32_vgpr_rsrc_offset_4096 ; GFX7: bb.1 (%ir-block.0): ; GFX7-NEXT: successors: %bb.2(0x80000000) ; GFX7-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.2: ; GFX7-NEXT: successors: %bb.3(0x80000000) ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 
[[PRED_COPY1]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY2]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY3]], implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX7-NEXT: [[COPY7:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; GFX7-NEXT: [[COPY8:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY7]], [[COPY5]], implicit $exec - ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY8]], [[COPY6]], implicit $exec + ; GFX7-NEXT: [[PRED_COPY5:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX7-NEXT: [[PRED_COPY6:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX7-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX7-NEXT: [[PRED_COPY8:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY7]], [[PRED_COPY5]], implicit $exec + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY8]], [[PRED_COPY6]], implicit $exec ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.3: ; GFX7-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) ; GFX7-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX7-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX7-NEXT: {{ $}} @@ -3360,35 +3360,35 @@ ; GFX7-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.5: - ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX8-LABEL: name: s_buffer_load_f32_vgpr_rsrc_offset_4096 ; GFX8: bb.1 (%ir-block.0): ; GFX8-NEXT: successors: %bb.2(0x80000000) ; GFX8-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = 
PRED_COPY $vgpr1 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1 ; GFX8-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.2: ; GFX8-NEXT: successors: %bb.3(0x80000000) ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY1]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY2]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY3]], implicit $exec ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX8-NEXT: [[COPY6:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; GFX8-NEXT: [[COPY7:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY6]], [[COPY4]], implicit $exec - ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY7]], [[COPY5]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX8-NEXT: [[PRED_COPY5:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX8-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX8-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY6]], [[PRED_COPY4]], implicit $exec + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY7]], [[PRED_COPY5]], implicit $exec ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; GFX8-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX8-NEXT: {{ $}} @@ -3405,7 +3405,7 @@ ; GFX8-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.5: - ; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]] + ; GFX8-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_OFFSET]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 4096, i32 0) ret float %val @@ -3419,36 +3419,36 @@ ; GFX6-NEXT: successors: %bb.2(0x80000000) ; GFX6-NEXT: liveins: $sgpr2, 
$vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX6-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.2: ; GFX6-NEXT: successors: %bb.3(0x80000000) ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY1]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY2]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY3]], implicit $exec ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX6-NEXT: [[COPY7:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; GFX6-NEXT: [[COPY8:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY7]], [[COPY5]], implicit $exec - ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY8]], [[COPY6]], implicit $exec + ; GFX6-NEXT: [[PRED_COPY5:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX6-NEXT: [[PRED_COPY6:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX6-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX6-NEXT: [[PRED_COPY8:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY7]], [[PRED_COPY5]], implicit $exec + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY8]], [[PRED_COPY6]], implicit $exec ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; GFX6-NEXT: 
[[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.3: ; GFX6-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE1]], [[COPY4]], 4064, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE1]], [[COPY4]], 4080, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE1]], [[PRED_COPY4]], 4064, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE1]], [[PRED_COPY4]], 4080, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) ; GFX6-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX6-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX6-NEXT: {{ $}} @@ -3459,58 +3459,58 @@ ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.5: ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFSET]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFSET1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0 - ; GFX6-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1 - ; GFX6-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2 - ; GFX6-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3 - ; GFX6-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4 - ; GFX6-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5 - ; GFX6-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6 - ; GFX6-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7 - ; GFX6-NEXT: $vgpr0 = COPY [[COPY9]] - ; GFX6-NEXT: $vgpr1 = COPY [[COPY10]] - ; GFX6-NEXT: $vgpr2 = COPY [[COPY11]] - ; GFX6-NEXT: $vgpr3 = COPY [[COPY12]] - ; GFX6-NEXT: $vgpr4 = COPY [[COPY13]] - ; GFX6-NEXT: $vgpr5 = COPY [[COPY14]] - ; GFX6-NEXT: $vgpr6 = COPY [[COPY15]] - ; GFX6-NEXT: $vgpr7 = COPY [[COPY16]] + ; GFX6-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub0 + ; GFX6-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub1 + ; GFX6-NEXT: [[PRED_COPY11:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub2 + ; GFX6-NEXT: [[PRED_COPY12:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub3 + ; GFX6-NEXT: [[PRED_COPY13:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub4 + ; GFX6-NEXT: [[PRED_COPY14:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub5 + ; GFX6-NEXT: [[PRED_COPY15:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub6 + ; GFX6-NEXT: [[PRED_COPY16:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub7 + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY9]] + ; GFX6-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY10]] + ; GFX6-NEXT: $vgpr2 = PRED_COPY [[PRED_COPY11]] + ; GFX6-NEXT: $vgpr3 = PRED_COPY [[PRED_COPY12]] + ; GFX6-NEXT: $vgpr4 = PRED_COPY [[PRED_COPY13]] + ; GFX6-NEXT: $vgpr5 = PRED_COPY [[PRED_COPY14]] + ; GFX6-NEXT: $vgpr6 = PRED_COPY [[PRED_COPY15]] + ; GFX6-NEXT: $vgpr7 = PRED_COPY [[PRED_COPY16]] ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, 
implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GFX7-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4064 ; GFX7: bb.1 (%ir-block.0): ; GFX7-NEXT: successors: %bb.2(0x80000000) ; GFX7-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.2: ; GFX7-NEXT: successors: %bb.3(0x80000000) ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY1]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY2]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY3]], implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX7-NEXT: [[COPY7:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; GFX7-NEXT: [[COPY8:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY7]], [[COPY5]], implicit $exec - ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY8]], [[COPY6]], implicit $exec + ; GFX7-NEXT: [[PRED_COPY5:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX7-NEXT: [[PRED_COPY6:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX7-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX7-NEXT: [[PRED_COPY8:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY7]], [[PRED_COPY5]], implicit $exec + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY8]], 
[[PRED_COPY6]], implicit $exec ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.3: ; GFX7-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE1]], [[COPY4]], 4064, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE1]], [[COPY4]], 4080, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE1]], [[PRED_COPY4]], 4064, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE1]], [[PRED_COPY4]], 4080, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) ; GFX7-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX7-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX7-NEXT: {{ $}} @@ -3521,58 +3521,58 @@ ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.5: ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFSET]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFSET1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0 - ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1 - ; GFX7-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2 - ; GFX7-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3 - ; GFX7-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4 - ; GFX7-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5 - ; GFX7-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6 - ; GFX7-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7 - ; GFX7-NEXT: $vgpr0 = COPY [[COPY9]] - ; GFX7-NEXT: $vgpr1 = COPY [[COPY10]] - ; GFX7-NEXT: $vgpr2 = COPY [[COPY11]] - ; GFX7-NEXT: $vgpr3 = COPY [[COPY12]] - ; GFX7-NEXT: $vgpr4 = COPY [[COPY13]] - ; GFX7-NEXT: $vgpr5 = COPY [[COPY14]] - ; GFX7-NEXT: $vgpr6 = COPY [[COPY15]] - ; GFX7-NEXT: $vgpr7 = COPY [[COPY16]] + ; GFX7-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub0 + ; GFX7-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub1 + ; GFX7-NEXT: [[PRED_COPY11:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub2 + ; GFX7-NEXT: [[PRED_COPY12:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub3 + ; GFX7-NEXT: [[PRED_COPY13:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub4 + ; GFX7-NEXT: [[PRED_COPY14:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub5 + ; GFX7-NEXT: [[PRED_COPY15:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub6 + ; GFX7-NEXT: [[PRED_COPY16:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub7 + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY9]] + ; GFX7-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY10]] + ; GFX7-NEXT: $vgpr2 = PRED_COPY [[PRED_COPY11]] + ; GFX7-NEXT: $vgpr3 = PRED_COPY [[PRED_COPY12]] + ; GFX7-NEXT: $vgpr4 = PRED_COPY [[PRED_COPY13]] + ; GFX7-NEXT: $vgpr5 = PRED_COPY [[PRED_COPY14]] + ; GFX7-NEXT: $vgpr6 = PRED_COPY 
[[PRED_COPY15]] + ; GFX7-NEXT: $vgpr7 = PRED_COPY [[PRED_COPY16]] ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GFX8-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4064 ; GFX8: bb.1 (%ir-block.0): ; GFX8-NEXT: successors: %bb.2(0x80000000) ; GFX8-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 ; GFX8-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.2: ; GFX8-NEXT: successors: %bb.3(0x80000000) ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY1]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY2]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY3]], implicit $exec ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX8-NEXT: [[COPY7:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; GFX8-NEXT: [[COPY8:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY7]], [[COPY5]], implicit $exec - ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY8]], [[COPY6]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY5:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX8-NEXT: [[PRED_COPY6:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX8-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX8-NEXT: [[PRED_COPY8:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX8-NEXT: 
[[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY7]], [[PRED_COPY5]], implicit $exec + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY8]], [[PRED_COPY6]], implicit $exec ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; GFX8-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.3: ; GFX8-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE1]], [[COPY4]], 4064, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE1]], [[COPY4]], 4080, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE1]], [[PRED_COPY4]], 4064, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE1]], [[PRED_COPY4]], 4080, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) ; GFX8-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX8-NEXT: {{ $}} @@ -3583,22 +3583,22 @@ ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.5: ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFSET]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFSET1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0 - ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1 - ; GFX8-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2 - ; GFX8-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3 - ; GFX8-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4 - ; GFX8-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5 - ; GFX8-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6 - ; GFX8-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7 - ; GFX8-NEXT: $vgpr0 = COPY [[COPY9]] - ; GFX8-NEXT: $vgpr1 = COPY [[COPY10]] - ; GFX8-NEXT: $vgpr2 = COPY [[COPY11]] - ; GFX8-NEXT: $vgpr3 = COPY [[COPY12]] - ; GFX8-NEXT: $vgpr4 = COPY [[COPY13]] - ; GFX8-NEXT: $vgpr5 = COPY [[COPY14]] - ; GFX8-NEXT: $vgpr6 = COPY [[COPY15]] - ; GFX8-NEXT: $vgpr7 = COPY [[COPY16]] + ; GFX8-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub0 + ; GFX8-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub1 + ; GFX8-NEXT: [[PRED_COPY11:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub2 + ; GFX8-NEXT: [[PRED_COPY12:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub3 + ; GFX8-NEXT: [[PRED_COPY13:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub4 + ; GFX8-NEXT: [[PRED_COPY14:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub5 + ; GFX8-NEXT: [[PRED_COPY15:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub6 + ; GFX8-NEXT: [[PRED_COPY16:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub7 + ; GFX8-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY9]] + ; GFX8-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY10]] + ; GFX8-NEXT: $vgpr2 = PRED_COPY 
[[PRED_COPY11]] + ; GFX8-NEXT: $vgpr3 = PRED_COPY [[PRED_COPY12]] + ; GFX8-NEXT: $vgpr4 = PRED_COPY [[PRED_COPY13]] + ; GFX8-NEXT: $vgpr5 = PRED_COPY [[PRED_COPY14]] + ; GFX8-NEXT: $vgpr6 = PRED_COPY [[PRED_COPY15]] + ; GFX8-NEXT: $vgpr7 = PRED_COPY [[PRED_COPY16]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 %soffset = add i32 %soffset.base, 4064 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0) @@ -3613,40 +3613,40 @@ ; GFX6-NEXT: successors: %bb.2(0x80000000) ; GFX6-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX6-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4068 - ; GFX6-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc - ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]] + ; GFX6-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[PRED_COPY4]], [[S_MOV_B32_]], implicit-def $scc + ; GFX6-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_ADD_I32_]] ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.2: ; GFX6-NEXT: successors: %bb.3(0x80000000) ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY1]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY2]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY3]], implicit $exec ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX6-NEXT: 
[[COPY8:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; GFX6-NEXT: [[COPY9:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY8]], [[COPY6]], implicit $exec - ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY9]], [[COPY7]], implicit $exec + ; GFX6-NEXT: [[PRED_COPY6:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX6-NEXT: [[PRED_COPY7:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX6-NEXT: [[PRED_COPY8:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX6-NEXT: [[PRED_COPY9:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY8]], [[PRED_COPY6]], implicit $exec + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY9]], [[PRED_COPY7]], implicit $exec ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; GFX6-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.3: ; GFX6-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 16, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 16, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) ; GFX6-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX6-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX6-NEXT: {{ $}} @@ -3657,62 +3657,62 @@ ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.5: ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX6-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0 - ; GFX6-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1 - ; GFX6-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2 - ; GFX6-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3 - ; GFX6-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4 - ; GFX6-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5 - ; GFX6-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6 - ; GFX6-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7 - ; GFX6-NEXT: $vgpr0 = COPY [[COPY10]] - ; GFX6-NEXT: $vgpr1 = COPY [[COPY11]] - ; GFX6-NEXT: $vgpr2 = COPY [[COPY12]] - ; GFX6-NEXT: $vgpr3 = COPY [[COPY13]] - ; GFX6-NEXT: $vgpr4 = COPY [[COPY14]] - ; GFX6-NEXT: $vgpr5 = COPY [[COPY15]] - ; GFX6-NEXT: $vgpr6 = COPY [[COPY16]] - ; GFX6-NEXT: $vgpr7 = COPY [[COPY17]] + ; GFX6-NEXT: 
[[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub0 + ; GFX6-NEXT: [[PRED_COPY11:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub1 + ; GFX6-NEXT: [[PRED_COPY12:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub2 + ; GFX6-NEXT: [[PRED_COPY13:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub3 + ; GFX6-NEXT: [[PRED_COPY14:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub4 + ; GFX6-NEXT: [[PRED_COPY15:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub5 + ; GFX6-NEXT: [[PRED_COPY16:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub6 + ; GFX6-NEXT: [[PRED_COPY17:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub7 + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY10]] + ; GFX6-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY11]] + ; GFX6-NEXT: $vgpr2 = PRED_COPY [[PRED_COPY12]] + ; GFX6-NEXT: $vgpr3 = PRED_COPY [[PRED_COPY13]] + ; GFX6-NEXT: $vgpr4 = PRED_COPY [[PRED_COPY14]] + ; GFX6-NEXT: $vgpr5 = PRED_COPY [[PRED_COPY15]] + ; GFX6-NEXT: $vgpr6 = PRED_COPY [[PRED_COPY16]] + ; GFX6-NEXT: $vgpr7 = PRED_COPY [[PRED_COPY17]] ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GFX7-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4068 ; GFX7: bb.1 (%ir-block.0): ; GFX7-NEXT: successors: %bb.2(0x80000000) ; GFX7-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4068 - ; GFX7-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]] + ; GFX7-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[PRED_COPY4]], [[S_MOV_B32_]], implicit-def $scc + ; GFX7-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_ADD_I32_]] ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.2: ; GFX7-NEXT: successors: %bb.3(0x80000000) ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY]], implicit $exec + ; GFX7-NEXT: 
[[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY1]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY2]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY3]], implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX7-NEXT: [[COPY8:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; GFX7-NEXT: [[COPY9:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY8]], [[COPY6]], implicit $exec - ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY9]], [[COPY7]], implicit $exec + ; GFX7-NEXT: [[PRED_COPY6:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX7-NEXT: [[PRED_COPY7:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX7-NEXT: [[PRED_COPY8:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX7-NEXT: [[PRED_COPY9:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY8]], [[PRED_COPY6]], implicit $exec + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY9]], [[PRED_COPY7]], implicit $exec ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.3: ; GFX7-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 16, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 16, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) ; GFX7-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX7-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX7-NEXT: {{ $}} @@ -3723,62 +3723,62 @@ ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.5: ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0 - ; GFX7-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1 - ; GFX7-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY 
[[REG_SEQUENCE2]].sub2 - ; GFX7-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3 - ; GFX7-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4 - ; GFX7-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5 - ; GFX7-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6 - ; GFX7-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7 - ; GFX7-NEXT: $vgpr0 = COPY [[COPY10]] - ; GFX7-NEXT: $vgpr1 = COPY [[COPY11]] - ; GFX7-NEXT: $vgpr2 = COPY [[COPY12]] - ; GFX7-NEXT: $vgpr3 = COPY [[COPY13]] - ; GFX7-NEXT: $vgpr4 = COPY [[COPY14]] - ; GFX7-NEXT: $vgpr5 = COPY [[COPY15]] - ; GFX7-NEXT: $vgpr6 = COPY [[COPY16]] - ; GFX7-NEXT: $vgpr7 = COPY [[COPY17]] + ; GFX7-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub0 + ; GFX7-NEXT: [[PRED_COPY11:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub1 + ; GFX7-NEXT: [[PRED_COPY12:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub2 + ; GFX7-NEXT: [[PRED_COPY13:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub3 + ; GFX7-NEXT: [[PRED_COPY14:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub4 + ; GFX7-NEXT: [[PRED_COPY15:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub5 + ; GFX7-NEXT: [[PRED_COPY16:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub6 + ; GFX7-NEXT: [[PRED_COPY17:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub7 + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY10]] + ; GFX7-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY11]] + ; GFX7-NEXT: $vgpr2 = PRED_COPY [[PRED_COPY12]] + ; GFX7-NEXT: $vgpr3 = PRED_COPY [[PRED_COPY13]] + ; GFX7-NEXT: $vgpr4 = PRED_COPY [[PRED_COPY14]] + ; GFX7-NEXT: $vgpr5 = PRED_COPY [[PRED_COPY15]] + ; GFX7-NEXT: $vgpr6 = PRED_COPY [[PRED_COPY16]] + ; GFX7-NEXT: $vgpr7 = PRED_COPY [[PRED_COPY17]] ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GFX8-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4068 ; GFX8: bb.1 (%ir-block.0): ; GFX8-NEXT: successors: %bb.2(0x80000000) ; GFX8-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4068 - ; GFX8-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc - ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]] + ; GFX8-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[PRED_COPY4]], [[S_MOV_B32_]], implicit-def $scc + ; GFX8-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_ADD_I32_]] ; GFX8-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; 
GFX8-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.2: ; GFX8-NEXT: successors: %bb.3(0x80000000) ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY1]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY2]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY3]], implicit $exec ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX8-NEXT: [[COPY8:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; GFX8-NEXT: [[COPY9:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY8]], [[COPY6]], implicit $exec - ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY9]], [[COPY7]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY6:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX8-NEXT: [[PRED_COPY7:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX8-NEXT: [[PRED_COPY8:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX8-NEXT: [[PRED_COPY9:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY8]], [[PRED_COPY6]], implicit $exec + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY9]], [[PRED_COPY7]], implicit $exec ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; GFX8-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.3: ; GFX8-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 16, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY5]], 
[[REG_SEQUENCE1]], [[S_MOV_B32_1]], 16, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) ; GFX8-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX8-NEXT: {{ $}} @@ -3789,22 +3789,22 @@ ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.5: ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0 - ; GFX8-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1 - ; GFX8-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2 - ; GFX8-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3 - ; GFX8-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4 - ; GFX8-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5 - ; GFX8-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6 - ; GFX8-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7 - ; GFX8-NEXT: $vgpr0 = COPY [[COPY10]] - ; GFX8-NEXT: $vgpr1 = COPY [[COPY11]] - ; GFX8-NEXT: $vgpr2 = COPY [[COPY12]] - ; GFX8-NEXT: $vgpr3 = COPY [[COPY13]] - ; GFX8-NEXT: $vgpr4 = COPY [[COPY14]] - ; GFX8-NEXT: $vgpr5 = COPY [[COPY15]] - ; GFX8-NEXT: $vgpr6 = COPY [[COPY16]] - ; GFX8-NEXT: $vgpr7 = COPY [[COPY17]] + ; GFX8-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub0 + ; GFX8-NEXT: [[PRED_COPY11:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub1 + ; GFX8-NEXT: [[PRED_COPY12:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub2 + ; GFX8-NEXT: [[PRED_COPY13:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub3 + ; GFX8-NEXT: [[PRED_COPY14:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub4 + ; GFX8-NEXT: [[PRED_COPY15:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub5 + ; GFX8-NEXT: [[PRED_COPY16:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub6 + ; GFX8-NEXT: [[PRED_COPY17:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub7 + ; GFX8-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY10]] + ; GFX8-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY11]] + ; GFX8-NEXT: $vgpr2 = PRED_COPY [[PRED_COPY12]] + ; GFX8-NEXT: $vgpr3 = PRED_COPY [[PRED_COPY13]] + ; GFX8-NEXT: $vgpr4 = PRED_COPY [[PRED_COPY14]] + ; GFX8-NEXT: $vgpr5 = PRED_COPY [[PRED_COPY15]] + ; GFX8-NEXT: $vgpr6 = PRED_COPY [[PRED_COPY16]] + ; GFX8-NEXT: $vgpr7 = PRED_COPY [[PRED_COPY17]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 %soffset = add i32 %soffset.base, 4068 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0) @@ -3817,40 +3817,40 @@ ; GFX6-NEXT: successors: %bb.2(0x80000000) ; GFX6-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; 
GFX6-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 - ; GFX6-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc - ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]] + ; GFX6-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[PRED_COPY4]], [[S_MOV_B32_]], implicit-def $scc + ; GFX6-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_ADD_I32_]] ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.2: ; GFX6-NEXT: successors: %bb.3(0x80000000) ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY1]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY2]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY3]], implicit $exec ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX6-NEXT: [[COPY8:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; GFX6-NEXT: [[COPY9:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY8]], [[COPY6]], implicit $exec - ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY9]], [[COPY7]], implicit $exec + ; GFX6-NEXT: [[PRED_COPY6:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX6-NEXT: [[PRED_COPY7:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX6-NEXT: [[PRED_COPY8:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX6-NEXT: [[PRED_COPY9:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY8]], [[PRED_COPY6]], implicit $exec + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY9]], [[PRED_COPY7]], implicit $exec ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; GFX6-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.3: ; GFX6-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX6-NEXT: 
{{ $}} - ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 16, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 16, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) ; GFX6-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX6-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX6-NEXT: {{ $}} @@ -3861,62 +3861,62 @@ ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.5: ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX6-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0 - ; GFX6-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1 - ; GFX6-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2 - ; GFX6-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3 - ; GFX6-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4 - ; GFX6-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5 - ; GFX6-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6 - ; GFX6-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7 - ; GFX6-NEXT: $vgpr0 = COPY [[COPY10]] - ; GFX6-NEXT: $vgpr1 = COPY [[COPY11]] - ; GFX6-NEXT: $vgpr2 = COPY [[COPY12]] - ; GFX6-NEXT: $vgpr3 = COPY [[COPY13]] - ; GFX6-NEXT: $vgpr4 = COPY [[COPY14]] - ; GFX6-NEXT: $vgpr5 = COPY [[COPY15]] - ; GFX6-NEXT: $vgpr6 = COPY [[COPY16]] - ; GFX6-NEXT: $vgpr7 = COPY [[COPY17]] + ; GFX6-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub0 + ; GFX6-NEXT: [[PRED_COPY11:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub1 + ; GFX6-NEXT: [[PRED_COPY12:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub2 + ; GFX6-NEXT: [[PRED_COPY13:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub3 + ; GFX6-NEXT: [[PRED_COPY14:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub4 + ; GFX6-NEXT: [[PRED_COPY15:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub5 + ; GFX6-NEXT: [[PRED_COPY16:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub6 + ; GFX6-NEXT: [[PRED_COPY17:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub7 + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY10]] + ; GFX6-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY11]] + ; GFX6-NEXT: $vgpr2 = PRED_COPY [[PRED_COPY12]] + ; GFX6-NEXT: $vgpr3 = PRED_COPY [[PRED_COPY13]] + ; GFX6-NEXT: $vgpr4 = PRED_COPY [[PRED_COPY14]] + ; GFX6-NEXT: $vgpr5 = PRED_COPY [[PRED_COPY15]] + ; GFX6-NEXT: $vgpr6 = PRED_COPY [[PRED_COPY16]] + ; GFX6-NEXT: $vgpr7 = PRED_COPY [[PRED_COPY17]] ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GFX7-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4096 ; GFX7: bb.1 (%ir-block.0): ; GFX7-NEXT: successors: %bb.2(0x80000000) ; GFX7-NEXT: liveins: 
$sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 - ; GFX7-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]] + ; GFX7-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[PRED_COPY4]], [[S_MOV_B32_]], implicit-def $scc + ; GFX7-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_ADD_I32_]] ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.2: ; GFX7-NEXT: successors: %bb.3(0x80000000) ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY1]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY2]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY3]], implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX7-NEXT: [[COPY8:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; GFX7-NEXT: [[COPY9:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY8]], [[COPY6]], implicit $exec - ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY9]], [[COPY7]], implicit $exec + ; GFX7-NEXT: [[PRED_COPY6:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX7-NEXT: [[PRED_COPY7:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX7-NEXT: [[PRED_COPY8:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX7-NEXT: [[PRED_COPY9:%[0-9]+]]:sreg_64 
= PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY8]], [[PRED_COPY6]], implicit $exec + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY9]], [[PRED_COPY7]], implicit $exec ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.3: ; GFX7-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 16, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 16, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) ; GFX7-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX7-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX7-NEXT: {{ $}} @@ -3927,62 +3927,62 @@ ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.5: ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0 - ; GFX7-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1 - ; GFX7-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2 - ; GFX7-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3 - ; GFX7-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4 - ; GFX7-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5 - ; GFX7-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6 - ; GFX7-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7 - ; GFX7-NEXT: $vgpr0 = COPY [[COPY10]] - ; GFX7-NEXT: $vgpr1 = COPY [[COPY11]] - ; GFX7-NEXT: $vgpr2 = COPY [[COPY12]] - ; GFX7-NEXT: $vgpr3 = COPY [[COPY13]] - ; GFX7-NEXT: $vgpr4 = COPY [[COPY14]] - ; GFX7-NEXT: $vgpr5 = COPY [[COPY15]] - ; GFX7-NEXT: $vgpr6 = COPY [[COPY16]] - ; GFX7-NEXT: $vgpr7 = COPY [[COPY17]] + ; GFX7-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub0 + ; GFX7-NEXT: [[PRED_COPY11:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub1 + ; GFX7-NEXT: [[PRED_COPY12:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub2 + ; GFX7-NEXT: [[PRED_COPY13:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub3 + ; GFX7-NEXT: [[PRED_COPY14:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub4 + ; GFX7-NEXT: [[PRED_COPY15:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub5 + ; GFX7-NEXT: [[PRED_COPY16:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub6 + ; GFX7-NEXT: [[PRED_COPY17:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub7 + ; GFX7-NEXT: $vgpr0 = 
PRED_COPY [[PRED_COPY10]] + ; GFX7-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY11]] + ; GFX7-NEXT: $vgpr2 = PRED_COPY [[PRED_COPY12]] + ; GFX7-NEXT: $vgpr3 = PRED_COPY [[PRED_COPY13]] + ; GFX7-NEXT: $vgpr4 = PRED_COPY [[PRED_COPY14]] + ; GFX7-NEXT: $vgpr5 = PRED_COPY [[PRED_COPY15]] + ; GFX7-NEXT: $vgpr6 = PRED_COPY [[PRED_COPY16]] + ; GFX7-NEXT: $vgpr7 = PRED_COPY [[PRED_COPY17]] ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GFX8-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4096 ; GFX8: bb.1 (%ir-block.0): ; GFX8-NEXT: successors: %bb.2(0x80000000) ; GFX8-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 - ; GFX8-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc - ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]] + ; GFX8-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[PRED_COPY4]], [[S_MOV_B32_]], implicit-def $scc + ; GFX8-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_ADD_I32_]] ; GFX8-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX8-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.2: ; GFX8-NEXT: successors: %bb.3(0x80000000) ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY1]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY2]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY3]], implicit $exec ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY 
[[REG_SEQUENCE]].sub2_sub3 - ; GFX8-NEXT: [[COPY8:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; GFX8-NEXT: [[COPY9:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY8]], [[COPY6]], implicit $exec - ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY9]], [[COPY7]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY6:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX8-NEXT: [[PRED_COPY7:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX8-NEXT: [[PRED_COPY8:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX8-NEXT: [[PRED_COPY9:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY8]], [[PRED_COPY6]], implicit $exec + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY9]], [[PRED_COPY7]], implicit $exec ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; GFX8-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.3: ; GFX8-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 16, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 16, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) ; GFX8-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX8-NEXT: {{ $}} @@ -3993,22 +3993,22 @@ ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.5: ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0 - ; GFX8-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1 - ; GFX8-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2 - ; GFX8-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3 - ; GFX8-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4 - ; GFX8-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5 - ; GFX8-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6 - ; GFX8-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7 - ; GFX8-NEXT: $vgpr0 = COPY [[COPY10]] - ; GFX8-NEXT: $vgpr1 = COPY [[COPY11]] - ; GFX8-NEXT: $vgpr2 = COPY [[COPY12]] - ; GFX8-NEXT: $vgpr3 = COPY [[COPY13]] - ; GFX8-NEXT: $vgpr4 = COPY [[COPY14]] - ; GFX8-NEXT: $vgpr5 = COPY [[COPY15]] - ; GFX8-NEXT: $vgpr6 = COPY [[COPY16]] - ; GFX8-NEXT: 
$vgpr7 = COPY [[COPY17]] + ; GFX8-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub0 + ; GFX8-NEXT: [[PRED_COPY11:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub1 + ; GFX8-NEXT: [[PRED_COPY12:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub2 + ; GFX8-NEXT: [[PRED_COPY13:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub3 + ; GFX8-NEXT: [[PRED_COPY14:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub4 + ; GFX8-NEXT: [[PRED_COPY15:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub5 + ; GFX8-NEXT: [[PRED_COPY16:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub6 + ; GFX8-NEXT: [[PRED_COPY17:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub7 + ; GFX8-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY10]] + ; GFX8-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY11]] + ; GFX8-NEXT: $vgpr2 = PRED_COPY [[PRED_COPY12]] + ; GFX8-NEXT: $vgpr3 = PRED_COPY [[PRED_COPY13]] + ; GFX8-NEXT: $vgpr4 = PRED_COPY [[PRED_COPY14]] + ; GFX8-NEXT: $vgpr5 = PRED_COPY [[PRED_COPY15]] + ; GFX8-NEXT: $vgpr6 = PRED_COPY [[PRED_COPY16]] + ; GFX8-NEXT: $vgpr7 = PRED_COPY [[PRED_COPY17]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 %soffset = add i32 %soffset.base, 4096 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0) @@ -4021,37 +4021,37 @@ ; GFX6-NEXT: successors: %bb.2(0x80000000) ; GFX6-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX6-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 5000 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.2: ; GFX6-NEXT: successors: %bb.3(0x80000000) ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY1]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY2]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY3]], implicit 
$exec ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX6-NEXT: [[COPY7:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; GFX6-NEXT: [[COPY8:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY7]], [[COPY5]], implicit $exec - ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY8]], [[COPY6]], implicit $exec + ; GFX6-NEXT: [[PRED_COPY5:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX6-NEXT: [[PRED_COPY6:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX6-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX6-NEXT: [[PRED_COPY8:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY7]], [[PRED_COPY5]], implicit $exec + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY8]], [[PRED_COPY6]], implicit $exec ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; GFX6-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.3: ; GFX6-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 16, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 16, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) ; GFX6-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX6-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX6-NEXT: {{ $}} @@ -4062,59 +4062,59 @@ ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.5: ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0 - ; GFX6-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1 - ; GFX6-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2 - ; GFX6-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3 - ; GFX6-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4 - ; GFX6-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5 - ; GFX6-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6 - ; GFX6-NEXT: 
[[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7 - ; GFX6-NEXT: $vgpr0 = COPY [[COPY9]] - ; GFX6-NEXT: $vgpr1 = COPY [[COPY10]] - ; GFX6-NEXT: $vgpr2 = COPY [[COPY11]] - ; GFX6-NEXT: $vgpr3 = COPY [[COPY12]] - ; GFX6-NEXT: $vgpr4 = COPY [[COPY13]] - ; GFX6-NEXT: $vgpr5 = COPY [[COPY14]] - ; GFX6-NEXT: $vgpr6 = COPY [[COPY15]] - ; GFX6-NEXT: $vgpr7 = COPY [[COPY16]] + ; GFX6-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub0 + ; GFX6-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub1 + ; GFX6-NEXT: [[PRED_COPY11:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub2 + ; GFX6-NEXT: [[PRED_COPY12:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub3 + ; GFX6-NEXT: [[PRED_COPY13:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub4 + ; GFX6-NEXT: [[PRED_COPY14:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub5 + ; GFX6-NEXT: [[PRED_COPY15:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub6 + ; GFX6-NEXT: [[PRED_COPY16:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub7 + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY9]] + ; GFX6-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY10]] + ; GFX6-NEXT: $vgpr2 = PRED_COPY [[PRED_COPY11]] + ; GFX6-NEXT: $vgpr3 = PRED_COPY [[PRED_COPY12]] + ; GFX6-NEXT: $vgpr4 = PRED_COPY [[PRED_COPY13]] + ; GFX6-NEXT: $vgpr5 = PRED_COPY [[PRED_COPY14]] + ; GFX6-NEXT: $vgpr6 = PRED_COPY [[PRED_COPY15]] + ; GFX6-NEXT: $vgpr7 = PRED_COPY [[PRED_COPY16]] ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GFX7-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_5000 ; GFX7: bb.1 (%ir-block.0): ; GFX7-NEXT: successors: %bb.2(0x80000000) ; GFX7-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 5000 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.2: ; GFX7-NEXT: successors: %bb.3(0x80000000) ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY]], implicit $exec + ; GFX7-NEXT: 
[[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY1]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY2]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY3]], implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX7-NEXT: [[COPY7:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; GFX7-NEXT: [[COPY8:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY7]], [[COPY5]], implicit $exec - ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY8]], [[COPY6]], implicit $exec + ; GFX7-NEXT: [[PRED_COPY5:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX7-NEXT: [[PRED_COPY6:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX7-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX7-NEXT: [[PRED_COPY8:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY7]], [[PRED_COPY5]], implicit $exec + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY8]], [[PRED_COPY6]], implicit $exec ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.3: ; GFX7-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 16, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 16, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) ; GFX7-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX7-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX7-NEXT: {{ $}} @@ -4125,59 +4125,59 @@ ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.5: ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0 - ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1 - ; GFX7-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY 
[[REG_SEQUENCE2]].sub2 - ; GFX7-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3 - ; GFX7-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4 - ; GFX7-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5 - ; GFX7-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6 - ; GFX7-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7 - ; GFX7-NEXT: $vgpr0 = COPY [[COPY9]] - ; GFX7-NEXT: $vgpr1 = COPY [[COPY10]] - ; GFX7-NEXT: $vgpr2 = COPY [[COPY11]] - ; GFX7-NEXT: $vgpr3 = COPY [[COPY12]] - ; GFX7-NEXT: $vgpr4 = COPY [[COPY13]] - ; GFX7-NEXT: $vgpr5 = COPY [[COPY14]] - ; GFX7-NEXT: $vgpr6 = COPY [[COPY15]] - ; GFX7-NEXT: $vgpr7 = COPY [[COPY16]] + ; GFX7-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub0 + ; GFX7-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub1 + ; GFX7-NEXT: [[PRED_COPY11:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub2 + ; GFX7-NEXT: [[PRED_COPY12:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub3 + ; GFX7-NEXT: [[PRED_COPY13:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub4 + ; GFX7-NEXT: [[PRED_COPY14:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub5 + ; GFX7-NEXT: [[PRED_COPY15:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub6 + ; GFX7-NEXT: [[PRED_COPY16:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub7 + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY9]] + ; GFX7-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY10]] + ; GFX7-NEXT: $vgpr2 = PRED_COPY [[PRED_COPY11]] + ; GFX7-NEXT: $vgpr3 = PRED_COPY [[PRED_COPY12]] + ; GFX7-NEXT: $vgpr4 = PRED_COPY [[PRED_COPY13]] + ; GFX7-NEXT: $vgpr5 = PRED_COPY [[PRED_COPY14]] + ; GFX7-NEXT: $vgpr6 = PRED_COPY [[PRED_COPY15]] + ; GFX7-NEXT: $vgpr7 = PRED_COPY [[PRED_COPY16]] ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GFX8-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_5000 ; GFX8: bb.1 (%ir-block.0): ; GFX8-NEXT: successors: %bb.2(0x80000000) ; GFX8-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4064 ; GFX8-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.2: ; GFX8-NEXT: successors: %bb.3(0x80000000) ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec - ; GFX8-NEXT: 
[[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY1]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY2]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY3]], implicit $exec ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX8-NEXT: [[COPY7:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; GFX8-NEXT: [[COPY8:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY7]], [[COPY5]], implicit $exec - ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY8]], [[COPY6]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY5:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX8-NEXT: [[PRED_COPY6:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX8-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX8-NEXT: [[PRED_COPY8:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY7]], [[PRED_COPY5]], implicit $exec + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY8]], [[PRED_COPY6]], implicit $exec ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; GFX8-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.3: ; GFX8-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 936, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 952, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 936, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 952, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) ; GFX8-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX8-NEXT: {{ $}} @@ -4188,22 +4188,22 @@ ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.5: ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_256 = REG_SEQUENCE 
[[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0 - ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1 - ; GFX8-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2 - ; GFX8-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3 - ; GFX8-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4 - ; GFX8-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5 - ; GFX8-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6 - ; GFX8-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7 - ; GFX8-NEXT: $vgpr0 = COPY [[COPY9]] - ; GFX8-NEXT: $vgpr1 = COPY [[COPY10]] - ; GFX8-NEXT: $vgpr2 = COPY [[COPY11]] - ; GFX8-NEXT: $vgpr3 = COPY [[COPY12]] - ; GFX8-NEXT: $vgpr4 = COPY [[COPY13]] - ; GFX8-NEXT: $vgpr5 = COPY [[COPY14]] - ; GFX8-NEXT: $vgpr6 = COPY [[COPY15]] - ; GFX8-NEXT: $vgpr7 = COPY [[COPY16]] + ; GFX8-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub0 + ; GFX8-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub1 + ; GFX8-NEXT: [[PRED_COPY11:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub2 + ; GFX8-NEXT: [[PRED_COPY12:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub3 + ; GFX8-NEXT: [[PRED_COPY13:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub4 + ; GFX8-NEXT: [[PRED_COPY14:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub5 + ; GFX8-NEXT: [[PRED_COPY15:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub6 + ; GFX8-NEXT: [[PRED_COPY16:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub7 + ; GFX8-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY9]] + ; GFX8-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY10]] + ; GFX8-NEXT: $vgpr2 = PRED_COPY [[PRED_COPY11]] + ; GFX8-NEXT: $vgpr3 = PRED_COPY [[PRED_COPY12]] + ; GFX8-NEXT: $vgpr4 = PRED_COPY [[PRED_COPY13]] + ; GFX8-NEXT: $vgpr5 = PRED_COPY [[PRED_COPY14]] + ; GFX8-NEXT: $vgpr6 = PRED_COPY [[PRED_COPY15]] + ; GFX8-NEXT: $vgpr7 = PRED_COPY [[PRED_COPY16]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 %soffset = add i32 %offset.base, 5000 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0) @@ -4216,37 +4216,37 @@ ; GFX6-NEXT: successors: %bb.2(0x80000000) ; GFX6-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX6-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4076 ; GFX6-NEXT: 
[[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.2: ; GFX6-NEXT: successors: %bb.3(0x80000000) ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY1]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY2]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY3]], implicit $exec ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX6-NEXT: [[COPY7:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; GFX6-NEXT: [[COPY8:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY7]], [[COPY5]], implicit $exec - ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY8]], [[COPY6]], implicit $exec + ; GFX6-NEXT: [[PRED_COPY5:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX6-NEXT: [[PRED_COPY6:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX6-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX6-NEXT: [[PRED_COPY8:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY7]], [[PRED_COPY5]], implicit $exec + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY8]], [[PRED_COPY6]], implicit $exec ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; GFX6-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.3: ; GFX6-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 16, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE1]], 
[[S_MOV_B32_]], 16, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) ; GFX6-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX6-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX6-NEXT: {{ $}} @@ -4257,59 +4257,59 @@ ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.5: ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0 - ; GFX6-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1 - ; GFX6-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2 - ; GFX6-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3 - ; GFX6-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4 - ; GFX6-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5 - ; GFX6-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6 - ; GFX6-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7 - ; GFX6-NEXT: $vgpr0 = COPY [[COPY9]] - ; GFX6-NEXT: $vgpr1 = COPY [[COPY10]] - ; GFX6-NEXT: $vgpr2 = COPY [[COPY11]] - ; GFX6-NEXT: $vgpr3 = COPY [[COPY12]] - ; GFX6-NEXT: $vgpr4 = COPY [[COPY13]] - ; GFX6-NEXT: $vgpr5 = COPY [[COPY14]] - ; GFX6-NEXT: $vgpr6 = COPY [[COPY15]] - ; GFX6-NEXT: $vgpr7 = COPY [[COPY16]] + ; GFX6-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub0 + ; GFX6-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub1 + ; GFX6-NEXT: [[PRED_COPY11:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub2 + ; GFX6-NEXT: [[PRED_COPY12:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub3 + ; GFX6-NEXT: [[PRED_COPY13:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub4 + ; GFX6-NEXT: [[PRED_COPY14:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub5 + ; GFX6-NEXT: [[PRED_COPY15:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub6 + ; GFX6-NEXT: [[PRED_COPY16:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub7 + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY9]] + ; GFX6-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY10]] + ; GFX6-NEXT: $vgpr2 = PRED_COPY [[PRED_COPY11]] + ; GFX6-NEXT: $vgpr3 = PRED_COPY [[PRED_COPY12]] + ; GFX6-NEXT: $vgpr4 = PRED_COPY [[PRED_COPY13]] + ; GFX6-NEXT: $vgpr5 = PRED_COPY [[PRED_COPY14]] + ; GFX6-NEXT: $vgpr6 = PRED_COPY [[PRED_COPY15]] + ; GFX6-NEXT: $vgpr7 = PRED_COPY [[PRED_COPY16]] ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GFX7-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4076 ; GFX7: bb.1 (%ir-block.0): ; GFX7-NEXT: successors: %bb.2(0x80000000) ; GFX7-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX7-NEXT: 
[[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4076 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.2: ; GFX7-NEXT: successors: %bb.3(0x80000000) ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY1]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY2]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY3]], implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX7-NEXT: [[COPY7:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; GFX7-NEXT: [[COPY8:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY7]], [[COPY5]], implicit $exec - ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY8]], [[COPY6]], implicit $exec + ; GFX7-NEXT: [[PRED_COPY5:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX7-NEXT: [[PRED_COPY6:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX7-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX7-NEXT: [[PRED_COPY8:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY7]], [[PRED_COPY5]], implicit $exec + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY8]], [[PRED_COPY6]], implicit $exec ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.3: ; GFX7-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 16, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7-NEXT: 
[[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 16, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) ; GFX7-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX7-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX7-NEXT: {{ $}} @@ -4320,59 +4320,59 @@ ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.5: ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0 - ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1 - ; GFX7-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2 - ; GFX7-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3 - ; GFX7-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4 - ; GFX7-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5 - ; GFX7-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6 - ; GFX7-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7 - ; GFX7-NEXT: $vgpr0 = COPY [[COPY9]] - ; GFX7-NEXT: $vgpr1 = COPY [[COPY10]] - ; GFX7-NEXT: $vgpr2 = COPY [[COPY11]] - ; GFX7-NEXT: $vgpr3 = COPY [[COPY12]] - ; GFX7-NEXT: $vgpr4 = COPY [[COPY13]] - ; GFX7-NEXT: $vgpr5 = COPY [[COPY14]] - ; GFX7-NEXT: $vgpr6 = COPY [[COPY15]] - ; GFX7-NEXT: $vgpr7 = COPY [[COPY16]] + ; GFX7-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub0 + ; GFX7-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub1 + ; GFX7-NEXT: [[PRED_COPY11:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub2 + ; GFX7-NEXT: [[PRED_COPY12:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub3 + ; GFX7-NEXT: [[PRED_COPY13:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub4 + ; GFX7-NEXT: [[PRED_COPY14:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub5 + ; GFX7-NEXT: [[PRED_COPY15:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub6 + ; GFX7-NEXT: [[PRED_COPY16:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub7 + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY9]] + ; GFX7-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY10]] + ; GFX7-NEXT: $vgpr2 = PRED_COPY [[PRED_COPY11]] + ; GFX7-NEXT: $vgpr3 = PRED_COPY [[PRED_COPY12]] + ; GFX7-NEXT: $vgpr4 = PRED_COPY [[PRED_COPY13]] + ; GFX7-NEXT: $vgpr5 = PRED_COPY [[PRED_COPY14]] + ; GFX7-NEXT: $vgpr6 = PRED_COPY [[PRED_COPY15]] + ; GFX7-NEXT: $vgpr7 = PRED_COPY [[PRED_COPY16]] ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GFX8-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4076 ; GFX8: bb.1 (%ir-block.0): ; GFX8-NEXT: successors: %bb.2(0x80000000) ; GFX8-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; 
GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 12 ; GFX8-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.2: ; GFX8-NEXT: successors: %bb.3(0x80000000) ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY1]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY2]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY3]], implicit $exec ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX8-NEXT: [[COPY7:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; GFX8-NEXT: [[COPY8:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY7]], [[COPY5]], implicit $exec - ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY8]], [[COPY6]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY5:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX8-NEXT: [[PRED_COPY6:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX8-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX8-NEXT: [[PRED_COPY8:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY7]], [[PRED_COPY5]], implicit $exec + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY8]], [[PRED_COPY6]], implicit $exec ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; GFX8-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.3: ; GFX8-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 4064, 0, 0, 
implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 4080, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 4064, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 4080, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) ; GFX8-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX8-NEXT: {{ $}} @@ -4383,22 +4383,22 @@ ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.5: ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0 - ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1 - ; GFX8-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2 - ; GFX8-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3 - ; GFX8-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4 - ; GFX8-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5 - ; GFX8-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6 - ; GFX8-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7 - ; GFX8-NEXT: $vgpr0 = COPY [[COPY9]] - ; GFX8-NEXT: $vgpr1 = COPY [[COPY10]] - ; GFX8-NEXT: $vgpr2 = COPY [[COPY11]] - ; GFX8-NEXT: $vgpr3 = COPY [[COPY12]] - ; GFX8-NEXT: $vgpr4 = COPY [[COPY13]] - ; GFX8-NEXT: $vgpr5 = COPY [[COPY14]] - ; GFX8-NEXT: $vgpr6 = COPY [[COPY15]] - ; GFX8-NEXT: $vgpr7 = COPY [[COPY16]] + ; GFX8-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub0 + ; GFX8-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub1 + ; GFX8-NEXT: [[PRED_COPY11:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub2 + ; GFX8-NEXT: [[PRED_COPY12:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub3 + ; GFX8-NEXT: [[PRED_COPY13:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub4 + ; GFX8-NEXT: [[PRED_COPY14:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub5 + ; GFX8-NEXT: [[PRED_COPY15:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub6 + ; GFX8-NEXT: [[PRED_COPY16:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub7 + ; GFX8-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY9]] + ; GFX8-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY10]] + ; GFX8-NEXT: $vgpr2 = PRED_COPY [[PRED_COPY11]] + ; GFX8-NEXT: $vgpr3 = PRED_COPY [[PRED_COPY12]] + ; GFX8-NEXT: $vgpr4 = PRED_COPY [[PRED_COPY13]] + ; GFX8-NEXT: $vgpr5 = PRED_COPY [[PRED_COPY14]] + ; GFX8-NEXT: $vgpr6 = PRED_COPY [[PRED_COPY15]] + ; GFX8-NEXT: $vgpr7 = PRED_COPY [[PRED_COPY16]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 %soffset = add i32 %offset.base, 4076 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0) @@ -4411,37 +4411,37 @@ ; GFX6-NEXT: successors: %bb.2(0x80000000) ; GFX6-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: 
[[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX6-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4080 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.2: ; GFX6-NEXT: successors: %bb.3(0x80000000) ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY1]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY2]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY3]], implicit $exec ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX6-NEXT: [[COPY7:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; GFX6-NEXT: [[COPY8:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY7]], [[COPY5]], implicit $exec - ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY8]], [[COPY6]], implicit $exec + ; GFX6-NEXT: [[PRED_COPY5:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX6-NEXT: [[PRED_COPY6:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX6-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX6-NEXT: [[PRED_COPY8:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY7]], [[PRED_COPY5]], implicit $exec + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY8]], [[PRED_COPY6]], implicit $exec ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; GFX6-NEXT: 
[[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.3: ; GFX6-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 16, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 16, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) ; GFX6-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX6-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX6-NEXT: {{ $}} @@ -4452,59 +4452,59 @@ ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.5: ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0 - ; GFX6-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1 - ; GFX6-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2 - ; GFX6-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3 - ; GFX6-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4 - ; GFX6-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5 - ; GFX6-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6 - ; GFX6-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7 - ; GFX6-NEXT: $vgpr0 = COPY [[COPY9]] - ; GFX6-NEXT: $vgpr1 = COPY [[COPY10]] - ; GFX6-NEXT: $vgpr2 = COPY [[COPY11]] - ; GFX6-NEXT: $vgpr3 = COPY [[COPY12]] - ; GFX6-NEXT: $vgpr4 = COPY [[COPY13]] - ; GFX6-NEXT: $vgpr5 = COPY [[COPY14]] - ; GFX6-NEXT: $vgpr6 = COPY [[COPY15]] - ; GFX6-NEXT: $vgpr7 = COPY [[COPY16]] + ; GFX6-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub0 + ; GFX6-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub1 + ; GFX6-NEXT: [[PRED_COPY11:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub2 + ; GFX6-NEXT: [[PRED_COPY12:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub3 + ; GFX6-NEXT: [[PRED_COPY13:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub4 + ; GFX6-NEXT: [[PRED_COPY14:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub5 + ; GFX6-NEXT: [[PRED_COPY15:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub6 + ; GFX6-NEXT: [[PRED_COPY16:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub7 + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY9]] + ; GFX6-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY10]] + ; GFX6-NEXT: $vgpr2 = PRED_COPY [[PRED_COPY11]] + ; GFX6-NEXT: $vgpr3 = PRED_COPY [[PRED_COPY12]] + ; GFX6-NEXT: $vgpr4 = PRED_COPY [[PRED_COPY13]] + ; GFX6-NEXT: $vgpr5 = PRED_COPY [[PRED_COPY14]] + ; GFX6-NEXT: $vgpr6 = PRED_COPY [[PRED_COPY15]] + ; GFX6-NEXT: $vgpr7 = PRED_COPY [[PRED_COPY16]] ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, 
implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GFX7-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4080 ; GFX7: bb.1 (%ir-block.0): ; GFX7-NEXT: successors: %bb.2(0x80000000) ; GFX7-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4080 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.2: ; GFX7-NEXT: successors: %bb.3(0x80000000) ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY1]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY2]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY3]], implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX7-NEXT: [[COPY7:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; GFX7-NEXT: [[COPY8:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY7]], [[COPY5]], implicit $exec - ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY8]], [[COPY6]], implicit $exec + ; GFX7-NEXT: [[PRED_COPY5:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX7-NEXT: [[PRED_COPY6:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX7-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX7-NEXT: [[PRED_COPY8:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY7]], 
[[PRED_COPY5]], implicit $exec + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY8]], [[PRED_COPY6]], implicit $exec ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.3: ; GFX7-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 16, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 16, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) ; GFX7-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX7-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX7-NEXT: {{ $}} @@ -4515,59 +4515,59 @@ ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.5: ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0 - ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1 - ; GFX7-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2 - ; GFX7-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3 - ; GFX7-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4 - ; GFX7-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5 - ; GFX7-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6 - ; GFX7-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7 - ; GFX7-NEXT: $vgpr0 = COPY [[COPY9]] - ; GFX7-NEXT: $vgpr1 = COPY [[COPY10]] - ; GFX7-NEXT: $vgpr2 = COPY [[COPY11]] - ; GFX7-NEXT: $vgpr3 = COPY [[COPY12]] - ; GFX7-NEXT: $vgpr4 = COPY [[COPY13]] - ; GFX7-NEXT: $vgpr5 = COPY [[COPY14]] - ; GFX7-NEXT: $vgpr6 = COPY [[COPY15]] - ; GFX7-NEXT: $vgpr7 = COPY [[COPY16]] + ; GFX7-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub0 + ; GFX7-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub1 + ; GFX7-NEXT: [[PRED_COPY11:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub2 + ; GFX7-NEXT: [[PRED_COPY12:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub3 + ; GFX7-NEXT: [[PRED_COPY13:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub4 + ; GFX7-NEXT: [[PRED_COPY14:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub5 + ; GFX7-NEXT: [[PRED_COPY15:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub6 + ; GFX7-NEXT: [[PRED_COPY16:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub7 + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY9]] + ; GFX7-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY10]] + ; GFX7-NEXT: $vgpr2 = PRED_COPY [[PRED_COPY11]] + ; GFX7-NEXT: $vgpr3 
= PRED_COPY [[PRED_COPY12]] + ; GFX7-NEXT: $vgpr4 = PRED_COPY [[PRED_COPY13]] + ; GFX7-NEXT: $vgpr5 = PRED_COPY [[PRED_COPY14]] + ; GFX7-NEXT: $vgpr6 = PRED_COPY [[PRED_COPY15]] + ; GFX7-NEXT: $vgpr7 = PRED_COPY [[PRED_COPY16]] ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GFX8-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4080 ; GFX8: bb.1 (%ir-block.0): ; GFX8-NEXT: successors: %bb.2(0x80000000) ; GFX8-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 16 ; GFX8-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.2: ; GFX8-NEXT: successors: %bb.3(0x80000000) ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY1]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY2]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY3]], implicit $exec ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX8-NEXT: [[COPY7:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; GFX8-NEXT: [[COPY8:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY7]], [[COPY5]], implicit $exec - ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY8]], [[COPY6]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY5:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX8-NEXT: [[PRED_COPY6:%[0-9]+]]:vreg_64 = PRED_COPY 
[[REG_SEQUENCE]].sub2_sub3 + ; GFX8-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX8-NEXT: [[PRED_COPY8:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY7]], [[PRED_COPY5]], implicit $exec + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY8]], [[PRED_COPY6]], implicit $exec ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; GFX8-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.3: ; GFX8-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 4064, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 4080, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 4064, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 4080, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) ; GFX8-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX8-NEXT: {{ $}} @@ -4578,22 +4578,22 @@ ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.5: ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0 - ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1 - ; GFX8-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2 - ; GFX8-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3 - ; GFX8-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4 - ; GFX8-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5 - ; GFX8-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6 - ; GFX8-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7 - ; GFX8-NEXT: $vgpr0 = COPY [[COPY9]] - ; GFX8-NEXT: $vgpr1 = COPY [[COPY10]] - ; GFX8-NEXT: $vgpr2 = COPY [[COPY11]] - ; GFX8-NEXT: $vgpr3 = COPY [[COPY12]] - ; GFX8-NEXT: $vgpr4 = COPY [[COPY13]] - ; GFX8-NEXT: $vgpr5 = COPY [[COPY14]] - ; GFX8-NEXT: $vgpr6 = COPY [[COPY15]] - ; GFX8-NEXT: $vgpr7 = COPY [[COPY16]] + ; GFX8-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub0 + ; GFX8-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub1 + ; GFX8-NEXT: [[PRED_COPY11:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub2 + ; GFX8-NEXT: [[PRED_COPY12:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub3 + ; GFX8-NEXT: [[PRED_COPY13:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub4 + ; GFX8-NEXT: [[PRED_COPY14:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub5 + ; GFX8-NEXT: 
[[PRED_COPY15:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub6 + ; GFX8-NEXT: [[PRED_COPY16:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub7 + ; GFX8-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY9]] + ; GFX8-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY10]] + ; GFX8-NEXT: $vgpr2 = PRED_COPY [[PRED_COPY11]] + ; GFX8-NEXT: $vgpr3 = PRED_COPY [[PRED_COPY12]] + ; GFX8-NEXT: $vgpr4 = PRED_COPY [[PRED_COPY13]] + ; GFX8-NEXT: $vgpr5 = PRED_COPY [[PRED_COPY14]] + ; GFX8-NEXT: $vgpr6 = PRED_COPY [[PRED_COPY15]] + ; GFX8-NEXT: $vgpr7 = PRED_COPY [[PRED_COPY16]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 %soffset = add i32 %offset.base, 4080 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0) @@ -4606,28 +4606,28 @@ ; GFX6-NEXT: successors: %bb.2(0x80000000) ; GFX6-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX6-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.2: ; GFX6-NEXT: successors: %bb.3(0x80000000) ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY1]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY2]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY3]], implicit $exec ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX6-NEXT: [[COPY6:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; GFX6-NEXT: [[COPY7:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY6]], [[COPY4]], implicit $exec - ; GFX6-NEXT: 
[[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY7]], [[COPY5]], implicit $exec + ; GFX6-NEXT: [[PRED_COPY4:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX6-NEXT: [[PRED_COPY5:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX6-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX6-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY6]], [[PRED_COPY4]], implicit $exec + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY7]], [[PRED_COPY5]], implicit $exec ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; GFX6-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX6-NEXT: {{ $}} @@ -4646,50 +4646,50 @@ ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.5: ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFSET]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFSET1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0 - ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1 - ; GFX6-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2 - ; GFX6-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3 - ; GFX6-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4 - ; GFX6-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5 - ; GFX6-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6 - ; GFX6-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7 - ; GFX6-NEXT: $vgpr0 = COPY [[COPY8]] - ; GFX6-NEXT: $vgpr1 = COPY [[COPY9]] - ; GFX6-NEXT: $vgpr2 = COPY [[COPY10]] - ; GFX6-NEXT: $vgpr3 = COPY [[COPY11]] - ; GFX6-NEXT: $vgpr4 = COPY [[COPY12]] - ; GFX6-NEXT: $vgpr5 = COPY [[COPY13]] - ; GFX6-NEXT: $vgpr6 = COPY [[COPY14]] - ; GFX6-NEXT: $vgpr7 = COPY [[COPY15]] + ; GFX6-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub0 + ; GFX6-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub1 + ; GFX6-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub2 + ; GFX6-NEXT: [[PRED_COPY11:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub3 + ; GFX6-NEXT: [[PRED_COPY12:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub4 + ; GFX6-NEXT: [[PRED_COPY13:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub5 + ; GFX6-NEXT: [[PRED_COPY14:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub6 + ; GFX6-NEXT: [[PRED_COPY15:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub7 + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY8]] + ; GFX6-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY9]] + ; GFX6-NEXT: $vgpr2 = PRED_COPY [[PRED_COPY10]] + ; GFX6-NEXT: $vgpr3 = PRED_COPY [[PRED_COPY11]] + ; GFX6-NEXT: $vgpr4 = PRED_COPY [[PRED_COPY12]] + ; GFX6-NEXT: $vgpr5 = PRED_COPY [[PRED_COPY13]] + ; GFX6-NEXT: $vgpr6 = PRED_COPY [[PRED_COPY14]] + ; GFX6-NEXT: $vgpr7 = PRED_COPY [[PRED_COPY15]] ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GFX7-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_offset_4064 ; GFX7: bb.1 (%ir-block.0): ; GFX7-NEXT: successors: %bb.2(0x80000000) ; GFX7-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, 
$vgpr3 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.2: ; GFX7-NEXT: successors: %bb.3(0x80000000) ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY1]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY2]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY3]], implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX7-NEXT: [[COPY6:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; GFX7-NEXT: [[COPY7:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY6]], [[COPY4]], implicit $exec - ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY7]], [[COPY5]], implicit $exec + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX7-NEXT: [[PRED_COPY5:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX7-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX7-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY6]], [[PRED_COPY4]], implicit $exec + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY7]], [[PRED_COPY5]], implicit $exec ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def 
$scc, implicit $exec ; GFX7-NEXT: {{ $}} @@ -4708,50 +4708,50 @@ ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.5: ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFSET]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFSET1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0 - ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1 - ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2 - ; GFX7-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3 - ; GFX7-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4 - ; GFX7-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5 - ; GFX7-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6 - ; GFX7-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7 - ; GFX7-NEXT: $vgpr0 = COPY [[COPY8]] - ; GFX7-NEXT: $vgpr1 = COPY [[COPY9]] - ; GFX7-NEXT: $vgpr2 = COPY [[COPY10]] - ; GFX7-NEXT: $vgpr3 = COPY [[COPY11]] - ; GFX7-NEXT: $vgpr4 = COPY [[COPY12]] - ; GFX7-NEXT: $vgpr5 = COPY [[COPY13]] - ; GFX7-NEXT: $vgpr6 = COPY [[COPY14]] - ; GFX7-NEXT: $vgpr7 = COPY [[COPY15]] + ; GFX7-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub0 + ; GFX7-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub1 + ; GFX7-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub2 + ; GFX7-NEXT: [[PRED_COPY11:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub3 + ; GFX7-NEXT: [[PRED_COPY12:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub4 + ; GFX7-NEXT: [[PRED_COPY13:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub5 + ; GFX7-NEXT: [[PRED_COPY14:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub6 + ; GFX7-NEXT: [[PRED_COPY15:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub7 + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY8]] + ; GFX7-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY9]] + ; GFX7-NEXT: $vgpr2 = PRED_COPY [[PRED_COPY10]] + ; GFX7-NEXT: $vgpr3 = PRED_COPY [[PRED_COPY11]] + ; GFX7-NEXT: $vgpr4 = PRED_COPY [[PRED_COPY12]] + ; GFX7-NEXT: $vgpr5 = PRED_COPY [[PRED_COPY13]] + ; GFX7-NEXT: $vgpr6 = PRED_COPY [[PRED_COPY14]] + ; GFX7-NEXT: $vgpr7 = PRED_COPY [[PRED_COPY15]] ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GFX8-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_offset_4064 ; GFX8: bb.1 (%ir-block.0): ; GFX8-NEXT: successors: %bb.2(0x80000000) ; GFX8-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX8-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = 
S_MOV_B64 $exec ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.2: ; GFX8-NEXT: successors: %bb.3(0x80000000) ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY1]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY2]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY3]], implicit $exec ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX8-NEXT: [[COPY6:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; GFX8-NEXT: [[COPY7:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY6]], [[COPY4]], implicit $exec - ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY7]], [[COPY5]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX8-NEXT: [[PRED_COPY5:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX8-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX8-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY6]], [[PRED_COPY4]], implicit $exec + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY7]], [[PRED_COPY5]], implicit $exec ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; GFX8-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX8-NEXT: {{ $}} @@ -4770,22 +4770,22 @@ ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.5: ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFSET]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFSET1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0 - ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1 - ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2 - ; GFX8-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3 - ; GFX8-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4 - ; GFX8-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5 - ; GFX8-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6 - ; GFX8-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7 - ; GFX8-NEXT: $vgpr0 = COPY [[COPY8]] - ; GFX8-NEXT: $vgpr1 = COPY [[COPY9]] - ; GFX8-NEXT: 
$vgpr2 = COPY [[COPY10]] - ; GFX8-NEXT: $vgpr3 = COPY [[COPY11]] - ; GFX8-NEXT: $vgpr4 = COPY [[COPY12]] - ; GFX8-NEXT: $vgpr5 = COPY [[COPY13]] - ; GFX8-NEXT: $vgpr6 = COPY [[COPY14]] - ; GFX8-NEXT: $vgpr7 = COPY [[COPY15]] + ; GFX8-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub0 + ; GFX8-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub1 + ; GFX8-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub2 + ; GFX8-NEXT: [[PRED_COPY11:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub3 + ; GFX8-NEXT: [[PRED_COPY12:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub4 + ; GFX8-NEXT: [[PRED_COPY13:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub5 + ; GFX8-NEXT: [[PRED_COPY14:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub6 + ; GFX8-NEXT: [[PRED_COPY15:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub7 + ; GFX8-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY8]] + ; GFX8-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY9]] + ; GFX8-NEXT: $vgpr2 = PRED_COPY [[PRED_COPY10]] + ; GFX8-NEXT: $vgpr3 = PRED_COPY [[PRED_COPY11]] + ; GFX8-NEXT: $vgpr4 = PRED_COPY [[PRED_COPY12]] + ; GFX8-NEXT: $vgpr5 = PRED_COPY [[PRED_COPY13]] + ; GFX8-NEXT: $vgpr6 = PRED_COPY [[PRED_COPY14]] + ; GFX8-NEXT: $vgpr7 = PRED_COPY [[PRED_COPY15]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 4064, i32 0) ret <8 x float> %val @@ -4796,43 +4796,43 @@ ; GFX6: bb.1 (%ir-block.0): ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) - ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX6-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[PRED_COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX7-LABEL: name: s_buffer_load_f32_offset_add_vgpr_sgpr ; GFX7: bb.1 (%ir-block.0): ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; 
GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) - ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[PRED_COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX8-LABEL: name: s_buffer_load_f32_offset_add_vgpr_sgpr ; GFX8: bb.1 (%ir-block.0): ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) - ; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[PRED_COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX8-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %offset = add i32 %offset.v, %offset.s %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %offset, i32 0) @@ -4844,43 +4844,43 @@ ; GFX6: bb.1 (%ir-block.0): ; GFX6-NEXT: 
liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) - ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX6-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[PRED_COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX7-LABEL: name: s_buffer_load_f32_offset_add_sgpr_vgpr ; GFX7: bb.1 (%ir-block.0): ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) - ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[PRED_COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX8-LABEL: name: s_buffer_load_f32_offset_add_sgpr_vgpr 
; GFX8: bb.1 (%ir-block.0): ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) - ; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[PRED_COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX8-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %offset = add i32 %offset.s, %offset.v %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %offset, i32 0) @@ -4892,52 +4892,52 @@ ; GFX6: bb.1 (%ir-block.0): ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] - ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX6-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; GFX6-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY5]] + ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY4]], [[PRED_COPY6]], 0, implicit $exec ; GFX6-NEXT: 
[[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_ADD_CO_U32_e64_]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) - ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX7-LABEL: name: s_buffer_load_f32_offset_add_vgpr_sgpr_imm ; GFX7: bb.1 (%ir-block.0): ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] - ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; GFX7-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY5]] + ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY4]], [[PRED_COPY6]], 0, implicit $exec ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_ADD_CO_U32_e64_]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) - ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX8-LABEL: name: s_buffer_load_f32_offset_add_vgpr_sgpr_imm ; GFX8: bb.1 (%ir-block.0): ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] - ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX8-NEXT: 
[[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; GFX8-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY5]] + ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY4]], [[PRED_COPY6]], 0, implicit $exec ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_ADD_CO_U32_e64_]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) - ; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX8-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %offset.base = add i32 %offset.v, %offset.s %offset = add i32 %offset.base, 1024 @@ -4950,52 +4950,52 @@ ; GFX6: bb.1 (%ir-block.0): ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] - ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY6]], [[COPY4]], 0, implicit $exec + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX6-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; GFX6-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY5]] + ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY6]], [[PRED_COPY4]], 0, implicit $exec ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_ADD_CO_U32_e64_]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) - ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX7-LABEL: name: s_buffer_load_f32_offset_add_sgpr_vgpr_imm ; GFX7: bb.1 (%ir-block.0): ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: 
[[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] - ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY6]], [[COPY4]], 0, implicit $exec + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; GFX7-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY5]] + ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY6]], [[PRED_COPY4]], 0, implicit $exec ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_ADD_CO_U32_e64_]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) - ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX8-LABEL: name: s_buffer_load_f32_offset_add_sgpr_vgpr_imm ; GFX8: bb.1 (%ir-block.0): ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] - ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY6]], [[COPY4]], 0, implicit $exec + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; GFX8-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY5]] + ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead 
[[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY6]], [[PRED_COPY4]], 0, implicit $exec ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_ADD_CO_U32_e64_]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) - ; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX8-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %offset.base = add i32 %offset.s, %offset.v %offset = add i32 %offset.base, 1024 @@ -5009,49 +5009,49 @@ ; GFX6: bb.1 (%ir-block.0): ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX6-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1024 - ; GFX6-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY5]], [[S_MOV_B32_]], implicit-def $scc - ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) - ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX6-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[PRED_COPY5]], [[S_MOV_B32_]], implicit-def $scc + ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX7-LABEL: name: s_buffer_load_f32_offset_add_imm_sgpr_vgpr ; GFX7: bb.1 (%ir-block.0): ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX7-NEXT: 
[[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1024 - ; GFX7-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY5]], [[S_MOV_B32_]], implicit-def $scc - ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) - ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX7-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[PRED_COPY5]], [[S_MOV_B32_]], implicit-def $scc + ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX8-LABEL: name: s_buffer_load_f32_offset_add_imm_sgpr_vgpr ; GFX8: bb.1 (%ir-block.0): ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1024 - ; GFX8-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY5]], [[S_MOV_B32_]], implicit-def $scc - ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) - ; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX8-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[PRED_COPY5]], [[S_MOV_B32_]], implicit-def $scc + ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX8-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %offset.base = add i32 %offset.s, 1024 %offset = add i32 %offset.base, %offset.v @@ -5064,52 +5064,52 @@ ; GFX6: bb.1 (%ir-block.0): ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 
- ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX6-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1024 - ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec - ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_ADD_CO_U32_e64_]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) - ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX6-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY4]], [[PRED_COPY6]], 0, implicit $exec + ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_ADD_CO_U32_e64_]], [[REG_SEQUENCE]], [[PRED_COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX7-LABEL: name: s_buffer_load_f32_offset_add_imm_vgpr_sgpr ; GFX7: bb.1 (%ir-block.0): ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1024 - ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead 
[[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec - ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_ADD_CO_U32_e64_]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) - ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX7-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY4]], [[PRED_COPY6]], 0, implicit $exec + ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_ADD_CO_U32_e64_]], [[REG_SEQUENCE]], [[PRED_COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX8-LABEL: name: s_buffer_load_f32_offset_add_imm_vgpr_sgpr ; GFX8: bb.1 (%ir-block.0): ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1024 - ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec - ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_ADD_CO_U32_e64_]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) - ; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX8-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY4]], [[PRED_COPY6]], 0, implicit $exec + ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_ADD_CO_U32_e64_]], [[REG_SEQUENCE]], [[PRED_COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX8-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %offset.base = add i32 %offset.v, 1024 %offset = add i32 %offset.base, %offset.s diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.softwqm.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.softwqm.ll --- 
a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.softwqm.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.softwqm.ll @@ -6,9 +6,9 @@ ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[SOFT_WQM:%[0-9]+]]:vgpr_32 = SOFT_WQM [[COPY]], implicit $exec - ; GCN-NEXT: $vgpr0 = COPY [[SOFT_WQM]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[SOFT_WQM:%[0-9]+]]:vgpr_32 = SOFT_WQM [[PRED_COPY]], implicit $exec + ; GCN-NEXT: $vgpr0 = PRED_COPY [[SOFT_WQM]] ; GCN-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %ret = call float @llvm.amdgcn.softwqm.f32(float %val) ret float %ret @@ -19,9 +19,9 @@ ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[SOFT_WQM:%[0-9]+]]:vgpr_32 = SOFT_WQM [[COPY]], implicit $exec - ; GCN-NEXT: $vgpr0 = COPY [[SOFT_WQM]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[SOFT_WQM:%[0-9]+]]:vgpr_32 = SOFT_WQM [[PRED_COPY]], implicit $exec + ; GCN-NEXT: $vgpr0 = PRED_COPY [[SOFT_WQM]] ; GCN-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = bitcast float %arg to <2 x half> %ret = call <2 x half> @llvm.amdgcn.softwqm.v2f16(<2 x half> %val) @@ -34,14 +34,14 @@ ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 ; GCN-NEXT: [[SOFT_WQM:%[0-9]+]]:vreg_64 = SOFT_WQM [[REG_SEQUENCE]], implicit $exec - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[SOFT_WQM]].sub0 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[SOFT_WQM]].sub1 - ; GCN-NEXT: $vgpr0 = COPY [[COPY2]] - ; GCN-NEXT: $vgpr1 = COPY [[COPY3]] + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[SOFT_WQM]].sub0 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[SOFT_WQM]].sub1 + ; GCN-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY2]] + ; GCN-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY3]] ; GCN-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %ret = call double @llvm.amdgcn.softwqm.f64(double %val) %bitcast = bitcast double %ret to <2 x float> @@ -61,17 +61,17 @@ ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2 ; GCN-NEXT: [[SOFT_WQM:%[0-9]+]]:vreg_96 = SOFT_WQM [[REG_SEQUENCE]], implicit $exec - ; GCN-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[SOFT_WQM]].sub0 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[SOFT_WQM]].sub1 - ; 
GCN-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[SOFT_WQM]].sub2 - ; GCN-NEXT: $vgpr0 = COPY [[COPY3]] - ; GCN-NEXT: $vgpr1 = COPY [[COPY4]] - ; GCN-NEXT: $vgpr2 = COPY [[COPY5]] + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[SOFT_WQM]].sub0 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[SOFT_WQM]].sub1 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[SOFT_WQM]].sub2 + ; GCN-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY3]] + ; GCN-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY4]] + ; GCN-NEXT: $vgpr2 = PRED_COPY [[PRED_COPY5]] ; GCN-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 %ret = call <3 x float> @llvm.amdgcn.softwqm.v3f32(<3 x float> %val) ret <3 x float> %ret diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.add.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.add.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.add.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.add.ll @@ -7,18 +7,18 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; CHECK-NEXT: [[BUFFER_ATOMIC_ADD_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_BOTHEN_RTN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) - ; CHECK-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_BOTHEN_RTN]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY5]], %subreg.sub0, [[PRED_COPY6]], %subreg.sub1 + ; CHECK-NEXT: [[BUFFER_ATOMIC_ADD_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_BOTHEN_RTN [[PRED_COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[PRED_COPY7]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[BUFFER_ATOMIC_ADD_BOTHEN_RTN]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %ret = call i32 @llvm.amdgcn.struct.buffer.atomic.add.i32(i32 %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, 
i32 %soffset, i32 0) %cast = bitcast i32 %ret to float @@ -30,18 +30,18 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; CHECK-NEXT: [[BUFFER_ATOMIC_ADD_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_BOTHEN_RTN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) - ; CHECK-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_BOTHEN_RTN]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY5]], %subreg.sub0, [[PRED_COPY6]], %subreg.sub1 + ; CHECK-NEXT: [[BUFFER_ATOMIC_ADD_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_BOTHEN_RTN [[PRED_COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[PRED_COPY7]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[BUFFER_ATOMIC_ADD_BOTHEN_RTN]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %ret = call i32 @llvm.amdgcn.struct.buffer.atomic.add.i32(i32 %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) %cast = bitcast i32 %ret to float @@ -53,23 +53,23 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY 
$sgpr6 - ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 - ; CHECK-NEXT: [[BUFFER_ATOMIC_ADD_X2_BOTHEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_ADD_X2_BOTHEN_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY8]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 7) - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_X2_BOTHEN_RTN]].sub0 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_X2_BOTHEN_RTN]].sub1 - ; CHECK-NEXT: $vgpr0 = COPY [[COPY9]] - ; CHECK-NEXT: $vgpr1 = COPY [[COPY10]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY3]], %subreg.sub1, [[PRED_COPY4]], %subreg.sub2, [[PRED_COPY5]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY6]], %subreg.sub0, [[PRED_COPY7]], %subreg.sub1 + ; CHECK-NEXT: [[BUFFER_ATOMIC_ADD_X2_BOTHEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_ADD_X2_BOTHEN_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[PRED_COPY8]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 7) + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_ATOMIC_ADD_X2_BOTHEN_RTN]].sub0 + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_ATOMIC_ADD_X2_BOTHEN_RTN]].sub1 + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY9]] + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY10]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %ret = call i64 @llvm.amdgcn.struct.buffer.atomic.add.i64(i64 %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) %cast = bitcast i64 %ret to <2 x float> @@ -81,19 +81,19 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 - ; CHECK-NEXT: 
BUFFER_ATOMIC_ADD_X2_BOTHEN [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY8]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 7) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY3]], %subreg.sub1, [[PRED_COPY4]], %subreg.sub2, [[PRED_COPY5]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY6]], %subreg.sub0, [[PRED_COPY7]], %subreg.sub1 + ; CHECK-NEXT: BUFFER_ATOMIC_ADD_X2_BOTHEN [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[PRED_COPY8]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 7) ; CHECK-NEXT: S_ENDPGM 0 %ret = call i64 @llvm.amdgcn.struct.buffer.atomic.add.i64(i64 %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret void @@ -106,45 +106,45 @@ ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY]] - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY6]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]] + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY5]] + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY6]] ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; 
CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY1]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY2]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY3]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY4]], implicit $exec ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY13]], [[PRED_COPY11]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY14]], [[PRED_COPY12]], implicit $exec ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY7]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY7]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[PRED_COPY7]], implicit $exec ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1 - ; CHECK-NEXT: 
[[BUFFER_ATOMIC_ADD_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_BOTHEN_RTN [[COPY8]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) + ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY9]], %subreg.sub0, [[PRED_COPY10]], %subreg.sub1 + ; CHECK-NEXT: [[BUFFER_ATOMIC_ADD_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_BOTHEN_RTN [[PRED_COPY8]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -154,7 +154,7 @@ ; CHECK-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.5: - ; CHECK-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_BOTHEN_RTN]] + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[BUFFER_ATOMIC_ADD_BOTHEN_RTN]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %ret = call i32 @llvm.amdgcn.struct.buffer.atomic.add.i32(i32 %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) %cast = bitcast i32 %ret to float @@ -168,45 +168,45 @@ ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY]] - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY6]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]] + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY5]] + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY6]] ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec - ; 
CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY1]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY2]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY3]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY4]], implicit $exec ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY13]], [[PRED_COPY11]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY14]], [[PRED_COPY12]], implicit $exec ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY7]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY7]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[PRED_COPY7]], implicit $exec ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1 - ; CHECK-NEXT: BUFFER_ATOMIC_ADD_BOTHEN [[COPY8]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) + ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY9]], %subreg.sub0, [[PRED_COPY10]], %subreg.sub1 + ; CHECK-NEXT: BUFFER_ATOMIC_ADD_BOTHEN 
[[PRED_COPY8]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -227,18 +227,18 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; CHECK-NEXT: [[BUFFER_ATOMIC_ADD_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_BOTHEN_RTN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 3, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) - ; CHECK-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_BOTHEN_RTN]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY5]], %subreg.sub0, [[PRED_COPY6]], %subreg.sub1 + ; CHECK-NEXT: [[BUFFER_ATOMIC_ADD_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_BOTHEN_RTN [[PRED_COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[PRED_COPY7]], 0, 3, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[BUFFER_ATOMIC_ADD_BOTHEN_RTN]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %ret = call i32 @llvm.amdgcn.struct.buffer.atomic.add.i32(i32 %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 2) %cast = bitcast i32 %ret to float diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.cmpswap.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.cmpswap.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.cmpswap.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.cmpswap.ll @@ -8,21 +8,21 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: 
[[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 - ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; CHECK-NEXT: [[BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY8]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN]].sub0 - ; CHECK-NEXT: $vgpr0 = COPY [[COPY9]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY3]], %subreg.sub1, [[PRED_COPY4]], %subreg.sub2, [[PRED_COPY5]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY6]], %subreg.sub0, [[PRED_COPY7]], %subreg.sub1 + ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; CHECK-NEXT: [[BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[PRED_COPY8]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN]].sub0 + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[COPY]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %ret = call i32 @llvm.amdgcn.struct.buffer.atomic.cmpswap.i32(i32 %val, i32 %cmp, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) %cast = bitcast i32 %ret to float @@ -35,19 +35,19 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; CHECK-NEXT: 
[[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 - ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; CHECK-NEXT: BUFFER_ATOMIC_CMPSWAP_BOTHEN [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY8]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY3]], %subreg.sub1, [[PRED_COPY4]], %subreg.sub2, [[PRED_COPY5]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY6]], %subreg.sub0, [[PRED_COPY7]], %subreg.sub1 + ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; CHECK-NEXT: BUFFER_ATOMIC_CMPSWAP_BOTHEN [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[PRED_COPY8]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) ; CHECK-NEXT: S_ENDPGM 0 %ret = call i32 @llvm.amdgcn.struct.buffer.atomic.cmpswap.i32(i32 %val, i32 %cmp, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret void @@ -60,49 +60,49 @@ ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY]] - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY1]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[COPY7]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY3]], %subreg.sub1, 
[[PRED_COPY4]], %subreg.sub2, [[PRED_COPY5]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]] + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY1]] + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY7]] ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY2]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY3]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY4]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY5]], implicit $exec ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY15]], [[COPY13]], implicit $exec - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY16]], [[COPY14]], implicit $exec + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY15]], [[PRED_COPY13]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY16]], [[PRED_COPY14]], implicit $exec ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec - ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY8]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY8]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 
[[V_READFIRSTLANE_B32_4]], [[PRED_COPY8]], implicit $exec ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY11]], %subreg.sub0, [[COPY12]], %subreg.sub1 - ; CHECK-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1 + ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY11]], %subreg.sub0, [[PRED_COPY12]], %subreg.sub1 + ; CHECK-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY9]], %subreg.sub0, [[PRED_COPY10]], %subreg.sub1 ; CHECK-NEXT: [[BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN [[REG_SEQUENCE3]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN]].sub0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN]].sub0 ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -112,7 +112,7 @@ ; CHECK-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.5: - ; CHECK-NEXT: $vgpr0 = COPY [[COPY17]] + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[COPY]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %ret = call i32 @llvm.amdgcn.struct.buffer.atomic.cmpswap.i32(i32 %val, i32 %cmp, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) %cast = bitcast i32 %ret to float @@ -126,47 +126,47 @@ ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY]] - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY1]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[COPY7]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE 
[[PRED_COPY2]], %subreg.sub0, [[PRED_COPY3]], %subreg.sub1, [[PRED_COPY4]], %subreg.sub2, [[PRED_COPY5]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]] + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY1]] + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY7]] ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY2]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY3]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY4]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY5]], implicit $exec ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY15]], [[COPY13]], implicit $exec - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY16]], [[COPY14]], implicit $exec + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY15]], [[PRED_COPY13]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY16]], [[PRED_COPY14]], implicit $exec ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec - ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY8]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY8]], implicit $exec + ; CHECK-NEXT: 
[[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[PRED_COPY8]], implicit $exec ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY11]], %subreg.sub0, [[COPY12]], %subreg.sub1 - ; CHECK-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1 + ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY11]], %subreg.sub0, [[PRED_COPY12]], %subreg.sub1 + ; CHECK-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY9]], %subreg.sub0, [[PRED_COPY10]], %subreg.sub1 ; CHECK-NEXT: BUFFER_ATOMIC_CMPSWAP_BOTHEN [[REG_SEQUENCE3]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec @@ -187,21 +187,21 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 - ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; CHECK-NEXT: [[BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY8]], 4095, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN]].sub0 - ; CHECK-NEXT: $vgpr0 = COPY [[COPY9]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY3]], %subreg.sub1, [[PRED_COPY4]], %subreg.sub2, [[PRED_COPY5]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; CHECK-NEXT: 
[[PRED_COPY8:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY6]], %subreg.sub0, [[PRED_COPY7]], %subreg.sub1 + ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; CHECK-NEXT: [[BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[PRED_COPY8]], 4095, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN]].sub0 + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[COPY]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %voffset = add i32 %voffset.base, 4095 %ret = call i32 @llvm.amdgcn.struct.buffer.atomic.cmpswap.i32(i32 %val, i32 %cmp, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.fadd.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.fadd.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.fadd.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.fadd.ll @@ -8,33 +8,33 @@ ; GFX908: bb.1 (%ir-block.0): ; GFX908-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2 ; GFX908-NEXT: {{ $}} - ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX908-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX908-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX908-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX908-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX908-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX908-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; GFX908-NEXT: BUFFER_ATOMIC_ADD_F32_BOTHEN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) + ; GFX908-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX908-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX908-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX908-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX908-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; GFX908-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX908-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX908-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY5]], %subreg.sub0, [[PRED_COPY6]], %subreg.sub1 + ; GFX908-NEXT: BUFFER_ATOMIC_ADD_F32_BOTHEN [[PRED_COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[PRED_COPY7]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) ; GFX908-NEXT: S_ENDPGM 0 ; GFX90A-LABEL: name: struct_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset ; GFX90A: bb.1 
(%ir-block.0): ; GFX90A-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2 ; GFX90A-NEXT: {{ $}} - ; GFX90A-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX90A-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX90A-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX90A-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX90A-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX90A-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; GFX90A-NEXT: BUFFER_ATOMIC_ADD_F32_BOTHEN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) + ; GFX90A-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX90A-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX90A-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX90A-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; GFX90A-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX90A-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; GFX90A-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[PRED_COPY5]], %subreg.sub0, [[PRED_COPY6]], %subreg.sub1 + ; GFX90A-NEXT: BUFFER_ATOMIC_ADD_F32_BOTHEN [[PRED_COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[PRED_COPY7]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) ; GFX90A-NEXT: S_ENDPGM 0 %ret = call float @llvm.amdgcn.struct.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret void @@ -45,33 +45,33 @@ ; GFX908: bb.1 (%ir-block.0): ; GFX908-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2 ; GFX908-NEXT: {{ $}} - ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX908-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX908-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX908-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX908-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX908-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX908-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; GFX908-NEXT: BUFFER_ATOMIC_ADD_F32_BOTHEN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 4095, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) + ; GFX908-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX908-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX908-NEXT: 
[[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX908-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX908-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; GFX908-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX908-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX908-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY5]], %subreg.sub0, [[PRED_COPY6]], %subreg.sub1 + ; GFX908-NEXT: BUFFER_ATOMIC_ADD_F32_BOTHEN [[PRED_COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[PRED_COPY7]], 4095, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) ; GFX908-NEXT: S_ENDPGM 0 ; GFX90A-LABEL: name: struct_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset_plus4095__sgpr_soffset ; GFX90A: bb.1 (%ir-block.0): ; GFX90A-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2 ; GFX90A-NEXT: {{ $}} - ; GFX90A-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX90A-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX90A-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX90A-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX90A-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX90A-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; GFX90A-NEXT: BUFFER_ATOMIC_ADD_F32_BOTHEN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 4095, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) + ; GFX90A-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX90A-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX90A-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX90A-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; GFX90A-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX90A-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; GFX90A-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[PRED_COPY5]], %subreg.sub0, [[PRED_COPY6]], %subreg.sub1 + ; GFX90A-NEXT: BUFFER_ATOMIC_ADD_F32_BOTHEN [[PRED_COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[PRED_COPY7]], 4095, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) ; GFX90A-NEXT: S_ENDPGM 0 %voffset.add = add i32 %voffset, 4095 %ret = call float @llvm.amdgcn.struct.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset.add, i32 %soffset, i32 0) @@ -83,29 +83,29 @@ ; GFX908: bb.1 (%ir-block.0): ; GFX908-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; GFX908-NEXT: {{ $}} - ; GFX908-NEXT: 
[[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX908-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX908-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX908-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX908-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX908-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX908-NEXT: BUFFER_ATOMIC_ADD_F32_IDXEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) + ; GFX908-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX908-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX908-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX908-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX908-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; GFX908-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX908-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; GFX908-NEXT: BUFFER_ATOMIC_ADD_F32_IDXEN [[PRED_COPY]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[PRED_COPY6]], 4095, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) ; GFX908-NEXT: S_ENDPGM 0 ; GFX90A-LABEL: name: struct_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__4095_voffset__sgpr_soffset ; GFX90A: bb.1 (%ir-block.0): ; GFX90A-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; GFX90A-NEXT: {{ $}} - ; GFX90A-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX90A-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX90A-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX90A-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX90A-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX90A-NEXT: BUFFER_ATOMIC_ADD_F32_IDXEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) + ; GFX90A-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX90A-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX90A-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX90A-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; GFX90A-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; GFX90A-NEXT: BUFFER_ATOMIC_ADD_F32_IDXEN [[PRED_COPY]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[PRED_COPY6]], 4095, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) ; GFX90A-NEXT: S_ENDPGM 0 %ret = call float @llvm.amdgcn.struct.buffer.atomic.fadd.f32(float 
%val, <4 x i32> %rsrc, i32 %vindex, i32 4095, i32 %soffset, i32 0) ret void @@ -117,29 +117,29 @@ ; GFX908: bb.1 (%ir-block.0): ; GFX908-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; GFX908-NEXT: {{ $}} - ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX908-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX908-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX908-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX908-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX908-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX908-NEXT: BUFFER_ATOMIC_ADD_F32_IDXEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) + ; GFX908-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX908-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX908-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX908-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX908-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; GFX908-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX908-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; GFX908-NEXT: BUFFER_ATOMIC_ADD_F32_IDXEN [[PRED_COPY]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) ; GFX908-NEXT: S_ENDPGM 0 ; GFX90A-LABEL: name: struct_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset ; GFX90A: bb.1 (%ir-block.0): ; GFX90A-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; GFX90A-NEXT: {{ $}} - ; GFX90A-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX90A-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX90A-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX90A-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX90A-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX90A-NEXT: BUFFER_ATOMIC_ADD_F32_IDXEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) + ; GFX90A-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX90A-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX90A-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX90A-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; GFX90A-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; GFX90A-NEXT: BUFFER_ATOMIC_ADD_F32_IDXEN [[PRED_COPY]], 
[[PRED_COPY5]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) ; GFX90A-NEXT: S_ENDPGM 0 %ret = call float @llvm.amdgcn.struct.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 %soffset, i32 0) ret void @@ -152,45 +152,45 @@ ; GFX908-NEXT: successors: %bb.2(0x80000000) ; GFX908-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; GFX908-NEXT: {{ $}} - ; GFX908-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX908-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX908-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX908-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX908-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; GFX908-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY]] - ; GFX908-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] - ; GFX908-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY6]] + ; GFX908-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX908-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX908-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX908-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX908-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; GFX908-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX908-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX908-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 + ; GFX908-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]] + ; GFX908-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY5]] + ; GFX908-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY6]] ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX908-NEXT: {{ $}} ; GFX908-NEXT: bb.2: ; GFX908-NEXT: successors: %bb.3(0x80000000) ; GFX908-NEXT: {{ $}} - ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec - ; GFX908-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec - ; GFX908-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec - ; GFX908-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY1]], implicit $exec + ; GFX908-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY2]], implicit $exec + ; GFX908-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY3]], implicit $exec + ; GFX908-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY4]], implicit $exec ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX908-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = 
COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX908-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX908-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; GFX908-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec - ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec + ; GFX908-NEXT: [[PRED_COPY11:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX908-NEXT: [[PRED_COPY12:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX908-NEXT: [[PRED_COPY13:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX908-NEXT: [[PRED_COPY14:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY13]], [[PRED_COPY11]], implicit $exec + ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY14]], [[PRED_COPY12]], implicit $exec ; GFX908-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc - ; GFX908-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY7]], implicit $exec + ; GFX908-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY7]], implicit $exec + ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[PRED_COPY7]], implicit $exec ; GFX908-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX908-NEXT: {{ $}} ; GFX908-NEXT: bb.3: ; GFX908-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX908-NEXT: {{ $}} - ; GFX908-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1 - ; GFX908-NEXT: BUFFER_ATOMIC_ADD_F32_BOTHEN [[COPY8]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) + ; GFX908-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY9]], %subreg.sub0, [[PRED_COPY10]], %subreg.sub1 + ; GFX908-NEXT: BUFFER_ATOMIC_ADD_F32_BOTHEN [[PRED_COPY8]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX908-NEXT: {{ $}} @@ -206,45 +206,45 @@ ; GFX90A-NEXT: successors: %bb.2(0x80000000) ; GFX90A-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; GFX90A-NEXT: {{ $}} - ; GFX90A-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX90A-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128_align2 = REG_SEQUENCE [[COPY1]], %subreg.sub0, 
[[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX90A-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX90A-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX90A-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; GFX90A-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY]] - ; GFX90A-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] - ; GFX90A-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY6]] + ; GFX90A-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX90A-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX90A-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128_align2 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; GFX90A-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX90A-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX90A-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 + ; GFX90A-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]] + ; GFX90A-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY5]] + ; GFX90A-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY6]] ; GFX90A-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.2: ; GFX90A-NEXT: successors: %bb.3(0x80000000) ; GFX90A-NEXT: {{ $}} - ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec - ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec - ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec - ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY1]], implicit $exec + ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY2]], implicit $exec + ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY3]], implicit $exec + ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY4]], implicit $exec ; GFX90A-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX90A-NEXT: [[COPY11:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX90A-NEXT: [[COPY12:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX90A-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; GFX90A-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; GFX90A-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec - ; GFX90A-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec + ; GFX90A-NEXT: [[PRED_COPY11:%[0-9]+]]:vreg_64_align2 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX90A-NEXT: [[PRED_COPY12:%[0-9]+]]:vreg_64_align2 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX90A-NEXT: [[PRED_COPY13:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX90A-NEXT: [[PRED_COPY14:%[0-9]+]]:sreg_64 = 
PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX90A-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY13]], [[PRED_COPY11]], implicit $exec + ; GFX90A-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY14]], [[PRED_COPY12]], implicit $exec ; GFX90A-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc - ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; GFX90A-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY7]], implicit $exec + ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY7]], implicit $exec + ; GFX90A-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[PRED_COPY7]], implicit $exec ; GFX90A-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; GFX90A-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.3: ; GFX90A-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX90A-NEXT: {{ $}} - ; GFX90A-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1 - ; GFX90A-NEXT: BUFFER_ATOMIC_ADD_F32_BOTHEN [[COPY8]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) + ; GFX90A-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[PRED_COPY9]], %subreg.sub0, [[PRED_COPY10]], %subreg.sub1 + ; GFX90A-NEXT: BUFFER_ATOMIC_ADD_F32_BOTHEN [[PRED_COPY8]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) ; GFX90A-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX90A-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX90A-NEXT: {{ $}} @@ -266,42 +266,42 @@ ; GFX908-NEXT: successors: %bb.2(0x80000000) ; GFX908-NEXT: liveins: $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; GFX908-NEXT: {{ $}} - ; GFX908-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX908-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX908-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX908-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; GFX908-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY]] - ; GFX908-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] + ; GFX908-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX908-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX908-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX908-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX908-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], 
%subreg.sub3 + ; GFX908-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX908-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 + ; GFX908-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]] + ; GFX908-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY5]] ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX908-NEXT: {{ $}} ; GFX908-NEXT: bb.2: ; GFX908-NEXT: successors: %bb.3(0x80000000) ; GFX908-NEXT: {{ $}} - ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec - ; GFX908-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec - ; GFX908-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec - ; GFX908-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY1]], implicit $exec + ; GFX908-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY2]], implicit $exec + ; GFX908-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY3]], implicit $exec + ; GFX908-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY4]], implicit $exec ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX908-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX908-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX908-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; GFX908-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec - ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec + ; GFX908-NEXT: [[PRED_COPY9:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX908-NEXT: [[PRED_COPY10:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX908-NEXT: [[PRED_COPY11:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX908-NEXT: [[PRED_COPY12:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY11]], [[PRED_COPY9]], implicit $exec + ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY12]], [[PRED_COPY10]], implicit $exec ; GFX908-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc - ; GFX908-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec - ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec + ; GFX908-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY6]], implicit $exec + ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[PRED_COPY6]], implicit $exec ; GFX908-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = 
S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX908-NEXT: {{ $}} ; GFX908-NEXT: bb.3: ; GFX908-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX908-NEXT: {{ $}} - ; GFX908-NEXT: BUFFER_ATOMIC_ADD_F32_IDXEN [[COPY7]], [[COPY8]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) + ; GFX908-NEXT: BUFFER_ATOMIC_ADD_F32_IDXEN [[PRED_COPY7]], [[PRED_COPY8]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX908-NEXT: {{ $}} @@ -317,42 +317,42 @@ ; GFX90A-NEXT: successors: %bb.2(0x80000000) ; GFX90A-NEXT: liveins: $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; GFX90A-NEXT: {{ $}} - ; GFX90A-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX90A-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128_align2 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX90A-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX90A-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; GFX90A-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY]] - ; GFX90A-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] + ; GFX90A-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX90A-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX90A-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128_align2 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; GFX90A-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX90A-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 + ; GFX90A-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]] + ; GFX90A-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY5]] ; GFX90A-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.2: ; GFX90A-NEXT: successors: %bb.3(0x80000000) ; GFX90A-NEXT: {{ $}} - ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec - ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec - ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec - ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY1]], implicit $exec + ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY2]], implicit $exec + ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY3]], implicit $exec + ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY4]], implicit $exec ; GFX90A-NEXT: 
[[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX90A-NEXT: [[COPY9:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX90A-NEXT: [[COPY10:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX90A-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; GFX90A-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; GFX90A-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec - ; GFX90A-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec + ; GFX90A-NEXT: [[PRED_COPY9:%[0-9]+]]:vreg_64_align2 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX90A-NEXT: [[PRED_COPY10:%[0-9]+]]:vreg_64_align2 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX90A-NEXT: [[PRED_COPY11:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX90A-NEXT: [[PRED_COPY12:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX90A-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY11]], [[PRED_COPY9]], implicit $exec + ; GFX90A-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY12]], [[PRED_COPY10]], implicit $exec ; GFX90A-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc - ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec - ; GFX90A-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec + ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY6]], implicit $exec + ; GFX90A-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[PRED_COPY6]], implicit $exec ; GFX90A-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; GFX90A-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.3: ; GFX90A-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX90A-NEXT: {{ $}} - ; GFX90A-NEXT: BUFFER_ATOMIC_ADD_F32_IDXEN [[COPY7]], [[COPY8]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) + ; GFX90A-NEXT: BUFFER_ATOMIC_ADD_F32_IDXEN [[PRED_COPY7]], [[PRED_COPY8]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) ; GFX90A-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX90A-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX90A-NEXT: {{ $}} @@ -373,33 +373,33 @@ ; GFX908: bb.1 (%ir-block.0): ; GFX908-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2 ; GFX908-NEXT: {{ $}} - ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX908-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX908-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX908-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX908-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], 
%subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX908-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX908-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; GFX908-NEXT: BUFFER_ATOMIC_ADD_F32_BOTHEN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) + ; GFX908-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX908-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX908-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX908-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX908-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; GFX908-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX908-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX908-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY5]], %subreg.sub0, [[PRED_COPY6]], %subreg.sub1 + ; GFX908-NEXT: BUFFER_ATOMIC_ADD_F32_BOTHEN [[PRED_COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[PRED_COPY7]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) ; GFX908-NEXT: S_ENDPGM 0 ; GFX90A-LABEL: name: struct_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc ; GFX90A: bb.1 (%ir-block.0): ; GFX90A-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2 ; GFX90A-NEXT: {{ $}} - ; GFX90A-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX90A-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX90A-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX90A-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX90A-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX90A-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; GFX90A-NEXT: BUFFER_ATOMIC_ADD_F32_BOTHEN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) + ; GFX90A-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX90A-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX90A-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX90A-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; GFX90A-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX90A-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; 
GFX90A-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[PRED_COPY5]], %subreg.sub0, [[PRED_COPY6]], %subreg.sub1 + ; GFX90A-NEXT: BUFFER_ATOMIC_ADD_F32_BOTHEN [[PRED_COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[PRED_COPY7]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) ; GFX90A-NEXT: S_ENDPGM 0 %ret = call float @llvm.amdgcn.struct.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 2) ret void @@ -410,29 +410,29 @@ ; GFX908: bb.1 (%ir-block.0): ; GFX908-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; GFX908-NEXT: {{ $}} - ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX908-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX908-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX908-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX908-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX908-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX908-NEXT: BUFFER_ATOMIC_ADD_F32_IDXEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) + ; GFX908-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX908-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX908-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX908-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX908-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; GFX908-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX908-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; GFX908-NEXT: BUFFER_ATOMIC_ADD_F32_IDXEN [[PRED_COPY]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) ; GFX908-NEXT: S_ENDPGM 0 ; GFX90A-LABEL: name: struct_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset_slc ; GFX90A: bb.1 (%ir-block.0): ; GFX90A-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; GFX90A-NEXT: {{ $}} - ; GFX90A-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX90A-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX90A-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX90A-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX90A-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX90A-NEXT: BUFFER_ATOMIC_ADD_F32_IDXEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) + ; GFX90A-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX90A-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX90A-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX90A-NEXT: 
[[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; GFX90A-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; GFX90A-NEXT: BUFFER_ATOMIC_ADD_F32_IDXEN [[PRED_COPY]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) ; GFX90A-NEXT: S_ENDPGM 0 %ret = call float @llvm.amdgcn.struct.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 %soffset, i32 2) ret void @@ -443,33 +443,33 @@ ; GFX908: bb.1 (%ir-block.0): ; GFX908-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2 ; GFX908-NEXT: {{ $}} - ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX908-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX908-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX908-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX908-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX908-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX908-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; GFX908-NEXT: BUFFER_ATOMIC_PK_ADD_F16_BOTHEN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 7) + ; GFX908-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX908-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX908-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX908-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX908-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; GFX908-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX908-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX908-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY5]], %subreg.sub0, [[PRED_COPY6]], %subreg.sub1 + ; GFX908-NEXT: BUFFER_ATOMIC_PK_ADD_F16_BOTHEN [[PRED_COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[PRED_COPY7]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 7) ; GFX908-NEXT: S_ENDPGM 0 ; GFX90A-LABEL: name: struct_buffer_atomic_add_v2f16_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset ; GFX90A: bb.1 (%ir-block.0): ; GFX90A-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2 ; GFX90A-NEXT: {{ $}} - ; GFX90A-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX90A-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX90A-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX90A-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], 
%subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX90A-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX90A-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; GFX90A-NEXT: BUFFER_ATOMIC_PK_ADD_F16_BOTHEN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 7) + ; GFX90A-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX90A-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX90A-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX90A-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; GFX90A-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX90A-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; GFX90A-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[PRED_COPY5]], %subreg.sub0, [[PRED_COPY6]], %subreg.sub1 + ; GFX90A-NEXT: BUFFER_ATOMIC_PK_ADD_F16_BOTHEN [[PRED_COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[PRED_COPY7]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 7) ; GFX90A-NEXT: S_ENDPGM 0 %ret = call <2 x half> @llvm.amdgcn.struct.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret void @@ -480,29 +480,29 @@ ; GFX908: bb.1 (%ir-block.0): ; GFX908-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; GFX908-NEXT: {{ $}} - ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX908-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX908-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX908-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX908-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX908-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX908-NEXT: BUFFER_ATOMIC_PK_ADD_F16_IDXEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 7) + ; GFX908-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX908-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX908-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX908-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX908-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; GFX908-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX908-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; GFX908-NEXT: BUFFER_ATOMIC_PK_ADD_F16_IDXEN [[PRED_COPY]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x 
s16>), align 1, addrspace 7) ; GFX908-NEXT: S_ENDPGM 0 ; GFX90A-LABEL: name: struct_buffer_atomic_add_v2f16_noret__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset ; GFX90A: bb.1 (%ir-block.0): ; GFX90A-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; GFX90A-NEXT: {{ $}} - ; GFX90A-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX90A-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX90A-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX90A-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX90A-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX90A-NEXT: BUFFER_ATOMIC_PK_ADD_F16_IDXEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 7) + ; GFX90A-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX90A-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX90A-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX90A-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; GFX90A-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; GFX90A-NEXT: BUFFER_ATOMIC_PK_ADD_F16_IDXEN [[PRED_COPY]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 7) ; GFX90A-NEXT: S_ENDPGM 0 %ret = call <2 x half> @llvm.amdgcn.struct.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 %soffset, i32 0) ret void diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.format.f16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.format.f16.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.format.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.format.f16.ll @@ -7,33 +7,33 @@ ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; UNPACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 7) - ; UNPACKED-NEXT: 
$vgpr0 = COPY [[BUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN]] + ; UNPACKED-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; UNPACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; UNPACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; UNPACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; UNPACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; UNPACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; UNPACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY4]], %subreg.sub0, [[PRED_COPY5]], %subreg.sub1 + ; UNPACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 7) + ; UNPACKED-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN]] ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; PACKED-LABEL: name: struct_buffer_load_format_f16__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; PACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; PACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_X_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 7) - ; PACKED-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_FORMAT_D16_X_BOTHEN]] + ; PACKED-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; PACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; PACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; PACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; PACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; PACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; PACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY4]], %subreg.sub0, [[PRED_COPY5]], %subreg.sub1 + ; PACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_X_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 7) + ; PACKED-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_FORMAT_D16_X_BOTHEN]] ; PACKED-NEXT: 
SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call half @llvm.amdgcn.struct.buffer.load.format.f16(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret half %val @@ -44,44 +44,44 @@ ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; UNPACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_XY_gfx80_BOTHEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_FORMAT_D16_XY_gfx80_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x s16>), align 1, addrspace 7) - ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XY_gfx80_BOTHEN]].sub0 - ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XY_gfx80_BOTHEN]].sub1 + ; UNPACKED-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; UNPACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; UNPACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; UNPACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; UNPACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; UNPACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; UNPACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY4]], %subreg.sub0, [[PRED_COPY5]], %subreg.sub1 + ; UNPACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_XY_gfx80_BOTHEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_FORMAT_D16_XY_gfx80_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x s16>), align 1, addrspace 7) + ; UNPACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_FORMAT_D16_XY_gfx80_BOTHEN]].sub0 + ; UNPACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_FORMAT_D16_XY_gfx80_BOTHEN]].sub1 ; UNPACKED-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65535 - ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; UNPACKED-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY7]], [[COPY9]], implicit $exec - ; UNPACKED-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; UNPACKED-NEXT: [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY8]], [[COPY10]], implicit $exec + ; UNPACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; UNPACKED-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PRED_COPY7]], [[PRED_COPY9]], implicit $exec + ; UNPACKED-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; UNPACKED-NEXT: [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PRED_COPY8]], [[PRED_COPY10]], implicit 
$exec ; UNPACKED-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 16 - ; UNPACKED-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]] - ; UNPACKED-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY11]], [[V_AND_B32_e64_1]], implicit $exec + ; UNPACKED-NEXT: [[PRED_COPY11:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_1]] + ; UNPACKED-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[PRED_COPY11]], [[V_AND_B32_e64_1]], implicit $exec ; UNPACKED-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_AND_B32_e64_]], [[V_LSHLREV_B32_e64_]], implicit $exec - ; UNPACKED-NEXT: $vgpr0 = COPY [[V_OR_B32_e64_]] + ; UNPACKED-NEXT: $vgpr0 = PRED_COPY [[V_OR_B32_e64_]] ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; PACKED-LABEL: name: struct_buffer_load_format_v2f16__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; PACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; PACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_XY_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_XY_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x s16>), align 1, addrspace 7) - ; PACKED-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_FORMAT_D16_XY_BOTHEN]] + ; PACKED-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; PACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; PACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; PACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; PACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; PACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; PACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY4]], %subreg.sub0, [[PRED_COPY5]], %subreg.sub1 + ; PACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_XY_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_XY_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x s16>), align 1, addrspace 7) + ; PACKED-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_FORMAT_D16_XY_BOTHEN]] ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call <2 x half> @llvm.amdgcn.struct.buffer.load.format.v2f16(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret <2 x half> %val @@ -98,57 +98,57 @@ ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; 
UNPACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; UNPACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s16>), align 1, addrspace 7) - ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN]].sub0 - ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN]].sub1 - ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN]].sub2 - ; UNPACKED-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN]].sub3 + ; UNPACKED-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; UNPACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; UNPACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; UNPACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; UNPACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; UNPACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; UNPACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY4]], %subreg.sub0, [[PRED_COPY5]], %subreg.sub1 + ; UNPACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s16>), align 1, addrspace 7) + ; UNPACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN]].sub0 + ; UNPACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN]].sub1 + ; UNPACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN]].sub2 + ; UNPACKED-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN]].sub3 ; UNPACKED-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65535 - ; UNPACKED-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; UNPACKED-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY7]], [[COPY11]], implicit $exec - ; UNPACKED-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; UNPACKED-NEXT: [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY8]], [[COPY12]], implicit $exec + ; UNPACKED-NEXT: [[PRED_COPY11:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; UNPACKED-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PRED_COPY7]], [[PRED_COPY11]], implicit $exec + ; UNPACKED-NEXT: [[PRED_COPY12:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; UNPACKED-NEXT: [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PRED_COPY8]], [[PRED_COPY12]], implicit 
$exec ; UNPACKED-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 16 - ; UNPACKED-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]] - ; UNPACKED-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY13]], [[V_AND_B32_e64_1]], implicit $exec + ; UNPACKED-NEXT: [[PRED_COPY13:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_1]] + ; UNPACKED-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[PRED_COPY13]], [[V_AND_B32_e64_1]], implicit $exec ; UNPACKED-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_AND_B32_e64_]], [[V_LSHLREV_B32_e64_]], implicit $exec - ; UNPACKED-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; UNPACKED-NEXT: [[V_AND_B32_e64_2:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY9]], [[COPY14]], implicit $exec - ; UNPACKED-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; UNPACKED-NEXT: [[V_AND_B32_e64_3:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY10]], [[COPY15]], implicit $exec - ; UNPACKED-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]] - ; UNPACKED-NEXT: [[V_LSHLREV_B32_e64_1:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY16]], [[V_AND_B32_e64_3]], implicit $exec + ; UNPACKED-NEXT: [[PRED_COPY14:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; UNPACKED-NEXT: [[V_AND_B32_e64_2:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PRED_COPY9]], [[PRED_COPY14]], implicit $exec + ; UNPACKED-NEXT: [[PRED_COPY15:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; UNPACKED-NEXT: [[V_AND_B32_e64_3:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PRED_COPY10]], [[PRED_COPY15]], implicit $exec + ; UNPACKED-NEXT: [[PRED_COPY16:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_1]] + ; UNPACKED-NEXT: [[V_LSHLREV_B32_e64_1:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[PRED_COPY16]], [[V_AND_B32_e64_3]], implicit $exec ; UNPACKED-NEXT: [[V_OR_B32_e64_1:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_AND_B32_e64_2]], [[V_LSHLREV_B32_e64_1]], implicit $exec - ; UNPACKED-NEXT: $vgpr0 = COPY [[V_OR_B32_e64_]] - ; UNPACKED-NEXT: $vgpr1 = COPY [[V_OR_B32_e64_1]] + ; UNPACKED-NEXT: $vgpr0 = PRED_COPY [[V_OR_B32_e64_]] + ; UNPACKED-NEXT: $vgpr1 = PRED_COPY [[V_OR_B32_e64_1]] ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 ; PACKED-LABEL: name: struct_buffer_load_format_v4f16__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; PACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; PACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s16>), align 1, addrspace 7) - ; PACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN]].sub0 - ; PACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN]].sub1 - ; PACKED-NEXT: $vgpr0 = COPY [[COPY7]] 
- ; PACKED-NEXT: $vgpr1 = COPY [[COPY8]] + ; PACKED-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; PACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; PACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; PACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; PACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; PACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; PACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY4]], %subreg.sub0, [[PRED_COPY5]], %subreg.sub1 + ; PACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s16>), align 1, addrspace 7) + ; PACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN]].sub0 + ; PACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN]].sub1 + ; PACKED-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY7]] + ; PACKED-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY8]] ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %val = call <4 x half> @llvm.amdgcn.struct.buffer.load.format.v4f16(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret <4 x half> %val @@ -161,42 +161,42 @@ ; UNPACKED-NEXT: successors: %bb.2(0x80000000) ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY4]] - ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] + ; UNPACKED-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; UNPACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; UNPACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; UNPACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; UNPACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; UNPACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; UNPACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 + ; UNPACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY4]] + ; UNPACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY5]] ; UNPACKED-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; UNPACKED-NEXT: {{ $}} ; UNPACKED-NEXT: bb.2: ; UNPACKED-NEXT: successors: %bb.3(0x80000000) ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = 
V_READFIRSTLANE_B32 [[COPY]], implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY1]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY2]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY3]], implicit $exec ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; UNPACKED-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; UNPACKED-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; UNPACKED-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec - ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec + ; UNPACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; UNPACKED-NEXT: [[PRED_COPY10:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; UNPACKED-NEXT: [[PRED_COPY11:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; UNPACKED-NEXT: [[PRED_COPY12:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY11]], [[PRED_COPY9]], implicit $exec + ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY12]], [[PRED_COPY10]], implicit $exec ; UNPACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec - ; UNPACKED-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY6]], implicit $exec + ; UNPACKED-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[PRED_COPY6]], implicit $exec ; UNPACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; UNPACKED-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; UNPACKED-NEXT: {{ $}} ; UNPACKED-NEXT: bb.3: ; UNPACKED-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY8]], %subreg.sub1 + ; UNPACKED-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY7]], %subreg.sub0, [[PRED_COPY8]], %subreg.sub1 ; 
UNPACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s16>), align 1, addrspace 7) ; UNPACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; UNPACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec @@ -207,70 +207,70 @@ ; UNPACKED-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; UNPACKED-NEXT: {{ $}} ; UNPACKED-NEXT: bb.5: - ; UNPACKED-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN]].sub0 - ; UNPACKED-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN]].sub1 - ; UNPACKED-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN]].sub2 - ; UNPACKED-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN]].sub3 + ; UNPACKED-NEXT: [[PRED_COPY13:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN]].sub0 + ; UNPACKED-NEXT: [[PRED_COPY14:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN]].sub1 + ; UNPACKED-NEXT: [[PRED_COPY15:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN]].sub2 + ; UNPACKED-NEXT: [[PRED_COPY16:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN]].sub3 ; UNPACKED-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65535 - ; UNPACKED-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; UNPACKED-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY13]], [[COPY17]], implicit $exec - ; UNPACKED-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; UNPACKED-NEXT: [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY14]], [[COPY18]], implicit $exec + ; UNPACKED-NEXT: [[PRED_COPY17:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; UNPACKED-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PRED_COPY13]], [[PRED_COPY17]], implicit $exec + ; UNPACKED-NEXT: [[PRED_COPY18:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; UNPACKED-NEXT: [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PRED_COPY14]], [[PRED_COPY18]], implicit $exec ; UNPACKED-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 16 - ; UNPACKED-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]] - ; UNPACKED-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY19]], [[V_AND_B32_e64_1]], implicit $exec + ; UNPACKED-NEXT: [[PRED_COPY19:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_1]] + ; UNPACKED-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[PRED_COPY19]], [[V_AND_B32_e64_1]], implicit $exec ; UNPACKED-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_AND_B32_e64_]], [[V_LSHLREV_B32_e64_]], implicit $exec - ; UNPACKED-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; UNPACKED-NEXT: [[V_AND_B32_e64_2:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY15]], [[COPY20]], implicit $exec - ; UNPACKED-NEXT: [[COPY21:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; UNPACKED-NEXT: [[V_AND_B32_e64_3:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY16]], [[COPY21]], implicit $exec - ; UNPACKED-NEXT: [[COPY22:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]] - ; UNPACKED-NEXT: [[V_LSHLREV_B32_e64_1:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY22]], [[V_AND_B32_e64_3]], implicit $exec + ; UNPACKED-NEXT: [[PRED_COPY20:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; UNPACKED-NEXT: [[V_AND_B32_e64_2:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PRED_COPY15]], [[PRED_COPY20]], 
implicit $exec + ; UNPACKED-NEXT: [[PRED_COPY21:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; UNPACKED-NEXT: [[V_AND_B32_e64_3:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PRED_COPY16]], [[PRED_COPY21]], implicit $exec + ; UNPACKED-NEXT: [[PRED_COPY22:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_1]] + ; UNPACKED-NEXT: [[V_LSHLREV_B32_e64_1:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[PRED_COPY22]], [[V_AND_B32_e64_3]], implicit $exec ; UNPACKED-NEXT: [[V_OR_B32_e64_1:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_AND_B32_e64_2]], [[V_LSHLREV_B32_e64_1]], implicit $exec - ; UNPACKED-NEXT: $vgpr0 = COPY [[V_OR_B32_e64_]] - ; UNPACKED-NEXT: $vgpr1 = COPY [[V_OR_B32_e64_1]] + ; UNPACKED-NEXT: $vgpr0 = PRED_COPY [[V_OR_B32_e64_]] + ; UNPACKED-NEXT: $vgpr1 = PRED_COPY [[V_OR_B32_e64_1]] ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 ; PACKED-LABEL: name: struct_buffer_load_format_v4f16__vpr_rsrc__sgpr_vindex__sgpr_voffset__vgpr_soffset ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: successors: %bb.2(0x80000000) ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; PACKED-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; PACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY4]] - ; PACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] + ; PACKED-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; PACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; PACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; PACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; PACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; PACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; PACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 + ; PACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY4]] + ; PACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY5]] ; PACKED-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; PACKED-NEXT: {{ $}} ; PACKED-NEXT: bb.2: ; PACKED-NEXT: successors: %bb.3(0x80000000) ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY1]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY2]], implicit $exec + ; 
PACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY3]], implicit $exec ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; PACKED-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; PACKED-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; PACKED-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; PACKED-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec - ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec + ; PACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; PACKED-NEXT: [[PRED_COPY10:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; PACKED-NEXT: [[PRED_COPY11:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; PACKED-NEXT: [[PRED_COPY12:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY11]], [[PRED_COPY9]], implicit $exec + ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY12]], [[PRED_COPY10]], implicit $exec ; PACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec - ; PACKED-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY6]], implicit $exec + ; PACKED-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[PRED_COPY6]], implicit $exec ; PACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; PACKED-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; PACKED-NEXT: {{ $}} ; PACKED-NEXT: bb.3: ; PACKED-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY8]], %subreg.sub1 + ; PACKED-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY7]], %subreg.sub0, [[PRED_COPY8]], %subreg.sub1 ; PACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s16>), align 1, addrspace 7) ; PACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; PACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec @@ -281,10 +281,10 @@ ; PACKED-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; PACKED-NEXT: {{ $}} ; PACKED-NEXT: bb.5: - ; PACKED-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN]].sub0 - ; PACKED-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN]].sub1 - ; PACKED-NEXT: $vgpr0 = COPY [[COPY13]] - ; PACKED-NEXT: $vgpr1 = 
COPY [[COPY14]] + ; PACKED-NEXT: [[PRED_COPY13:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN]].sub0 + ; PACKED-NEXT: [[PRED_COPY14:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN]].sub1 + ; PACKED-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY13]] + ; PACKED-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY14]] ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %val = call <4 x half> @llvm.amdgcn.struct.buffer.load.format.v4f16(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret <4 x half> %val @@ -295,33 +295,33 @@ ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; UNPACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 7) - ; UNPACKED-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN]] + ; UNPACKED-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; UNPACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; UNPACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; UNPACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; UNPACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; UNPACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; UNPACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY4]], %subreg.sub0, [[PRED_COPY5]], %subreg.sub1 + ; UNPACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[PRED_COPY6]], 4095, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 7) + ; UNPACKED-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN]] ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; PACKED-LABEL: name: struct_buffer_load_format_f16__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset_voffsset_add_4095 ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; 
PACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; PACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; PACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_X_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 7) - ; PACKED-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_FORMAT_D16_X_BOTHEN]] + ; PACKED-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; PACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; PACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; PACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; PACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; PACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; PACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY4]], %subreg.sub0, [[PRED_COPY5]], %subreg.sub1 + ; PACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_X_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[PRED_COPY6]], 4095, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 7) + ; PACKED-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_FORMAT_D16_X_BOTHEN]] ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %voffset = add i32 %voffset.base, 4095 %val = call half @llvm.amdgcn.struct.buffer.load.format.f16(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) @@ -333,33 +333,33 @@ ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; UNPACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 7) - ; UNPACKED-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN]] + ; UNPACKED-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; UNPACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; UNPACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; UNPACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; UNPACKED-NEXT: 
[[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; UNPACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; UNPACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY4]], %subreg.sub0, [[PRED_COPY5]], %subreg.sub1 + ; UNPACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 7) + ; UNPACKED-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN]] ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; PACKED-LABEL: name: struct_buffer_load_format_i16__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; PACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; PACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_X_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 7) - ; PACKED-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_FORMAT_D16_X_BOTHEN]] + ; PACKED-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; PACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; PACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; PACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; PACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; PACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; PACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY4]], %subreg.sub0, [[PRED_COPY5]], %subreg.sub1 + ; PACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_X_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 7) + ; PACKED-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_FORMAT_D16_X_BOTHEN]] ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call i16 @llvm.amdgcn.struct.buffer.load.format.i16(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) %fval = bitcast i16 %val to half diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.format.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.format.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.format.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.format.ll @@ -6,17 +6,17 @@ ; 
CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7) - ; CHECK-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_FORMAT_X_BOTHEN]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY4]], %subreg.sub0, [[PRED_COPY5]], %subreg.sub1 + ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_FORMAT_X_BOTHEN]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret float %val @@ -27,20 +27,20 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_XY_BOTHEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_FORMAT_XY_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x s32>), align 1, addrspace 7) - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XY_BOTHEN]].sub0 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XY_BOTHEN]].sub1 - ; 
CHECK-NEXT: $vgpr0 = COPY [[COPY7]] - ; CHECK-NEXT: $vgpr1 = COPY [[COPY8]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY4]], %subreg.sub0, [[PRED_COPY5]], %subreg.sub1 + ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_XY_BOTHEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_FORMAT_XY_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x s32>), align 1, addrspace 7) + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_FORMAT_XY_BOTHEN]].sub0 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_FORMAT_XY_BOTHEN]].sub1 + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY8]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %val = call <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret <2 x float> %val @@ -51,22 +51,22 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_XYZ_BOTHEN:%[0-9]+]]:vreg_96 = BUFFER_LOAD_FORMAT_XYZ_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (<3 x s32>), align 1, addrspace 7) - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZ_BOTHEN]].sub0 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZ_BOTHEN]].sub1 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZ_BOTHEN]].sub2 - ; CHECK-NEXT: $vgpr0 = COPY [[COPY7]] - ; CHECK-NEXT: $vgpr1 = COPY [[COPY8]] - ; CHECK-NEXT: $vgpr2 = COPY [[COPY9]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: 
[[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY4]], %subreg.sub0, [[PRED_COPY5]], %subreg.sub1 + ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_XYZ_BOTHEN:%[0-9]+]]:vreg_96 = BUFFER_LOAD_FORMAT_XYZ_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (<3 x s32>), align 1, addrspace 7) + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_FORMAT_XYZ_BOTHEN]].sub0 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_FORMAT_XYZ_BOTHEN]].sub1 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_FORMAT_XYZ_BOTHEN]].sub2 + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY8]] + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[PRED_COPY9]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 %val = call <3 x float> @llvm.amdgcn.struct.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret <3 x float> %val @@ -77,24 +77,24 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_XYZW_BOTHEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 7) - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZW_BOTHEN]].sub0 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZW_BOTHEN]].sub1 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZW_BOTHEN]].sub2 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZW_BOTHEN]].sub3 - ; CHECK-NEXT: $vgpr0 = COPY [[COPY7]] - ; CHECK-NEXT: $vgpr1 = COPY [[COPY8]] - ; CHECK-NEXT: $vgpr2 = COPY [[COPY9]] - ; CHECK-NEXT: $vgpr3 = COPY [[COPY10]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY4]], %subreg.sub0, [[PRED_COPY5]], %subreg.sub1 + ; CHECK-NEXT: 
[[BUFFER_LOAD_FORMAT_XYZW_BOTHEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 7) + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_FORMAT_XYZW_BOTHEN]].sub0 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_FORMAT_XYZW_BOTHEN]].sub1 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_FORMAT_XYZW_BOTHEN]].sub2 + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_FORMAT_XYZW_BOTHEN]].sub3 + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY8]] + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[PRED_COPY9]] + ; CHECK-NEXT: $vgpr3 = PRED_COPY [[PRED_COPY10]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 %val = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret <4 x float> %val @@ -107,42 +107,42 @@ ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY4]] + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY5]] ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY1]], implicit $exec + ; CHECK-NEXT: 
[[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY2]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY3]], implicit $exec ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY11]], [[PRED_COPY9]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY12]], [[PRED_COPY10]], implicit $exec ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec - ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY6]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[PRED_COPY6]], implicit $exec ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY8]], %subreg.sub1 + ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY7]], %subreg.sub0, [[PRED_COPY8]], %subreg.sub1 ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_XYZW_BOTHEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_BOTHEN [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 7) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec @@ -153,14 +153,14 @@ ; CHECK-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.5: - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZW_BOTHEN]].sub0 - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZW_BOTHEN]].sub1 - ; CHECK-NEXT: 
[[COPY15:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZW_BOTHEN]].sub2 - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZW_BOTHEN]].sub3 - ; CHECK-NEXT: $vgpr0 = COPY [[COPY13]] - ; CHECK-NEXT: $vgpr1 = COPY [[COPY14]] - ; CHECK-NEXT: $vgpr2 = COPY [[COPY15]] - ; CHECK-NEXT: $vgpr3 = COPY [[COPY16]] + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_FORMAT_XYZW_BOTHEN]].sub0 + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_FORMAT_XYZW_BOTHEN]].sub1 + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_FORMAT_XYZW_BOTHEN]].sub2 + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_FORMAT_XYZW_BOTHEN]].sub3 + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY13]] + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY14]] + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[PRED_COPY15]] + ; CHECK-NEXT: $vgpr3 = PRED_COPY [[PRED_COPY16]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 %val = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret <4 x float> %val @@ -171,17 +171,17 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7) - ; CHECK-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_FORMAT_X_BOTHEN]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY4]], %subreg.sub0, [[PRED_COPY5]], %subreg.sub1 + ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[PRED_COPY6]], 4095, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_FORMAT_X_BOTHEN]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %voffset = add i32 %voffset.base, 4095 %val = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 
%soffset, i32 0) @@ -193,17 +193,17 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7) - ; CHECK-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_FORMAT_X_BOTHEN]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY4]], %subreg.sub0, [[PRED_COPY5]], %subreg.sub1 + ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_FORMAT_X_BOTHEN]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) %fval = bitcast i32 %val to float @@ -215,28 +215,28 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: 
[[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY4]], %subreg.sub0, [[PRED_COPY5]], %subreg.sub1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY6]], %subreg.sub0, [[PRED_COPY7]], %subreg.sub1 ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_XYZW_TFE_IDXEN:%[0-9]+]]:vreg_160 = BUFFER_LOAD_FORMAT_XYZW_TFE_IDXEN [[COPY8]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 7) - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZW_TFE_IDXEN]].sub0 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZW_TFE_IDXEN]].sub1 - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZW_TFE_IDXEN]].sub2 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZW_TFE_IDXEN]].sub3 - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZW_TFE_IDXEN]].sub4 - ; CHECK-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1, [[COPY11]], %subreg.sub2, [[COPY12]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_XYZW_TFE_IDXEN:%[0-9]+]]:vreg_160 = BUFFER_LOAD_FORMAT_XYZW_TFE_IDXEN [[PRED_COPY8]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 7) + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_FORMAT_XYZW_TFE_IDXEN]].sub0 + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_FORMAT_XYZW_TFE_IDXEN]].sub1 + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_FORMAT_XYZW_TFE_IDXEN]].sub2 + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_FORMAT_XYZW_TFE_IDXEN]].sub3 + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_FORMAT_XYZW_TFE_IDXEN]].sub4 + ; CHECK-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY9]], %subreg.sub0, [[PRED_COPY10]], %subreg.sub1, [[PRED_COPY11]], %subreg.sub2, [[PRED_COPY12]], %subreg.sub3 ; CHECK-NEXT: FLAT_STORE_DWORDX4 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<4 x s32>) into %ir.value, addrspace 1) - ; CHECK-NEXT: FLAT_STORE_DWORD [[REG_SEQUENCE2]], [[COPY13]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %ir.status, addrspace 1) + ; CHECK-NEXT: FLAT_STORE_DWORD [[REG_SEQUENCE2]], [[PRED_COPY13]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %ir.status, addrspace 1) ; CHECK-NEXT: S_ENDPGM 0 %load = call { <4 x i32>, i32 } @llvm.amdgcn.struct.buffer.load.format.sl_v4i32i32s(<4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 0) @@ -254,27 +254,27 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, 
$vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY4]], %subreg.sub0, [[PRED_COPY5]], %subreg.sub1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY6]], %subreg.sub0, [[PRED_COPY7]], %subreg.sub1 ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_XYZ_TFE_IDXEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZ_TFE_IDXEN [[COPY8]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<3 x s32>), align 1, addrspace 7) - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZ_TFE_IDXEN]].sub0 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZ_TFE_IDXEN]].sub1 - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZ_TFE_IDXEN]].sub2 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZ_TFE_IDXEN]].sub3 - ; CHECK-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1, [[COPY11]], %subreg.sub2 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_XYZ_TFE_IDXEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZ_TFE_IDXEN [[PRED_COPY8]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<3 x s32>), align 1, addrspace 7) + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_FORMAT_XYZ_TFE_IDXEN]].sub0 + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_FORMAT_XYZ_TFE_IDXEN]].sub1 + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_FORMAT_XYZ_TFE_IDXEN]].sub2 + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_FORMAT_XYZ_TFE_IDXEN]].sub3 + ; CHECK-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[PRED_COPY9]], %subreg.sub0, [[PRED_COPY10]], 
%subreg.sub1, [[PRED_COPY11]], %subreg.sub2 ; CHECK-NEXT: FLAT_STORE_DWORDX3 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<3 x s32>) into %ir.value, align 16, addrspace 1) - ; CHECK-NEXT: FLAT_STORE_DWORD [[REG_SEQUENCE2]], [[COPY12]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %ir.status, addrspace 1) + ; CHECK-NEXT: FLAT_STORE_DWORD [[REG_SEQUENCE2]], [[PRED_COPY12]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %ir.status, addrspace 1) ; CHECK-NEXT: S_ENDPGM 0 %load = call { <3 x i32>, i32 } @llvm.amdgcn.struct.buffer.load.format.sl_v3i32i32s(<4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 0) @@ -292,24 +292,24 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY4]], %subreg.sub0, [[PRED_COPY5]], %subreg.sub1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY6]], %subreg.sub0, [[PRED_COPY7]], %subreg.sub1 ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_TFE_IDXEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_FORMAT_X_TFE_IDXEN [[COPY8]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7) - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_X_TFE_IDXEN]].sub0 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_X_TFE_IDXEN]].sub1 - ; CHECK-NEXT: FLAT_STORE_DWORD [[REG_SEQUENCE1]], [[COPY9]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %ir.value, addrspace 1) - ; CHECK-NEXT: FLAT_STORE_DWORD [[REG_SEQUENCE2]], [[COPY10]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %ir.status, addrspace 1) + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; CHECK-NEXT: 
[[BUFFER_LOAD_FORMAT_X_TFE_IDXEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_FORMAT_X_TFE_IDXEN [[PRED_COPY8]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7) + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_FORMAT_X_TFE_IDXEN]].sub0 + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_FORMAT_X_TFE_IDXEN]].sub1 + ; CHECK-NEXT: FLAT_STORE_DWORD [[REG_SEQUENCE1]], [[PRED_COPY9]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %ir.value, addrspace 1) + ; CHECK-NEXT: FLAT_STORE_DWORD [[REG_SEQUENCE2]], [[PRED_COPY10]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %ir.status, addrspace 1) ; CHECK-NEXT: S_ENDPGM 0 %load = call { i32, i32 } @llvm.amdgcn.struct.buffer.load.format.sl_i32i32s(<4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 0) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.ll @@ -7,17 +7,17 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7) - ; CHECK-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_BOTHEN]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY4]], %subreg.sub0, [[PRED_COPY5]], %subreg.sub1 + ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_BOTHEN]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret float %val @@ -29,20 +29,20 @@ ; CHECK: bb.1 (%ir-block.0): ; 
CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; CHECK-NEXT: [[BUFFER_LOAD_DWORDX2_BOTHEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x s32>), align 1, addrspace 7) - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_BOTHEN]].sub0 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_BOTHEN]].sub1 - ; CHECK-NEXT: $vgpr0 = COPY [[COPY7]] - ; CHECK-NEXT: $vgpr1 = COPY [[COPY8]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY4]], %subreg.sub0, [[PRED_COPY5]], %subreg.sub1 + ; CHECK-NEXT: [[BUFFER_LOAD_DWORDX2_BOTHEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x s32>), align 1, addrspace 7) + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_DWORDX2_BOTHEN]].sub0 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_DWORDX2_BOTHEN]].sub1 + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY8]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %val = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret <2 x float> %val @@ -54,22 +54,22 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], 
%subreg.sub1 - ; CHECK-NEXT: [[BUFFER_LOAD_DWORDX3_BOTHEN:%[0-9]+]]:vreg_96 = BUFFER_LOAD_DWORDX3_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (<3 x s32>), align 1, addrspace 7) - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_BOTHEN]].sub0 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_BOTHEN]].sub1 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_BOTHEN]].sub2 - ; CHECK-NEXT: $vgpr0 = COPY [[COPY7]] - ; CHECK-NEXT: $vgpr1 = COPY [[COPY8]] - ; CHECK-NEXT: $vgpr2 = COPY [[COPY9]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY4]], %subreg.sub0, [[PRED_COPY5]], %subreg.sub1 + ; CHECK-NEXT: [[BUFFER_LOAD_DWORDX3_BOTHEN:%[0-9]+]]:vreg_96 = BUFFER_LOAD_DWORDX3_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (<3 x s32>), align 1, addrspace 7) + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_DWORDX3_BOTHEN]].sub0 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_DWORDX3_BOTHEN]].sub1 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_DWORDX3_BOTHEN]].sub2 + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY8]] + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[PRED_COPY9]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 %val = call <3 x float> @llvm.amdgcn.struct.buffer.load.v3f32(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret <3 x float> %val @@ -81,24 +81,24 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; CHECK-NEXT: [[BUFFER_LOAD_DWORDX4_BOTHEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 7) - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_BOTHEN]].sub0 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_BOTHEN]].sub1 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY 
[[BUFFER_LOAD_DWORDX4_BOTHEN]].sub2 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_BOTHEN]].sub3 - ; CHECK-NEXT: $vgpr0 = COPY [[COPY7]] - ; CHECK-NEXT: $vgpr1 = COPY [[COPY8]] - ; CHECK-NEXT: $vgpr2 = COPY [[COPY9]] - ; CHECK-NEXT: $vgpr3 = COPY [[COPY10]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY4]], %subreg.sub0, [[PRED_COPY5]], %subreg.sub1 + ; CHECK-NEXT: [[BUFFER_LOAD_DWORDX4_BOTHEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 7) + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_DWORDX4_BOTHEN]].sub0 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_DWORDX4_BOTHEN]].sub1 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_DWORDX4_BOTHEN]].sub2 + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_DWORDX4_BOTHEN]].sub3 + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY8]] + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[PRED_COPY9]] + ; CHECK-NEXT: $vgpr3 = PRED_COPY [[PRED_COPY10]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 %val = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret <4 x float> %val @@ -110,18 +110,18 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY4]], 
%subreg.sub1 - ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7) - ; CHECK-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_BOTHEN]] + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY6]], %subreg.sub0, [[PRED_COPY4]], %subreg.sub1 + ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[PRED_COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_BOTHEN]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 0, i32 %voffset, i32 %soffset, i32 0) ret float %val @@ -133,17 +133,17 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7) - ; CHECK-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_BOTHEN]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY4]], %subreg.sub0, [[PRED_COPY5]], %subreg.sub1 + ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[PRED_COPY6]], 4095, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_BOTHEN]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %voffset = add i32 %voffset.base, 4095 %val = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) @@ -155,17 +155,17 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 64 - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY4]], %subreg.sub0, [[PRED_COPY5]], %subreg.sub1 ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7) - ; CHECK-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_BOTHEN]] + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_BOTHEN]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 64, i32 0) ret float %val @@ -178,42 +178,42 @@ ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY4]] + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY5]] ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: 
%bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY1]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY2]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY3]], implicit $exec ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY11]], [[PRED_COPY9]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY12]], [[PRED_COPY10]], implicit $exec ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec - ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY6]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[PRED_COPY6]], implicit $exec ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY8]], %subreg.sub1 + ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY7]], %subreg.sub0, [[PRED_COPY8]], %subreg.sub1 ; 
CHECK-NEXT: [[BUFFER_LOAD_DWORD_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_BOTHEN [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec @@ -224,7 +224,7 @@ ; CHECK-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.5: - ; CHECK-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_BOTHEN]] + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_BOTHEN]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret float %val @@ -235,17 +235,17 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; CHECK-NEXT: [[BUFFER_LOAD_UBYTE_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (s8), addrspace 7) - ; CHECK-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_BOTHEN]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY4]], %subreg.sub0, [[PRED_COPY5]], %subreg.sub1 + ; CHECK-NEXT: [[BUFFER_LOAD_UBYTE_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (s8), addrspace 7) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_UBYTE_BOTHEN]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call i8 @llvm.amdgcn.struct.buffer.load.i8(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) %ext = zext i8 %val to i32 @@ -258,18 +258,18 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], 
%subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; CHECK-NEXT: [[BUFFER_LOAD_UBYTE_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (s8), addrspace 7) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY4]], %subreg.sub0, [[PRED_COPY5]], %subreg.sub1 + ; CHECK-NEXT: [[BUFFER_LOAD_UBYTE_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (s8), addrspace 7) ; CHECK-NEXT: [[V_BFE_I32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_I32_e64 [[BUFFER_LOAD_UBYTE_BOTHEN]], 0, 8, implicit $exec - ; CHECK-NEXT: $vgpr0 = COPY [[V_BFE_I32_e64_]] + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[V_BFE_I32_e64_]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call i8 @llvm.amdgcn.struct.buffer.load.i8(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) %ext = sext i8 %val to i32 @@ -282,17 +282,17 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; CHECK-NEXT: [[BUFFER_LOAD_USHORT_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 7) - ; CHECK-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_USHORT_BOTHEN]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = 
PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY4]], %subreg.sub0, [[PRED_COPY5]], %subreg.sub1 + ; CHECK-NEXT: [[BUFFER_LOAD_USHORT_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 7) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_USHORT_BOTHEN]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call i16 @llvm.amdgcn.struct.buffer.load.i16(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) %ext = zext i16 %val to i32 @@ -305,18 +305,18 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; CHECK-NEXT: [[BUFFER_LOAD_USHORT_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 7) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY4]], %subreg.sub0, [[PRED_COPY5]], %subreg.sub1 + ; CHECK-NEXT: [[BUFFER_LOAD_USHORT_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 7) ; CHECK-NEXT: [[V_BFE_I32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_I32_e64 [[BUFFER_LOAD_USHORT_BOTHEN]], 0, 16, implicit $exec - ; CHECK-NEXT: $vgpr0 = COPY [[V_BFE_I32_e64_]] + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[V_BFE_I32_e64_]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call i16 @llvm.amdgcn.struct.buffer.load.i16(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) %ext = sext i16 %val to i32 @@ -330,17 +330,17 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = 
COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; CHECK-NEXT: [[BUFFER_LOAD_USHORT_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 7) - ; CHECK-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_USHORT_BOTHEN]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY4]], %subreg.sub0, [[PRED_COPY5]], %subreg.sub1 + ; CHECK-NEXT: [[BUFFER_LOAD_USHORT_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 7) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_USHORT_BOTHEN]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call half @llvm.amdgcn.struct.buffer.load.f16(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret half %val @@ -352,17 +352,17 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x s16>), align 1, addrspace 7) - ; CHECK-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_BOTHEN]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: 
[[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY4]], %subreg.sub0, [[PRED_COPY5]], %subreg.sub1 + ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x s16>), align 1, addrspace 7) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_BOTHEN]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call <2 x half> @llvm.amdgcn.struct.buffer.load.v2f16(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret <2 x half> %val @@ -380,20 +380,20 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; CHECK-NEXT: [[BUFFER_LOAD_DWORDX2_BOTHEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s16>), align 1, addrspace 7) - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_BOTHEN]].sub0 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_BOTHEN]].sub1 - ; CHECK-NEXT: $vgpr0 = COPY [[COPY7]] - ; CHECK-NEXT: $vgpr1 = COPY [[COPY8]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY4]], %subreg.sub0, [[PRED_COPY5]], %subreg.sub1 + ; CHECK-NEXT: [[BUFFER_LOAD_DWORDX2_BOTHEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s16>), align 1, addrspace 7) + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_DWORDX2_BOTHEN]].sub0 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_DWORDX2_BOTHEN]].sub1 + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY8]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %val = call <4 x half> @llvm.amdgcn.struct.buffer.load.v4f16(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 
%soffset, i32 0) ret <4 x half> %val @@ -405,17 +405,17 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7) - ; CHECK-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_BOTHEN]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY4]], %subreg.sub0, [[PRED_COPY5]], %subreg.sub1 + ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 1, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_BOTHEN]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 1) ret float %val diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.store.format.f16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.store.format.f16.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.store.format.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.store.format.f16.ll @@ -7,33 +7,33 @@ ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; UNPACKED-NEXT: 
[[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; UNPACKED-NEXT: BUFFER_STORE_FORMAT_D16_X_gfx80_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 7) + ; UNPACKED-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; UNPACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; UNPACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; UNPACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; UNPACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; UNPACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; UNPACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; UNPACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY5]], %subreg.sub0, [[PRED_COPY6]], %subreg.sub1 + ; UNPACKED-NEXT: BUFFER_STORE_FORMAT_D16_X_gfx80_BOTHEN_exact [[PRED_COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[PRED_COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 7) ; UNPACKED-NEXT: S_ENDPGM 0 ; PACKED-LABEL: name: struct_buffer_store_format_f16__vgpr_val__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; PACKED-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; PACKED-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; PACKED-NEXT: BUFFER_STORE_FORMAT_D16_X_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 7) + ; PACKED-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; PACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; PACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; PACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; PACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; PACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; PACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; PACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY5]], %subreg.sub0, [[PRED_COPY6]], %subreg.sub1 + ; PACKED-NEXT: BUFFER_STORE_FORMAT_D16_X_BOTHEN_exact [[PRED_COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[PRED_COPY7]], 
0, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 7) ; PACKED-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.struct.buffer.store.format.f16(half %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret void @@ -44,37 +44,37 @@ ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; UNPACKED-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; UNPACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; UNPACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; UNPACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; UNPACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; UNPACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; UNPACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; UNPACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 ; UNPACKED-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 16 - ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; UNPACKED-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY8]], [[COPY]], implicit $exec - ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1 - ; UNPACKED-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; UNPACKED-NEXT: BUFFER_STORE_FORMAT_D16_XY_gfx80_BOTHEN_exact [[REG_SEQUENCE1]], [[REG_SEQUENCE2]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>), align 1, addrspace 7) + ; UNPACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; UNPACKED-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[PRED_COPY8]], [[PRED_COPY]], implicit $exec + ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1 + ; UNPACKED-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY5]], %subreg.sub0, [[PRED_COPY6]], %subreg.sub1 + ; UNPACKED-NEXT: BUFFER_STORE_FORMAT_D16_XY_gfx80_BOTHEN_exact [[REG_SEQUENCE1]], [[REG_SEQUENCE2]], [[REG_SEQUENCE]], [[PRED_COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>), align 1, addrspace 7) ; UNPACKED-NEXT: S_ENDPGM 0 ; PACKED-LABEL: name: struct_buffer_store_format_v2f16__vgpr_val__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; PACKED-NEXT: 
[[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; PACKED-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; PACKED-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; PACKED-NEXT: BUFFER_STORE_FORMAT_D16_XY_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>), align 1, addrspace 7) + ; PACKED-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; PACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; PACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; PACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; PACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; PACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; PACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; PACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY5]], %subreg.sub0, [[PRED_COPY6]], %subreg.sub1 + ; PACKED-NEXT: BUFFER_STORE_FORMAT_D16_XY_BOTHEN_exact [[PRED_COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[PRED_COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>), align 1, addrspace 7) ; PACKED-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.struct.buffer.store.format.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret void @@ -91,42 +91,42 @@ ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; UNPACKED-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; UNPACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; UNPACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; UNPACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; UNPACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; UNPACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY3]], %subreg.sub1, [[PRED_COPY4]], %subreg.sub2, [[PRED_COPY5]], 
%subreg.sub3 + ; UNPACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; UNPACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; UNPACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 ; UNPACKED-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 16 - ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; UNPACKED-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY9]], [[COPY]], implicit $exec - ; UNPACKED-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; UNPACKED-NEXT: [[V_LSHRREV_B32_e64_1:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY10]], [[COPY1]], implicit $exec - ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1, [[COPY1]], %subreg.sub2, [[V_LSHRREV_B32_e64_1]], %subreg.sub3 - ; UNPACKED-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 - ; UNPACKED-NEXT: BUFFER_STORE_FORMAT_D16_XYZW_gfx80_BOTHEN_exact [[REG_SEQUENCE1]], [[REG_SEQUENCE2]], [[REG_SEQUENCE]], [[COPY8]], 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s16>), align 1, addrspace 7) + ; UNPACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; UNPACKED-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[PRED_COPY9]], [[PRED_COPY]], implicit $exec + ; UNPACKED-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; UNPACKED-NEXT: [[V_LSHRREV_B32_e64_1:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[PRED_COPY10]], [[PRED_COPY1]], implicit $exec + ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1, [[PRED_COPY1]], %subreg.sub2, [[V_LSHRREV_B32_e64_1]], %subreg.sub3 + ; UNPACKED-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY6]], %subreg.sub0, [[PRED_COPY7]], %subreg.sub1 + ; UNPACKED-NEXT: BUFFER_STORE_FORMAT_D16_XYZW_gfx80_BOTHEN_exact [[REG_SEQUENCE1]], [[REG_SEQUENCE2]], [[REG_SEQUENCE]], [[PRED_COPY8]], 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s16>), align 1, addrspace 7) ; UNPACKED-NEXT: S_ENDPGM 0 ; PACKED-LABEL: name: struct_buffer_store_format_v4f16__vgpr_val__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 - ; PACKED-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; PACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; PACKED-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; PACKED-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 - ; PACKED-NEXT: BUFFER_STORE_FORMAT_D16_XYZW_BOTHEN_exact [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY8]], 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s16>), align 1, addrspace 7) + ; 
PACKED-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; PACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; PACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; PACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; PACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; PACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY3]], %subreg.sub1, [[PRED_COPY4]], %subreg.sub2, [[PRED_COPY5]], %subreg.sub3 + ; PACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; PACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; PACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; PACKED-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY6]], %subreg.sub0, [[PRED_COPY7]], %subreg.sub1 + ; PACKED-NEXT: BUFFER_STORE_FORMAT_D16_XYZW_BOTHEN_exact [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[PRED_COPY8]], 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s16>), align 1, addrspace 7) ; PACKED-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.struct.buffer.store.format.v4f16(<4 x half> %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret void @@ -138,45 +138,45 @@ ; UNPACKED-NEXT: successors: %bb.2(0x80000000) ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY]] - ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] - ; UNPACKED-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY6]] + ; UNPACKED-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; UNPACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; UNPACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; UNPACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; UNPACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; UNPACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; UNPACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; UNPACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 + ; UNPACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]] + ; UNPACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY5]] + ; UNPACKED-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY6]] ; UNPACKED-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; UNPACKED-NEXT: {{ $}} ; UNPACKED-NEXT: bb.2: ; UNPACKED-NEXT: successors: 
%bb.3(0x80000000) ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY1]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY2]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY3]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY4]], implicit $exec ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; UNPACKED-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; UNPACKED-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; UNPACKED-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; UNPACKED-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec - ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec + ; UNPACKED-NEXT: [[PRED_COPY11:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; UNPACKED-NEXT: [[PRED_COPY12:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; UNPACKED-NEXT: [[PRED_COPY13:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; UNPACKED-NEXT: [[PRED_COPY14:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY13]], [[PRED_COPY11]], implicit $exec + ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY14]], [[PRED_COPY12]], implicit $exec ; UNPACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; UNPACKED-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY7]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY7]], implicit $exec + ; UNPACKED-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[PRED_COPY7]], implicit $exec ; UNPACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; UNPACKED-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; UNPACKED-NEXT: {{ $}} ; UNPACKED-NEXT: bb.3: ; UNPACKED-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1 - ; UNPACKED-NEXT: 
BUFFER_STORE_FORMAT_D16_X_gfx80_BOTHEN_exact [[COPY8]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 7) + ; UNPACKED-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY9]], %subreg.sub0, [[PRED_COPY10]], %subreg.sub1 + ; UNPACKED-NEXT: BUFFER_STORE_FORMAT_D16_X_gfx80_BOTHEN_exact [[PRED_COPY8]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 7) ; UNPACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; UNPACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; UNPACKED-NEXT: {{ $}} @@ -192,45 +192,45 @@ ; PACKED-NEXT: successors: %bb.2(0x80000000) ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; PACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; PACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; PACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY]] - ; PACKED-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] - ; PACKED-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY6]] + ; PACKED-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; PACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; PACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; PACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; PACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; PACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; PACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; PACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 + ; PACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]] + ; PACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY5]] + ; PACKED-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY6]] ; PACKED-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; PACKED-NEXT: {{ $}} ; PACKED-NEXT: bb.2: ; PACKED-NEXT: successors: %bb.3(0x80000000) ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY1]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY2]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 
[[PRED_COPY3]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY4]], implicit $exec ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; PACKED-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; PACKED-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; PACKED-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; PACKED-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec - ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec + ; PACKED-NEXT: [[PRED_COPY11:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; PACKED-NEXT: [[PRED_COPY12:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; PACKED-NEXT: [[PRED_COPY13:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; PACKED-NEXT: [[PRED_COPY14:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY13]], [[PRED_COPY11]], implicit $exec + ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY14]], [[PRED_COPY12]], implicit $exec ; PACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; PACKED-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY7]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY7]], implicit $exec + ; PACKED-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[PRED_COPY7]], implicit $exec ; PACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; PACKED-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; PACKED-NEXT: {{ $}} ; PACKED-NEXT: bb.3: ; PACKED-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1 - ; PACKED-NEXT: BUFFER_STORE_FORMAT_D16_X_BOTHEN_exact [[COPY8]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 7) + ; PACKED-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY9]], %subreg.sub0, [[PRED_COPY10]], %subreg.sub1 + ; PACKED-NEXT: BUFFER_STORE_FORMAT_D16_X_BOTHEN_exact [[PRED_COPY8]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 7) ; PACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; PACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; PACKED-NEXT: {{ $}} @@ -250,33 +250,33 @@ ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2 ; 
UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; UNPACKED-NEXT: BUFFER_STORE_FORMAT_D16_X_gfx80_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 7) + ; UNPACKED-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; UNPACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; UNPACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; UNPACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; UNPACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; UNPACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; UNPACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; UNPACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY5]], %subreg.sub0, [[PRED_COPY6]], %subreg.sub1 + ; UNPACKED-NEXT: BUFFER_STORE_FORMAT_D16_X_gfx80_BOTHEN_exact [[PRED_COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[PRED_COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 7) ; UNPACKED-NEXT: S_ENDPGM 0 ; PACKED-LABEL: name: struct_buffer_store_format_i16__vgpr_val__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; PACKED-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; PACKED-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; PACKED-NEXT: BUFFER_STORE_FORMAT_D16_X_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 7) + ; PACKED-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; PACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; PACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; PACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; PACKED-NEXT: 
[[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; PACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; PACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; PACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY5]], %subreg.sub0, [[PRED_COPY6]], %subreg.sub1 + ; PACKED-NEXT: BUFFER_STORE_FORMAT_D16_X_BOTHEN_exact [[PRED_COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[PRED_COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 7) ; PACKED-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.struct.buffer.store.format.i16(i16 %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret void diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.store.format.f32.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.store.format.f32.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.store.format.f32.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.store.format.f32.ll @@ -6,17 +6,17 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; CHECK-NEXT: BUFFER_STORE_FORMAT_X_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 7) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY5]], %subreg.sub0, [[PRED_COPY6]], %subreg.sub1 + ; CHECK-NEXT: BUFFER_STORE_FORMAT_X_BOTHEN_exact [[PRED_COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[PRED_COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 7) ; CHECK-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.struct.buffer.store.format.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret void @@ 
-27,19 +27,19 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 - ; CHECK-NEXT: BUFFER_STORE_FORMAT_XY_BOTHEN_exact [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY8]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s32>), align 1, addrspace 7) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY3]], %subreg.sub1, [[PRED_COPY4]], %subreg.sub2, [[PRED_COPY5]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY6]], %subreg.sub0, [[PRED_COPY7]], %subreg.sub1 + ; CHECK-NEXT: BUFFER_STORE_FORMAT_XY_BOTHEN_exact [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[PRED_COPY8]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s32>), align 1, addrspace 7) ; CHECK-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.struct.buffer.store.format.v2f32(<2 x float> %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret void @@ -50,20 +50,20 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY5]], %subreg.sub2, [[COPY6]], %subreg.sub3 - ; CHECK-NEXT: 
[[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY8]], %subreg.sub1 - ; CHECK-NEXT: BUFFER_STORE_FORMAT_XYZ_BOTHEN_exact [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY9]], 0, 0, 0, implicit $exec :: (dereferenceable store (<3 x s32>), align 1, addrspace 7) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY3]], %subreg.sub0, [[PRED_COPY4]], %subreg.sub1, [[PRED_COPY5]], %subreg.sub2, [[PRED_COPY6]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY7]], %subreg.sub0, [[PRED_COPY8]], %subreg.sub1 + ; CHECK-NEXT: BUFFER_STORE_FORMAT_XYZ_BOTHEN_exact [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[PRED_COPY9]], 0, 0, 0, implicit $exec :: (dereferenceable store (<3 x s32>), align 1, addrspace 7) ; CHECK-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.struct.buffer.store.format.v3f32(<3 x float> %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret void @@ -74,21 +74,21 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2, [[COPY7]], %subreg.sub3 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY $vgpr5 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY9]], %subreg.sub1 - ; CHECK-NEXT: BUFFER_STORE_FORMAT_XYZW_BOTHEN_exact [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY10]], 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s32>), align 1, addrspace 7) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; 
CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY4]], %subreg.sub0, [[PRED_COPY5]], %subreg.sub1, [[PRED_COPY6]], %subreg.sub2, [[PRED_COPY7]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr5 + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY8]], %subreg.sub0, [[PRED_COPY9]], %subreg.sub1 + ; CHECK-NEXT: BUFFER_STORE_FORMAT_XYZW_BOTHEN_exact [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[PRED_COPY10]], 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s32>), align 1, addrspace 7) ; CHECK-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.struct.buffer.store.format.v4f32(<4 x float> %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret void @@ -100,45 +100,45 @@ ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY]] - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY6]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]] + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY5]] + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY6]] ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: 
[[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY1]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY2]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY3]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY4]], implicit $exec ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY13]], [[PRED_COPY11]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY14]], [[PRED_COPY12]], implicit $exec ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY7]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY7]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[PRED_COPY7]], implicit $exec ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1 - ; CHECK-NEXT: BUFFER_STORE_FORMAT_X_BOTHEN_exact [[COPY8]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, implicit $exec :: (dereferenceable store 
(s32), align 1, addrspace 7) + ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY9]], %subreg.sub0, [[PRED_COPY10]], %subreg.sub1 + ; CHECK-NEXT: BUFFER_STORE_FORMAT_X_BOTHEN_exact [[PRED_COPY8]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 7) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -158,17 +158,17 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; CHECK-NEXT: BUFFER_STORE_FORMAT_X_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 7) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY5]], %subreg.sub0, [[PRED_COPY6]], %subreg.sub1 + ; CHECK-NEXT: BUFFER_STORE_FORMAT_X_BOTHEN_exact [[PRED_COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[PRED_COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 7) ; CHECK-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.struct.buffer.store.format.i32(i32 %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret void diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.store.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.store.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.store.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.store.ll @@ -7,17 +7,17 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: 
[[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; CHECK-NEXT: BUFFER_STORE_DWORD_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 7) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY5]], %subreg.sub0, [[PRED_COPY6]], %subreg.sub1 + ; CHECK-NEXT: BUFFER_STORE_DWORD_BOTHEN_exact [[PRED_COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[PRED_COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 7) ; CHECK-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.struct.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret void @@ -28,19 +28,19 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 - ; CHECK-NEXT: BUFFER_STORE_DWORDX2_BOTHEN_exact [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY8]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s32>), align 1, addrspace 7) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: 
[[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY3]], %subreg.sub1, [[PRED_COPY4]], %subreg.sub2, [[PRED_COPY5]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY6]], %subreg.sub0, [[PRED_COPY7]], %subreg.sub1 + ; CHECK-NEXT: BUFFER_STORE_DWORDX2_BOTHEN_exact [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[PRED_COPY8]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s32>), align 1, addrspace 7) ; CHECK-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.struct.buffer.store.v2f32(<2 x float> %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret void @@ -51,20 +51,20 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY5]], %subreg.sub2, [[COPY6]], %subreg.sub3 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY8]], %subreg.sub1 - ; CHECK-NEXT: BUFFER_STORE_DWORDX3_BOTHEN_exact [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY9]], 0, 0, 0, implicit $exec :: (dereferenceable store (<3 x s32>), align 1, addrspace 7) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY3]], %subreg.sub0, [[PRED_COPY4]], %subreg.sub1, [[PRED_COPY5]], %subreg.sub2, [[PRED_COPY6]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY7]], %subreg.sub0, [[PRED_COPY8]], %subreg.sub1 + ; CHECK-NEXT: BUFFER_STORE_DWORDX3_BOTHEN_exact [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[PRED_COPY9]], 0, 0, 0, implicit $exec :: 
(dereferenceable store (<3 x s32>), align 1, addrspace 7) ; CHECK-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.struct.buffer.store.v3f32(<3 x float> %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret void @@ -75,21 +75,21 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2, [[COPY7]], %subreg.sub3 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY $vgpr5 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY9]], %subreg.sub1 - ; CHECK-NEXT: BUFFER_STORE_DWORDX4_BOTHEN_exact [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY10]], 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s32>), align 1, addrspace 7) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY4]], %subreg.sub0, [[PRED_COPY5]], %subreg.sub1, [[PRED_COPY6]], %subreg.sub2, [[PRED_COPY7]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr5 + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY8]], %subreg.sub0, [[PRED_COPY9]], %subreg.sub1 + ; CHECK-NEXT: BUFFER_STORE_DWORDX4_BOTHEN_exact [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[PRED_COPY10]], 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s32>), align 1, addrspace 7) ; CHECK-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.struct.buffer.store.v4f32(<4 x float> %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret void @@ -101,49 +101,49 @@ ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = 
COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2, [[COPY7]], %subreg.sub3 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY $sgpr7 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vreg_128 = COPY [[REG_SEQUENCE]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[COPY9]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY4]], %subreg.sub0, [[PRED_COPY5]], %subreg.sub1, [[PRED_COPY6]], %subreg.sub2, [[PRED_COPY7]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr7 + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:vreg_128 = PRED_COPY [[REG_SEQUENCE]] + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY8]] + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY9]] ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY4]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY5]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY6]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY7]], implicit $exec ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, 
[[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE2]].sub0_sub1 - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE2]].sub2_sub3 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY16]], [[COPY14]], implicit $exec - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY17]], [[COPY15]], implicit $exec + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE2]].sub0_sub1 + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE2]].sub2_sub3 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY16]], [[PRED_COPY14]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY17]], [[PRED_COPY15]], implicit $exec ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec - ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY10]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY10]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[PRED_COPY10]], implicit $exec ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY12]], %subreg.sub0, [[COPY13]], %subreg.sub1 - ; CHECK-NEXT: BUFFER_STORE_DWORDX4_BOTHEN_exact [[COPY11]], [[REG_SEQUENCE3]], [[REG_SEQUENCE2]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s32>), align 1, addrspace 7) + ; CHECK-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY12]], %subreg.sub0, [[PRED_COPY13]], %subreg.sub1 + ; CHECK-NEXT: BUFFER_STORE_DWORDX4_BOTHEN_exact [[PRED_COPY11]], [[REG_SEQUENCE3]], [[REG_SEQUENCE2]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s32>), align 1, addrspace 7) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -163,17 +163,17 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; 
CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; CHECK-NEXT: BUFFER_STORE_BYTE_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (s8), addrspace 7) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY5]], %subreg.sub0, [[PRED_COPY6]], %subreg.sub1 + ; CHECK-NEXT: BUFFER_STORE_BYTE_BOTHEN_exact [[PRED_COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[PRED_COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (s8), addrspace 7) ; CHECK-NEXT: S_ENDPGM 0 %val.trunc = trunc i32 %val to i8 call void @llvm.amdgcn.struct.buffer.store.i8(i8 %val.trunc, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) @@ -185,17 +185,17 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; CHECK-NEXT: BUFFER_STORE_SHORT_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 7) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; CHECK-NEXT: 
[[PRED_COPY7:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY5]], %subreg.sub0, [[PRED_COPY6]], %subreg.sub1 + ; CHECK-NEXT: BUFFER_STORE_SHORT_BOTHEN_exact [[PRED_COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[PRED_COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 7) ; CHECK-NEXT: S_ENDPGM 0 %val.trunc = trunc i32 %val to i16 call void @llvm.amdgcn.struct.buffer.store.i16(i16 %val.trunc, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) @@ -207,17 +207,17 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; CHECK-NEXT: BUFFER_STORE_DWORD_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 1, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 7) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY5]], %subreg.sub0, [[PRED_COPY6]], %subreg.sub1 + ; CHECK-NEXT: BUFFER_STORE_DWORD_BOTHEN_exact [[PRED_COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[PRED_COPY7]], 0, 1, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 7) ; CHECK-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.struct.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 1) ret void @@ -228,17 +228,17 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; CHECK-NEXT: 
[[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; CHECK-NEXT: BUFFER_STORE_DWORD_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>), align 1, addrspace 7) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY5]], %subreg.sub0, [[PRED_COPY6]], %subreg.sub1 + ; CHECK-NEXT: BUFFER_STORE_DWORD_BOTHEN_exact [[PRED_COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[PRED_COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>), align 1, addrspace 7) ; CHECK-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.struct.buffer.store.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret void @@ -255,19 +255,19 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 - ; CHECK-NEXT: BUFFER_STORE_DWORDX2_BOTHEN_exact [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY8]], 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s16>), align 1, addrspace 7) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY3]], %subreg.sub1, [[PRED_COPY4]], %subreg.sub2, [[PRED_COPY5]], %subreg.sub3 + ; CHECK-NEXT: 
[[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY6]], %subreg.sub0, [[PRED_COPY7]], %subreg.sub1 + ; CHECK-NEXT: BUFFER_STORE_DWORDX2_BOTHEN_exact [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[PRED_COPY8]], 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s16>), align 1, addrspace 7) ; CHECK-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.struct.buffer.store.v4f16(<4 x half> %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret void diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.tbuffer.load.f16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.tbuffer.load.f16.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.tbuffer.load.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.tbuffer.load.f16.ll @@ -9,33 +9,33 @@ ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; PACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; PACKED-NEXT: [[TBUFFER_LOAD_FORMAT_D16_X_BOTHEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 7) - ; PACKED-NEXT: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_D16_X_BOTHEN]] + ; PACKED-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; PACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; PACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; PACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; PACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; PACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; PACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY4]], %subreg.sub0, [[PRED_COPY5]], %subreg.sub1 + ; PACKED-NEXT: [[TBUFFER_LOAD_FORMAT_D16_X_BOTHEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 78, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 7) + ; PACKED-NEXT: $vgpr0 = PRED_COPY [[TBUFFER_LOAD_FORMAT_D16_X_BOTHEN]] ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; UNPACKED-LABEL: name: struct_tbuffer_load_f16__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: 
[[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; UNPACKED-NEXT: [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 7) - ; UNPACKED-NEXT: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN]] + ; UNPACKED-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; UNPACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; UNPACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; UNPACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; UNPACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; UNPACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; UNPACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY4]], %subreg.sub0, [[PRED_COPY5]], %subreg.sub1 + ; UNPACKED-NEXT: [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 78, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 7) + ; UNPACKED-NEXT: $vgpr0 = PRED_COPY [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN]] ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call half @llvm.amdgcn.struct.tbuffer.load.f16(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 78, i32 0) ret half %val @@ -46,44 +46,44 @@ ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; PACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; PACKED-NEXT: [[TBUFFER_LOAD_FORMAT_D16_XY_BOTHEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_XY_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, implicit $exec :: (dereferenceable load (<2 x s16>), align 1, addrspace 7) - ; PACKED-NEXT: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_D16_XY_BOTHEN]] + ; PACKED-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; PACKED-NEXT: 
[[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; PACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; PACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; PACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; PACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; PACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY4]], %subreg.sub0, [[PRED_COPY5]], %subreg.sub1 + ; PACKED-NEXT: [[TBUFFER_LOAD_FORMAT_D16_XY_BOTHEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_XY_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 78, 0, 0, implicit $exec :: (dereferenceable load (<2 x s16>), align 1, addrspace 7) + ; PACKED-NEXT: $vgpr0 = PRED_COPY [[TBUFFER_LOAD_FORMAT_D16_XY_BOTHEN]] ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; UNPACKED-LABEL: name: struct_tbuffer_load_v2f16__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; UNPACKED-NEXT: [[TBUFFER_LOAD_FORMAT_D16_XY_gfx80_BOTHEN:%[0-9]+]]:vreg_64 = TBUFFER_LOAD_FORMAT_D16_XY_gfx80_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, implicit $exec :: (dereferenceable load (<2 x s16>), align 1, addrspace 7) - ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XY_gfx80_BOTHEN]].sub0 - ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XY_gfx80_BOTHEN]].sub1 + ; UNPACKED-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; UNPACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; UNPACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; UNPACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; UNPACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; UNPACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; UNPACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY4]], %subreg.sub0, [[PRED_COPY5]], %subreg.sub1 + ; UNPACKED-NEXT: [[TBUFFER_LOAD_FORMAT_D16_XY_gfx80_BOTHEN:%[0-9]+]]:vreg_64 = TBUFFER_LOAD_FORMAT_D16_XY_gfx80_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 78, 0, 0, implicit $exec :: (dereferenceable load (<2 x s16>), align 1, addrspace 7) + ; UNPACKED-NEXT: 
[[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[TBUFFER_LOAD_FORMAT_D16_XY_gfx80_BOTHEN]].sub0 + ; UNPACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[TBUFFER_LOAD_FORMAT_D16_XY_gfx80_BOTHEN]].sub1 ; UNPACKED-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65535 - ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; UNPACKED-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY7]], [[COPY9]], implicit $exec - ; UNPACKED-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; UNPACKED-NEXT: [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY8]], [[COPY10]], implicit $exec + ; UNPACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; UNPACKED-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PRED_COPY7]], [[PRED_COPY9]], implicit $exec + ; UNPACKED-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; UNPACKED-NEXT: [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PRED_COPY8]], [[PRED_COPY10]], implicit $exec ; UNPACKED-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 16 - ; UNPACKED-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]] - ; UNPACKED-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY11]], [[V_AND_B32_e64_1]], implicit $exec + ; UNPACKED-NEXT: [[PRED_COPY11:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_1]] + ; UNPACKED-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[PRED_COPY11]], [[V_AND_B32_e64_1]], implicit $exec ; UNPACKED-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_AND_B32_e64_]], [[V_LSHLREV_B32_e64_]], implicit $exec - ; UNPACKED-NEXT: $vgpr0 = COPY [[V_OR_B32_e64_]] + ; UNPACKED-NEXT: $vgpr0 = PRED_COPY [[V_OR_B32_e64_]] ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call <2 x half> @llvm.amdgcn.struct.tbuffer.load.v2f16(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 78, i32 0) ret <2 x half> %val @@ -100,57 +100,57 @@ ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; PACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; PACKED-NEXT: [[TBUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN:%[0-9]+]]:vreg_64 = TBUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, implicit $exec :: (dereferenceable load (<4 x s16>), align 1, addrspace 7) - ; PACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN]].sub0 - ; PACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN]].sub1 - ; PACKED-NEXT: $vgpr0 = COPY [[COPY7]] - ; PACKED-NEXT: $vgpr1 = COPY [[COPY8]] + ; PACKED-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; PACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; PACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; PACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; PACKED-NEXT: 
[[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; PACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; PACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; PACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY4]], %subreg.sub0, [[PRED_COPY5]], %subreg.sub1 + ; PACKED-NEXT: [[TBUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN:%[0-9]+]]:vreg_64 = TBUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 78, 0, 0, implicit $exec :: (dereferenceable load (<4 x s16>), align 1, addrspace 7) + ; PACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN]].sub0 + ; PACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN]].sub1 + ; PACKED-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY7]] + ; PACKED-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY8]] ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 ; UNPACKED-LABEL: name: struct_tbuffer_load_v4f16__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; UNPACKED-NEXT: [[TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN:%[0-9]+]]:vreg_128 = TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, implicit $exec :: (dereferenceable load (<4 x s16>), align 1, addrspace 7) - ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN]].sub0 - ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN]].sub1 - ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN]].sub2 - ; UNPACKED-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN]].sub3 + ; UNPACKED-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; UNPACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; UNPACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; UNPACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; UNPACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; UNPACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; UNPACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY4]], %subreg.sub0, [[PRED_COPY5]], %subreg.sub1 + ; UNPACKED-NEXT: 
[[TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN:%[0-9]+]]:vreg_128 = TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 78, 0, 0, implicit $exec :: (dereferenceable load (<4 x s16>), align 1, addrspace 7) + ; UNPACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN]].sub0 + ; UNPACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN]].sub1 + ; UNPACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN]].sub2 + ; UNPACKED-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN]].sub3 ; UNPACKED-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65535 - ; UNPACKED-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; UNPACKED-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY7]], [[COPY11]], implicit $exec - ; UNPACKED-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; UNPACKED-NEXT: [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY8]], [[COPY12]], implicit $exec + ; UNPACKED-NEXT: [[PRED_COPY11:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; UNPACKED-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PRED_COPY7]], [[PRED_COPY11]], implicit $exec + ; UNPACKED-NEXT: [[PRED_COPY12:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; UNPACKED-NEXT: [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PRED_COPY8]], [[PRED_COPY12]], implicit $exec ; UNPACKED-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 16 - ; UNPACKED-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]] - ; UNPACKED-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY13]], [[V_AND_B32_e64_1]], implicit $exec + ; UNPACKED-NEXT: [[PRED_COPY13:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_1]] + ; UNPACKED-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[PRED_COPY13]], [[V_AND_B32_e64_1]], implicit $exec ; UNPACKED-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_AND_B32_e64_]], [[V_LSHLREV_B32_e64_]], implicit $exec - ; UNPACKED-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; UNPACKED-NEXT: [[V_AND_B32_e64_2:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY9]], [[COPY14]], implicit $exec - ; UNPACKED-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; UNPACKED-NEXT: [[V_AND_B32_e64_3:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY10]], [[COPY15]], implicit $exec - ; UNPACKED-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]] - ; UNPACKED-NEXT: [[V_LSHLREV_B32_e64_1:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY16]], [[V_AND_B32_e64_3]], implicit $exec + ; UNPACKED-NEXT: [[PRED_COPY14:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; UNPACKED-NEXT: [[V_AND_B32_e64_2:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PRED_COPY9]], [[PRED_COPY14]], implicit $exec + ; UNPACKED-NEXT: [[PRED_COPY15:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; UNPACKED-NEXT: [[V_AND_B32_e64_3:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PRED_COPY10]], [[PRED_COPY15]], implicit $exec + ; UNPACKED-NEXT: [[PRED_COPY16:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_1]] + ; UNPACKED-NEXT: [[V_LSHLREV_B32_e64_1:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[PRED_COPY16]], [[V_AND_B32_e64_3]], implicit $exec ; UNPACKED-NEXT: [[V_OR_B32_e64_1:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_AND_B32_e64_2]], [[V_LSHLREV_B32_e64_1]], implicit $exec - ; UNPACKED-NEXT: $vgpr0 = COPY [[V_OR_B32_e64_]] - ; UNPACKED-NEXT: $vgpr1 = COPY [[V_OR_B32_e64_1]] + ; UNPACKED-NEXT: $vgpr0 = PRED_COPY [[V_OR_B32_e64_]] + ; 
UNPACKED-NEXT: $vgpr1 = PRED_COPY [[V_OR_B32_e64_1]] ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %val = call <4 x half> @llvm.amdgcn.struct.tbuffer.load.v4f16(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 78, i32 0) ret <4 x half> %val @@ -161,35 +161,35 @@ ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; PACKED-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; PACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; PACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; PACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; PACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; PACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 ; PACKED-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; PACKED-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY4]], %subreg.sub1 - ; PACKED-NEXT: [[TBUFFER_LOAD_FORMAT_D16_X_BOTHEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 7) - ; PACKED-NEXT: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_D16_X_BOTHEN]] + ; PACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY6]], %subreg.sub0, [[PRED_COPY4]], %subreg.sub1 + ; PACKED-NEXT: [[TBUFFER_LOAD_FORMAT_D16_X_BOTHEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[PRED_COPY5]], 0, 78, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 7) + ; PACKED-NEXT: $vgpr0 = PRED_COPY [[TBUFFER_LOAD_FORMAT_D16_X_BOTHEN]] ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; UNPACKED-LABEL: name: struct_tbuffer_load_f16__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset_vindex0 ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; UNPACKED-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; UNPACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; UNPACKED-NEXT: 
[[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; UNPACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; UNPACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; UNPACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 ; UNPACKED-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY4]], %subreg.sub1 - ; UNPACKED-NEXT: [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 7) - ; UNPACKED-NEXT: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN]] + ; UNPACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY6]], %subreg.sub0, [[PRED_COPY4]], %subreg.sub1 + ; UNPACKED-NEXT: [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[PRED_COPY5]], 0, 78, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 7) + ; UNPACKED-NEXT: $vgpr0 = PRED_COPY [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN]] ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call half @llvm.amdgcn.struct.tbuffer.load.f16(<4 x i32> %rsrc, i32 0, i32 %voffset, i32 %soffset, i32 78, i32 0) ret half %val @@ -201,42 +201,42 @@ ; PACKED-NEXT: successors: %bb.2(0x80000000) ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; PACKED-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; PACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY4]] - ; PACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] + ; PACKED-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; PACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; PACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; PACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; PACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; PACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; PACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 + ; PACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY4]] + ; PACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY5]] ; PACKED-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; PACKED-NEXT: {{ $}} ; PACKED-NEXT: bb.2: ; PACKED-NEXT: successors: %bb.3(0x80000000) ; 
PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY1]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY2]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY3]], implicit $exec ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; PACKED-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; PACKED-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; PACKED-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; PACKED-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec - ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec + ; PACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; PACKED-NEXT: [[PRED_COPY10:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; PACKED-NEXT: [[PRED_COPY11:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; PACKED-NEXT: [[PRED_COPY12:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY11]], [[PRED_COPY9]], implicit $exec + ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY12]], [[PRED_COPY10]], implicit $exec ; PACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec - ; PACKED-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY6]], implicit $exec + ; PACKED-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[PRED_COPY6]], implicit $exec ; PACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; PACKED-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; PACKED-NEXT: {{ $}} ; PACKED-NEXT: bb.3: ; PACKED-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY8]], %subreg.sub1 + ; PACKED-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY7]], %subreg.sub0, [[PRED_COPY8]], 
%subreg.sub1 ; PACKED-NEXT: [[TBUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN:%[0-9]+]]:vreg_64 = TBUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, implicit $exec :: (dereferenceable load (<4 x s16>), align 1, addrspace 7) ; PACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; PACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec @@ -247,52 +247,52 @@ ; PACKED-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; PACKED-NEXT: {{ $}} ; PACKED-NEXT: bb.5: - ; PACKED-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN]].sub0 - ; PACKED-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN]].sub1 - ; PACKED-NEXT: $vgpr0 = COPY [[COPY13]] - ; PACKED-NEXT: $vgpr1 = COPY [[COPY14]] + ; PACKED-NEXT: [[PRED_COPY13:%[0-9]+]]:vgpr_32 = PRED_COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN]].sub0 + ; PACKED-NEXT: [[PRED_COPY14:%[0-9]+]]:vgpr_32 = PRED_COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN]].sub1 + ; PACKED-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY13]] + ; PACKED-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY14]] ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 ; UNPACKED-LABEL: name: struct_tbuffer_load_v4f16__vgpr_rsrc__sgpr_vindex__sgpr_voffset__vgpr_soffset ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: successors: %bb.2(0x80000000) ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY4]] - ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] + ; UNPACKED-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; UNPACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; UNPACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; UNPACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; UNPACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; UNPACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; UNPACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 + ; UNPACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY4]] + ; UNPACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY5]] ; UNPACKED-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; UNPACKED-NEXT: {{ $}} ; UNPACKED-NEXT: bb.2: ; UNPACKED-NEXT: successors: %bb.3(0x80000000) ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec - ; UNPACKED-NEXT: 
[[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY1]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY2]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY3]], implicit $exec ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; UNPACKED-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; UNPACKED-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; UNPACKED-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec - ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec + ; UNPACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; UNPACKED-NEXT: [[PRED_COPY10:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; UNPACKED-NEXT: [[PRED_COPY11:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; UNPACKED-NEXT: [[PRED_COPY12:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY11]], [[PRED_COPY9]], implicit $exec + ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY12]], [[PRED_COPY10]], implicit $exec ; UNPACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec - ; UNPACKED-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY6]], implicit $exec + ; UNPACKED-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[PRED_COPY6]], implicit $exec ; UNPACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; UNPACKED-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; UNPACKED-NEXT: {{ $}} ; UNPACKED-NEXT: bb.3: ; UNPACKED-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY8]], %subreg.sub1 + ; UNPACKED-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY7]], %subreg.sub0, [[PRED_COPY8]], %subreg.sub1 ; UNPACKED-NEXT: [[TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN:%[0-9]+]]:vreg_128 = TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, implicit $exec :: (dereferenceable load (<4 x s16>), align 1, addrspace 7) ; 
UNPACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; UNPACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec @@ -303,28 +303,28 @@ ; UNPACKED-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; UNPACKED-NEXT: {{ $}} ; UNPACKED-NEXT: bb.5: - ; UNPACKED-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN]].sub0 - ; UNPACKED-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN]].sub1 - ; UNPACKED-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN]].sub2 - ; UNPACKED-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN]].sub3 + ; UNPACKED-NEXT: [[PRED_COPY13:%[0-9]+]]:vgpr_32 = PRED_COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN]].sub0 + ; UNPACKED-NEXT: [[PRED_COPY14:%[0-9]+]]:vgpr_32 = PRED_COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN]].sub1 + ; UNPACKED-NEXT: [[PRED_COPY15:%[0-9]+]]:vgpr_32 = PRED_COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN]].sub2 + ; UNPACKED-NEXT: [[PRED_COPY16:%[0-9]+]]:vgpr_32 = PRED_COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN]].sub3 ; UNPACKED-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65535 - ; UNPACKED-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; UNPACKED-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY13]], [[COPY17]], implicit $exec - ; UNPACKED-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; UNPACKED-NEXT: [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY14]], [[COPY18]], implicit $exec + ; UNPACKED-NEXT: [[PRED_COPY17:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; UNPACKED-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PRED_COPY13]], [[PRED_COPY17]], implicit $exec + ; UNPACKED-NEXT: [[PRED_COPY18:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; UNPACKED-NEXT: [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PRED_COPY14]], [[PRED_COPY18]], implicit $exec ; UNPACKED-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 16 - ; UNPACKED-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]] - ; UNPACKED-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY19]], [[V_AND_B32_e64_1]], implicit $exec + ; UNPACKED-NEXT: [[PRED_COPY19:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_1]] + ; UNPACKED-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[PRED_COPY19]], [[V_AND_B32_e64_1]], implicit $exec ; UNPACKED-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_AND_B32_e64_]], [[V_LSHLREV_B32_e64_]], implicit $exec - ; UNPACKED-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; UNPACKED-NEXT: [[V_AND_B32_e64_2:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY15]], [[COPY20]], implicit $exec - ; UNPACKED-NEXT: [[COPY21:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; UNPACKED-NEXT: [[V_AND_B32_e64_3:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY16]], [[COPY21]], implicit $exec - ; UNPACKED-NEXT: [[COPY22:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]] - ; UNPACKED-NEXT: [[V_LSHLREV_B32_e64_1:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY22]], [[V_AND_B32_e64_3]], implicit $exec + ; UNPACKED-NEXT: [[PRED_COPY20:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; UNPACKED-NEXT: [[V_AND_B32_e64_2:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PRED_COPY15]], [[PRED_COPY20]], implicit $exec + ; UNPACKED-NEXT: [[PRED_COPY21:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; UNPACKED-NEXT: [[V_AND_B32_e64_3:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PRED_COPY16]], [[PRED_COPY21]], implicit $exec + ; UNPACKED-NEXT: [[PRED_COPY22:%[0-9]+]]:vgpr_32 = 
PRED_COPY [[S_MOV_B32_1]] + ; UNPACKED-NEXT: [[V_LSHLREV_B32_e64_1:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[PRED_COPY22]], [[V_AND_B32_e64_3]], implicit $exec ; UNPACKED-NEXT: [[V_OR_B32_e64_1:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_AND_B32_e64_2]], [[V_LSHLREV_B32_e64_1]], implicit $exec - ; UNPACKED-NEXT: $vgpr0 = COPY [[V_OR_B32_e64_]] - ; UNPACKED-NEXT: $vgpr1 = COPY [[V_OR_B32_e64_1]] + ; UNPACKED-NEXT: $vgpr0 = PRED_COPY [[V_OR_B32_e64_]] + ; UNPACKED-NEXT: $vgpr1 = PRED_COPY [[V_OR_B32_e64_1]] ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %val = call <4 x half> @llvm.amdgcn.struct.tbuffer.load.v4f16(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 78, i32 0) ret <4 x half> %val @@ -335,33 +335,33 @@ ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; PACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; PACKED-NEXT: [[TBUFFER_LOAD_FORMAT_D16_X_BOTHEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 4095, 78, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 7) - ; PACKED-NEXT: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_D16_X_BOTHEN]] + ; PACKED-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; PACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; PACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; PACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; PACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; PACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; PACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY4]], %subreg.sub0, [[PRED_COPY5]], %subreg.sub1 + ; PACKED-NEXT: [[TBUFFER_LOAD_FORMAT_D16_X_BOTHEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[PRED_COPY6]], 4095, 78, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 7) + ; PACKED-NEXT: $vgpr0 = PRED_COPY [[TBUFFER_LOAD_FORMAT_D16_X_BOTHEN]] ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; UNPACKED-LABEL: name: struct_tbuffer_load_f16__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset_voffset_add4095 ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = 
REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; UNPACKED-NEXT: [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 4095, 78, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 7) - ; UNPACKED-NEXT: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN]] + ; UNPACKED-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; UNPACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; UNPACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; UNPACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; UNPACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; UNPACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; UNPACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY4]], %subreg.sub0, [[PRED_COPY5]], %subreg.sub1 + ; UNPACKED-NEXT: [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[PRED_COPY6]], 4095, 78, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 7) + ; UNPACKED-NEXT: $vgpr0 = PRED_COPY [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN]] ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %voffset = add i32 %voffset.base, 4095 %val = call half @llvm.amdgcn.struct.tbuffer.load.f16(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 78, i32 0) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.tbuffer.load.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.tbuffer.load.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.tbuffer.load.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.tbuffer.load.ll @@ -8,17 +8,17 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; CHECK-NEXT: [[TBUFFER_LOAD_FORMAT_X_BOTHEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7) - ; CHECK-NEXT: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_X_BOTHEN]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = 
PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY4]], %subreg.sub0, [[PRED_COPY5]], %subreg.sub1 + ; CHECK-NEXT: [[TBUFFER_LOAD_FORMAT_X_BOTHEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 78, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[TBUFFER_LOAD_FORMAT_X_BOTHEN]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.struct.tbuffer.load.f32(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 78, i32 0) ret float %val @@ -29,20 +29,20 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; CHECK-NEXT: [[TBUFFER_LOAD_FORMAT_XY_BOTHEN:%[0-9]+]]:vreg_64 = TBUFFER_LOAD_FORMAT_XY_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, implicit $exec :: (dereferenceable load (<2 x s32>), align 1, addrspace 7) - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XY_BOTHEN]].sub0 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XY_BOTHEN]].sub1 - ; CHECK-NEXT: $vgpr0 = COPY [[COPY7]] - ; CHECK-NEXT: $vgpr1 = COPY [[COPY8]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY4]], %subreg.sub0, [[PRED_COPY5]], %subreg.sub1 + ; CHECK-NEXT: [[TBUFFER_LOAD_FORMAT_XY_BOTHEN:%[0-9]+]]:vreg_64 = TBUFFER_LOAD_FORMAT_XY_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 78, 0, 0, implicit $exec :: (dereferenceable load (<2 x s32>), align 1, addrspace 7) + ; CHECK-NEXT: 
[[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[TBUFFER_LOAD_FORMAT_XY_BOTHEN]].sub0 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[TBUFFER_LOAD_FORMAT_XY_BOTHEN]].sub1 + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY8]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %val = call <2 x float> @llvm.amdgcn.struct.tbuffer.load.v2f32(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 78, i32 0) ret <2 x float> %val @@ -53,22 +53,22 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; CHECK-NEXT: [[TBUFFER_LOAD_FORMAT_XYZ_BOTHEN:%[0-9]+]]:vreg_96 = TBUFFER_LOAD_FORMAT_XYZ_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, implicit $exec :: (dereferenceable load (<3 x s32>), align 1, addrspace 7) - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZ_BOTHEN]].sub0 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZ_BOTHEN]].sub1 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZ_BOTHEN]].sub2 - ; CHECK-NEXT: $vgpr0 = COPY [[COPY7]] - ; CHECK-NEXT: $vgpr1 = COPY [[COPY8]] - ; CHECK-NEXT: $vgpr2 = COPY [[COPY9]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY4]], %subreg.sub0, [[PRED_COPY5]], %subreg.sub1 + ; CHECK-NEXT: [[TBUFFER_LOAD_FORMAT_XYZ_BOTHEN:%[0-9]+]]:vreg_96 = TBUFFER_LOAD_FORMAT_XYZ_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 78, 0, 0, implicit $exec :: (dereferenceable load (<3 x s32>), align 1, addrspace 7) + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[TBUFFER_LOAD_FORMAT_XYZ_BOTHEN]].sub0 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[TBUFFER_LOAD_FORMAT_XYZ_BOTHEN]].sub1 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[TBUFFER_LOAD_FORMAT_XYZ_BOTHEN]].sub2 + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY8]] + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[PRED_COPY9]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 %val = call <3 x float> @llvm.amdgcn.struct.tbuffer.load.v3f32(<4 x 
i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 78, i32 0) ret <3 x float> %val @@ -79,24 +79,24 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; CHECK-NEXT: [[TBUFFER_LOAD_FORMAT_XYZW_BOTHEN:%[0-9]+]]:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 7) - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZW_BOTHEN]].sub0 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZW_BOTHEN]].sub1 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZW_BOTHEN]].sub2 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZW_BOTHEN]].sub3 - ; CHECK-NEXT: $vgpr0 = COPY [[COPY7]] - ; CHECK-NEXT: $vgpr1 = COPY [[COPY8]] - ; CHECK-NEXT: $vgpr2 = COPY [[COPY9]] - ; CHECK-NEXT: $vgpr3 = COPY [[COPY10]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY4]], %subreg.sub0, [[PRED_COPY5]], %subreg.sub1 + ; CHECK-NEXT: [[TBUFFER_LOAD_FORMAT_XYZW_BOTHEN:%[0-9]+]]:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 78, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 7) + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[TBUFFER_LOAD_FORMAT_XYZW_BOTHEN]].sub0 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[TBUFFER_LOAD_FORMAT_XYZW_BOTHEN]].sub1 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[TBUFFER_LOAD_FORMAT_XYZW_BOTHEN]].sub2 + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[TBUFFER_LOAD_FORMAT_XYZW_BOTHEN]].sub3 + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY8]] + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[PRED_COPY9]] + ; CHECK-NEXT: $vgpr3 = PRED_COPY [[PRED_COPY10]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 %val = call <4 x float> @llvm.amdgcn.struct.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 78, i32 0) ret <4 x float> %val @@ -107,18 
+107,18 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY4]], %subreg.sub1 - ; CHECK-NEXT: [[TBUFFER_LOAD_FORMAT_X_BOTHEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7) - ; CHECK-NEXT: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_X_BOTHEN]] + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY6]], %subreg.sub0, [[PRED_COPY4]], %subreg.sub1 + ; CHECK-NEXT: [[TBUFFER_LOAD_FORMAT_X_BOTHEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[PRED_COPY5]], 0, 78, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[TBUFFER_LOAD_FORMAT_X_BOTHEN]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.struct.tbuffer.load.f32(<4 x i32> %rsrc, i32 0, i32 %voffset, i32 %soffset, i32 78, i32 0) ret float %val @@ -130,42 +130,42 @@ ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; CHECK-NEXT: 
[[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY4]] + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY5]] ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY1]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY2]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY3]], implicit $exec ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY11]], [[PRED_COPY9]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY12]], [[PRED_COPY10]], implicit $exec ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec - ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY6]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[PRED_COPY6]], implicit $exec ; 
CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY8]], %subreg.sub1 + ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY7]], %subreg.sub0, [[PRED_COPY8]], %subreg.sub1 ; CHECK-NEXT: [[TBUFFER_LOAD_FORMAT_XYZW_BOTHEN:%[0-9]+]]:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_BOTHEN [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 7) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec @@ -176,14 +176,14 @@ ; CHECK-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.5: - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZW_BOTHEN]].sub0 - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZW_BOTHEN]].sub1 - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZW_BOTHEN]].sub2 - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZW_BOTHEN]].sub3 - ; CHECK-NEXT: $vgpr0 = COPY [[COPY13]] - ; CHECK-NEXT: $vgpr1 = COPY [[COPY14]] - ; CHECK-NEXT: $vgpr2 = COPY [[COPY15]] - ; CHECK-NEXT: $vgpr3 = COPY [[COPY16]] + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:vgpr_32 = PRED_COPY [[TBUFFER_LOAD_FORMAT_XYZW_BOTHEN]].sub0 + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:vgpr_32 = PRED_COPY [[TBUFFER_LOAD_FORMAT_XYZW_BOTHEN]].sub1 + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:vgpr_32 = PRED_COPY [[TBUFFER_LOAD_FORMAT_XYZW_BOTHEN]].sub2 + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:vgpr_32 = PRED_COPY [[TBUFFER_LOAD_FORMAT_XYZW_BOTHEN]].sub3 + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY13]] + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY14]] + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[PRED_COPY15]] + ; CHECK-NEXT: $vgpr3 = PRED_COPY [[PRED_COPY16]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 %val = call <4 x float> @llvm.amdgcn.struct.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 78, i32 0) ret <4 x float> %val @@ -194,17 +194,17 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; CHECK-NEXT: [[TBUFFER_LOAD_FORMAT_X_BOTHEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 4095, 78, 0, 0, implicit $exec :: 
(dereferenceable load (s32), align 1, addrspace 7) - ; CHECK-NEXT: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_X_BOTHEN]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY4]], %subreg.sub0, [[PRED_COPY5]], %subreg.sub1 + ; CHECK-NEXT: [[TBUFFER_LOAD_FORMAT_X_BOTHEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[PRED_COPY6]], 4095, 78, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[TBUFFER_LOAD_FORMAT_X_BOTHEN]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %voffset = add i32 %voffset.base, 4095 %val = call float @llvm.amdgcn.struct.tbuffer.load.f32(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 78, i32 0) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.wqm.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.wqm.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.wqm.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.wqm.ll @@ -6,9 +6,9 @@ ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[WQM:%[0-9]+]]:vgpr_32 = WQM [[COPY]], implicit $exec - ; GCN-NEXT: $vgpr0 = COPY [[WQM]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[WQM:%[0-9]+]]:vgpr_32 = WQM [[PRED_COPY]], implicit $exec + ; GCN-NEXT: $vgpr0 = PRED_COPY [[WQM]] ; GCN-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %ret = call float @llvm.amdgcn.wqm.f32(float %val) ret float %ret @@ -19,9 +19,9 @@ ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[WQM:%[0-9]+]]:vgpr_32 = WQM [[COPY]], implicit $exec - ; GCN-NEXT: $vgpr0 = COPY [[WQM]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[WQM:%[0-9]+]]:vgpr_32 = WQM [[PRED_COPY]], implicit $exec + ; GCN-NEXT: $vgpr0 = PRED_COPY [[WQM]] ; GCN-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = bitcast float %arg to <2 x half> %ret = call <2 x half> @llvm.amdgcn.wqm.v2f16(<2 x half> %val) @@ -34,14 +34,14 @@ ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 ; GCN-NEXT: [[WQM:%[0-9]+]]:vreg_64 = WQM [[REG_SEQUENCE]], implicit $exec - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[WQM]].sub0 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[WQM]].sub1 - ; GCN-NEXT: $vgpr0 = 
COPY [[COPY2]] - ; GCN-NEXT: $vgpr1 = COPY [[COPY3]] + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[WQM]].sub0 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[WQM]].sub1 + ; GCN-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY2]] + ; GCN-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY3]] ; GCN-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %ret = call double @llvm.amdgcn.wqm.f64(double %val) %bitcast = bitcast double %ret to <2 x float> @@ -61,17 +61,17 @@ ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2 ; GCN-NEXT: [[WQM:%[0-9]+]]:vreg_96 = WQM [[REG_SEQUENCE]], implicit $exec - ; GCN-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[WQM]].sub0 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[WQM]].sub1 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[WQM]].sub2 - ; GCN-NEXT: $vgpr0 = COPY [[COPY3]] - ; GCN-NEXT: $vgpr1 = COPY [[COPY4]] - ; GCN-NEXT: $vgpr2 = COPY [[COPY5]] + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[WQM]].sub0 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[WQM]].sub1 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[WQM]].sub2 + ; GCN-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY3]] + ; GCN-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY4]] + ; GCN-NEXT: $vgpr2 = PRED_COPY [[PRED_COPY5]] ; GCN-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 %ret = call <3 x float> @llvm.amdgcn.wqm.v3f32(<3 x float> %val) ret <3 x float> %ret diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.wwm.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.wwm.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.wwm.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.wwm.ll @@ -8,9 +8,9 @@ ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[STRICT_WWM:%[0-9]+]]:vgpr_32 = STRICT_WWM [[COPY]], implicit $exec - ; GCN-NEXT: $vgpr0 = COPY [[STRICT_WWM]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[STRICT_WWM:%[0-9]+]]:vgpr_32 = STRICT_WWM [[PRED_COPY]], implicit $exec + ; GCN-NEXT: $vgpr0 = PRED_COPY [[STRICT_WWM]] ; GCN-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %ret = call float @llvm.amdgcn.wwm.f32(float %val) ret float %ret @@ -21,9 +21,9 @@ ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[STRICT_WWM:%[0-9]+]]:vgpr_32 = STRICT_WWM [[COPY]], implicit $exec - ; GCN-NEXT: $vgpr0 = COPY [[STRICT_WWM]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[STRICT_WWM:%[0-9]+]]:vgpr_32 = STRICT_WWM [[PRED_COPY]], implicit $exec + ; GCN-NEXT: $vgpr0 = PRED_COPY [[STRICT_WWM]] ; GCN-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = bitcast float %arg to <2 x half> %ret = call <2 x half> @llvm.amdgcn.wwm.v2f16(<2 x half> %val) @@ -36,14 
+36,14 @@ ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 ; GCN-NEXT: [[STRICT_WWM:%[0-9]+]]:vreg_64 = STRICT_WWM [[REG_SEQUENCE]], implicit $exec - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[STRICT_WWM]].sub0 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[STRICT_WWM]].sub1 - ; GCN-NEXT: $vgpr0 = COPY [[COPY2]] - ; GCN-NEXT: $vgpr1 = COPY [[COPY3]] + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[STRICT_WWM]].sub0 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[STRICT_WWM]].sub1 + ; GCN-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY2]] + ; GCN-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY3]] ; GCN-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %ret = call double @llvm.amdgcn.wwm.f64(double %val) %bitcast = bitcast double %ret to <2 x float> @@ -63,17 +63,17 @@ ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2 ; GCN-NEXT: [[STRICT_WWM:%[0-9]+]]:vreg_96 = STRICT_WWM [[REG_SEQUENCE]], implicit $exec - ; GCN-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[STRICT_WWM]].sub0 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[STRICT_WWM]].sub1 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[STRICT_WWM]].sub2 - ; GCN-NEXT: $vgpr0 = COPY [[COPY3]] - ; GCN-NEXT: $vgpr1 = COPY [[COPY4]] - ; GCN-NEXT: $vgpr2 = COPY [[COPY5]] + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[STRICT_WWM]].sub0 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[STRICT_WWM]].sub1 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[STRICT_WWM]].sub2 + ; GCN-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY3]] + ; GCN-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY4]] + ; GCN-NEXT: $vgpr2 = PRED_COPY [[PRED_COPY5]] ; GCN-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 %ret = call <3 x float> @llvm.amdgcn.wwm.v3f32(<3 x float> %val) ret <3 x float> %ret @@ -84,9 +84,9 @@ ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[STRICT_WWM:%[0-9]+]]:vgpr_32 = STRICT_WWM [[COPY]], implicit $exec - ; GCN-NEXT: $vgpr0 = COPY [[STRICT_WWM]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[STRICT_WWM:%[0-9]+]]:vgpr_32 = STRICT_WWM [[PRED_COPY]], implicit $exec + ; GCN-NEXT: $vgpr0 = PRED_COPY [[STRICT_WWM]] ; GCN-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %ret = call float @llvm.amdgcn.strict.wwm.f32(float %val) ret float %ret 
@@ -97,9 +97,9 @@ ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[STRICT_WWM:%[0-9]+]]:vgpr_32 = STRICT_WWM [[COPY]], implicit $exec - ; GCN-NEXT: $vgpr0 = COPY [[STRICT_WWM]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[STRICT_WWM:%[0-9]+]]:vgpr_32 = STRICT_WWM [[PRED_COPY]], implicit $exec + ; GCN-NEXT: $vgpr0 = PRED_COPY [[STRICT_WWM]] ; GCN-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = bitcast float %arg to <2 x half> %ret = call <2 x half> @llvm.amdgcn.strict.wwm.v2f16(<2 x half> %val) @@ -112,14 +112,14 @@ ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 ; GCN-NEXT: [[STRICT_WWM:%[0-9]+]]:vreg_64 = STRICT_WWM [[REG_SEQUENCE]], implicit $exec - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[STRICT_WWM]].sub0 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[STRICT_WWM]].sub1 - ; GCN-NEXT: $vgpr0 = COPY [[COPY2]] - ; GCN-NEXT: $vgpr1 = COPY [[COPY3]] + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[STRICT_WWM]].sub0 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[STRICT_WWM]].sub1 + ; GCN-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY2]] + ; GCN-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY3]] ; GCN-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %ret = call double @llvm.amdgcn.strict.wwm.f64(double %val) %bitcast = bitcast double %ret to <2 x float> @@ -139,17 +139,17 @@ ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2 ; GCN-NEXT: [[STRICT_WWM:%[0-9]+]]:vreg_96 = STRICT_WWM [[REG_SEQUENCE]], implicit $exec - ; GCN-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[STRICT_WWM]].sub0 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[STRICT_WWM]].sub1 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[STRICT_WWM]].sub2 - ; GCN-NEXT: $vgpr0 = COPY [[COPY3]] - ; GCN-NEXT: $vgpr1 = COPY [[COPY4]] - ; GCN-NEXT: $vgpr2 = COPY [[COPY5]] + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[STRICT_WWM]].sub0 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[STRICT_WWM]].sub1 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[STRICT_WWM]].sub2 + ; GCN-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY3]] + ; GCN-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY4]] + ; GCN-NEXT: $vgpr2 = PRED_COPY [[PRED_COPY5]] ; GCN-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 %ret = call <3 x float> 
@llvm.amdgcn.strict.wwm.v3f32(<3 x float> %val) ret <3 x float> %ret diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-add.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-add.s16.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-add.s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-add.s16.mir @@ -42,8 +42,8 @@ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY1]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s16) = COPY [[TRUNC]](s16) - ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s16) = G_ADD [[COPY2]], [[TRUNC1]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s16) = PRED_COPY [[TRUNC]](s16) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s16) = G_ADD [[PRED_COPY]], [[TRUNC1]] ; CHECK-NEXT: S_ENDPGM 0, implicit [[ADD]](s16) %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $vgpr0 @@ -67,8 +67,8 @@ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY1]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s16) = COPY [[TRUNC1]](s16) - ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s16) = G_ADD [[TRUNC]], [[COPY2]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s16) = PRED_COPY [[TRUNC1]](s16) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s16) = G_ADD [[TRUNC]], [[PRED_COPY]] ; CHECK-NEXT: S_ENDPGM 0, implicit [[ADD]](s16) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $sgpr0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-add.s32.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-add.s32.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-add.s32.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-add.s32.mir @@ -31,8 +31,8 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY2]], [[COPY1]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[PRED_COPY]], [[COPY1]] %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $vgpr0 %2:_(s32) = G_ADD %0, %1 @@ -50,8 +50,8 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY]], [[COPY2]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY]], [[PRED_COPY]] %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $sgpr0 %2:_(s32) = G_ADD %0, %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-add.v2s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-add.v2s16.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-add.v2s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-add.v2s16.mir @@ -42,8 +42,8 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY]](<2 x s16>) - ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(<2 x s16>) = G_ADD [[COPY2]], [[COPY1]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(<2 x s16>) = PRED_COPY [[COPY]](<2 x s16>) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(<2 x s16>) 
= G_ADD [[PRED_COPY]], [[COPY1]] ; CHECK-NEXT: S_ENDPGM 0, implicit [[ADD]](<2 x s16>) %0:_(<2 x s16>) = COPY $sgpr0 %1:_(<2 x s16>) = COPY $vgpr0 @@ -63,8 +63,8 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY1]](<2 x s16>) - ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(<2 x s16>) = G_ADD [[COPY]], [[COPY2]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(<2 x s16>) = PRED_COPY [[COPY1]](<2 x s16>) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(<2 x s16>) = G_ADD [[COPY]], [[PRED_COPY]] %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s16>) = COPY $sgpr0 %2:_(<2 x s16>) = G_ADD %0, %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn-exp-compr.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn-exp-compr.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn-exp-compr.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn-exp-compr.mir @@ -14,9 +14,9 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp.compr), 0, 0, [[COPY2]](s32), [[COPY3]](s32), 0, 0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp.compr), 0, 0, [[PRED_COPY]](s32), [[PRED_COPY1]](s32), 0, 0 %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp.compr.v2f16), 0, 0, %0, %1, 0, 0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn-exp.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn-exp.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn-exp.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn-exp.mir @@ -29,11 +29,11 @@ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[COPY3]](s32) - ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), 0, 0, [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), 0, 0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY3]](s32) + ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), 0, 0, [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), 0, 0 %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 %2:_(s32) = COPY $sgpr2 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn-s-buffer-load.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn-s-buffer-load.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn-s-buffer-load.mir +++ 
b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn-s-buffer-load.mir @@ -58,7 +58,7 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF @@ -85,7 +85,7 @@ ; CHECK-NEXT: .2: ; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.1(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY2]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[PRED_COPY]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec ; CHECK-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ballot.i64.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ballot.i64.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ballot.i64.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ballot.i64.mir @@ -14,8 +14,8 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[COPY1]](s1) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) + ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[PRED_COPY]](s1) ; CHECK-NEXT: S_ENDPGM 0, implicit [[INT]](s64) %0:_(s32) = COPY $sgpr0 %1:_(s1) = G_TRUNC %0 @@ -35,8 +35,8 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[COPY1]](s1) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) + ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[PRED_COPY]](s1) ; CHECK-NEXT: S_ENDPGM 0, implicit [[INT]](s64) %0:_(s32) = COPY $vgpr0 %1:_(s1) = G_TRUNC %0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.class.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.class.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.class.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.class.mir @@ -14,9 +14,9 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s64) = COPY [[COPY]](s64) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[INT:%[0-9]+]]:vcc(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.class), [[COPY2]](s64), [[COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s64) = PRED_COPY [[COPY]](s64) + ; 
CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[INT:%[0-9]+]]:vcc(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.class), [[PRED_COPY]](s64), [[PRED_COPY1]](s32) %0:_(s64) = COPY $sgpr0_sgpr1 %1:_(s32) = COPY $sgpr2 %2:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.class), %0, %1 @@ -35,8 +35,8 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s64) = COPY [[COPY]](s64) - ; CHECK-NEXT: [[INT:%[0-9]+]]:vcc(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.class), [[COPY2]](s64), [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s64) = PRED_COPY [[COPY]](s64) + ; CHECK-NEXT: [[INT:%[0-9]+]]:vcc(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.class), [[PRED_COPY]](s64), [[COPY1]](s32) %0:_(s64) = COPY $sgpr0_sgpr1 %1:_(s32) = COPY $vgpr0 %2:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.class), %0, %1 @@ -54,8 +54,8 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[INT:%[0-9]+]]:vcc(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.class), [[COPY]](s64), [[COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[INT:%[0-9]+]]:vcc(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.class), [[COPY]](s64), [[PRED_COPY]](s32) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s32) = COPY $sgpr0 %2:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.class), %0, %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.cvt.pkrtz.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.cvt.pkrtz.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.cvt.pkrtz.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.cvt.pkrtz.mir @@ -14,9 +14,9 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.cvt.pkrtz), [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.cvt.pkrtz), [[PRED_COPY]](s32), [[PRED_COPY1]](s32) %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 %2:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.cvt.pkrtz), %0, %1 @@ -33,8 +33,8 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.cvt.pkrtz), [[COPY2]](s32), [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.cvt.pkrtz), [[PRED_COPY]](s32), [[COPY1]](s32) %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $vgpr0 %2:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.cvt.pkrtz), %0, %1 @@ -51,8 +51,8 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: 
[[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.cvt.pkrtz), [[COPY1]](s32), [[COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.cvt.pkrtz), [[COPY1]](s32), [[PRED_COPY]](s32) %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $vgpr0 %2:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.cvt.pkrtz), %1, %0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.div.fmas.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.div.fmas.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.div.fmas.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.div.fmas.mir @@ -19,11 +19,11 @@ ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY3]](s32), [[C]] ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s1) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) + ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s1) %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 %2:_(s32) = COPY $sgpr2 @@ -48,12 +48,12 @@ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY3]](s32), [[COPY4]] - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[ICMP]](s1) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY3]](s32), [[PRED_COPY]] + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY2]](s32) + ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[ICMP]](s1) %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 %2:_(s32) = COPY $sgpr2 @@ -78,11 +78,11 @@ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; 
CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY3]](s32), [[COPY4]] - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[COPY]](s32), [[COPY5]](s32), [[COPY6]](s32), [[ICMP]](s1) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY3]](s32), [[PRED_COPY]] + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY2]](s32) + ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[ICMP]](s1) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $sgpr0 %2:_(s32) = COPY $sgpr1 @@ -107,8 +107,8 @@ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY3]](s32), [[COPY4]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY3]](s32), [[PRED_COPY]] ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[ICMP]](s1) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.div.scale.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.div.scale.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.div.scale.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.div.scale.mir @@ -14,9 +14,9 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32), [[INT1:%[0-9]+]]:vcc(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY2]](s32), [[COPY3]](s32), 0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32), [[INT1:%[0-9]+]]:vcc(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[PRED_COPY]](s32), [[PRED_COPY1]](s32), 0 %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 %2:_(s32), %3:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), %0, %1, 0 @@ -34,8 +34,8 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32), [[INT1:%[0-9]+]]:vcc(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY2]](s32), [[COPY1]](s32), 0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32), [[INT1:%[0-9]+]]:vcc(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[PRED_COPY]](s32), [[COPY1]](s32), 0 %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $vgpr0 %2:_(s32), %3:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), %0, %1, 0 @@ -53,8 +53,8 @@ ; CHECK-NEXT: {{ 
$}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32), [[INT1:%[0-9]+]]:vcc(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s32), [[COPY2]](s32), 0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32), [[INT1:%[0-9]+]]:vcc(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s32), [[PRED_COPY]](s32), 0 %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $sgpr0 %2:_(s32), %3:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), %0, %1, 0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.bpermute.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.bpermute.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.bpermute.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.bpermute.mir @@ -15,9 +15,9 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ds.bpermute), [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ds.bpermute), [[PRED_COPY]](s32), [[PRED_COPY1]](s32) %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 %2:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ds.bpermute), %0, %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.gws.init.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.gws.init.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.gws.init.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.gws.init.mir @@ -15,8 +15,8 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.gws.init), [[COPY2]](s32), [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.gws.init), [[PRED_COPY]](s32), [[COPY1]](s32) %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.gws.init), %0, %1 @@ -35,9 +35,9 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[COPY1]](s32), implicit $exec - ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.gws.init), [[COPY2]](s32), [[V_READFIRSTLANE_B32_]](s32) + ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.gws.init), [[PRED_COPY]](s32), [[V_READFIRSTLANE_B32_]](s32) %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $vgpr0 G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.gws.init), %0, %1 diff --git 
a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.ordered.add.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.ordered.add.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.ordered.add.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.ordered.add.mir @@ -14,8 +14,8 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.ordered.add), [[COPY]](s32), [[COPY2]](s32), 0, 0, 0, 0, 0, 0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.ordered.add), [[COPY]](s32), [[PRED_COPY]](s32), 0, 0, 0, 0, 0, 0 %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 %2:_(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.ordered.add), %0, %1, 0, 0, 0, 0, 0, 0 @@ -33,9 +33,9 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[COPY]](s32), implicit $exec - ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.ordered.add), [[V_READFIRSTLANE_B32_]](s32), [[COPY2]](s32), 0, 0, 0, 0, 0, 0 + ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.ordered.add), [[V_READFIRSTLANE_B32_]](s32), [[PRED_COPY]](s32), 0, 0, 0, 0, 0, 0 %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $sgpr0 %2:_(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.ordered.add), %0, %1, 0, 0, 0, 0, 0, 0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.ordered.swap.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.ordered.swap.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.ordered.swap.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.ordered.swap.mir @@ -14,8 +14,8 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.ordered.swap), [[COPY]](s32), [[COPY2]](s32), 0, 0, 0, 0, 0, 0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.ordered.swap), [[COPY]](s32), [[PRED_COPY]](s32), 0, 0, 0, 0, 0, 0 %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 %2:_(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.ordered.swap), %0, %1, 0, 0, 0, 0, 0, 0 @@ -33,9 +33,9 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[COPY]](s32), implicit $exec - ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = 
G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.ordered.swap), [[V_READFIRSTLANE_B32_]](s32), [[COPY2]](s32), 0, 0, 0, 0, 0, 0 + ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.ordered.swap), [[V_READFIRSTLANE_B32_]](s32), [[PRED_COPY]](s32), 0, 0, 0, 0, 0, 0 %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $sgpr0 %2:_(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.ordered.swap), %0, %1, 0, 0, 0, 0, 0, 0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.permute.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.permute.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.permute.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.permute.mir @@ -15,9 +15,9 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ds.permute), [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ds.permute), [[PRED_COPY]](s32), [[PRED_COPY1]](s32) %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 %2:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ds.permute), %0, %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.swizzle.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.swizzle.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.swizzle.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.swizzle.mir @@ -14,8 +14,8 @@ ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ds.swizzle), [[COPY1]](s32), 0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ds.swizzle), [[PRED_COPY]](s32), 0 %0:_(s32) = COPY $sgpr0 %1:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ds.swizzle), %0, 0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.fcmp.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.fcmp.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.fcmp.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.fcmp.mir @@ -14,9 +14,9 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.fcmp), [[COPY2]](s32), [[COPY3]](s32), 1 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.fcmp), [[PRED_COPY]](s32), [[PRED_COPY1]](s32), 1 %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 %2:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.fcmp), %0, %1, 1 @@ 
-34,8 +34,8 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.fcmp), [[COPY2]](s32), [[COPY1]](s32), 1 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.fcmp), [[PRED_COPY]](s32), [[COPY1]](s32), 1 %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $vgpr0 %2:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.fcmp), %0, %1, 1 @@ -53,8 +53,8 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.fcmp), [[COPY]](s32), [[COPY2]](s32), 1 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.fcmp), [[COPY]](s32), [[PRED_COPY]](s32), 1 %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $sgpr0 %2:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.fcmp), %0, %1, 1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.fmul.legacy.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.fmul.legacy.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.fmul.legacy.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.fmul.legacy.mir @@ -14,9 +14,9 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), [[PRED_COPY]](s32), [[PRED_COPY1]](s32) %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 %2:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), %0, %1 @@ -33,8 +33,8 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), [[COPY2]](s32), [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), [[PRED_COPY]](s32), [[COPY1]](s32) %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $vgpr0 %2:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), %0, %1 @@ -51,8 +51,8 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), [[COPY1]](s32), [[COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC 
intrinsic(@llvm.amdgcn.fmul.legacy), [[COPY1]](s32), [[PRED_COPY]](s32) %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $vgpr0 %2:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), %1, %0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.icmp.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.icmp.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.icmp.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.icmp.mir @@ -14,9 +14,9 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.icmp), [[COPY2]](s32), [[COPY3]](s32), 32 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.icmp), [[PRED_COPY]](s32), [[PRED_COPY1]](s32), 32 %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 %2:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.icmp), %0, %1, 32 @@ -34,8 +34,8 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.icmp), [[COPY2]](s32), [[COPY1]](s32), 32 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.icmp), [[PRED_COPY]](s32), [[COPY1]](s32), 32 %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $vgpr0 %2:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.icmp), %0, %1, 32 @@ -53,8 +53,8 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.icmp), [[COPY]](s32), [[COPY2]](s32), 32 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.icmp), [[COPY]](s32), [[PRED_COPY]](s32), 32 %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $sgpr0 %2:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.icmp), %0, %1, 32 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.image.load.1d.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.image.load.1d.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.image.load.1d.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.image.load.1d.ll @@ -8,39 +8,39 @@ ; FAST: bb.1 (%ir-block.0): ; FAST-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; FAST-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; FAST-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 - ; FAST-NEXT: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr7 - ; FAST-NEXT: [[COPY6:%[0-9]+]]:sgpr(s32) = COPY $sgpr8 - ; FAST-NEXT: [[COPY7:%[0-9]+]]:sgpr(s32) = COPY $sgpr9 - ; FAST-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<8 
x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; FAST-NEXT: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; FAST-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; FAST-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr3 + ; FAST-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr4 + ; FAST-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr5 + ; FAST-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr6 + ; FAST-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr7 + ; FAST-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr8 + ; FAST-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr9 + ; FAST-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; FAST-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 ; FAST-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF - ; FAST-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[COPY8]](s32), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<4 x s32>), addrspace 7) - ; FAST-NEXT: [[COPY9:%[0-9]+]]:vgpr(p1) = COPY [[DEF]](p1) - ; FAST-NEXT: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[COPY9]](p1) :: (store (<4 x s32>) into `ptr addrspace(1) undef`, addrspace 1) + ; FAST-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[PRED_COPY8]](s32), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<4 x s32>), addrspace 7) + ; FAST-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr(p1) = PRED_COPY [[DEF]](p1) + ; FAST-NEXT: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[PRED_COPY9]](p1) :: (store (<4 x s32>) into `ptr addrspace(1) undef`, addrspace 1) ; FAST-NEXT: S_ENDPGM 0 ; GREEDY-LABEL: name: load_1d_vgpr_vaddr__sgpr_srsrc ; GREEDY: bb.1 (%ir-block.0): ; GREEDY-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 - ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr7 - ; GREEDY-NEXT: [[COPY6:%[0-9]+]]:sgpr(s32) = COPY $sgpr8 - ; GREEDY-NEXT: [[COPY7:%[0-9]+]]:sgpr(s32) = COPY $sgpr9 - ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GREEDY-NEXT: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr3 + ; GREEDY-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr4 + ; GREEDY-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr5 + ; GREEDY-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr6 + ; GREEDY-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr7 + ; GREEDY-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr8 + ; GREEDY-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr9 + ; GREEDY-NEXT: 
[[BUILD_VECTOR:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GREEDY-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 ; GREEDY-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF - ; GREEDY-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[COPY8]](s32), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<4 x s32>), addrspace 7) - ; GREEDY-NEXT: [[COPY9:%[0-9]+]]:vgpr(p1) = COPY [[DEF]](p1) - ; GREEDY-NEXT: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[COPY9]](p1) :: (store (<4 x s32>) into `ptr addrspace(1) undef`, addrspace 1) + ; GREEDY-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[PRED_COPY8]](s32), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<4 x s32>), addrspace 7) + ; GREEDY-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr(p1) = PRED_COPY [[DEF]](p1) + ; GREEDY-NEXT: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[PRED_COPY9]](p1) :: (store (<4 x s32>) into `ptr addrspace(1) undef`, addrspace 1) ; GREEDY-NEXT: S_ENDPGM 0 %v = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 0) store <4 x float> %v, ptr addrspace(1) undef @@ -53,41 +53,41 @@ ; FAST: bb.1 (%ir-block.0): ; FAST-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10 ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; FAST-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; FAST-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 - ; FAST-NEXT: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr7 - ; FAST-NEXT: [[COPY6:%[0-9]+]]:sgpr(s32) = COPY $sgpr8 - ; FAST-NEXT: [[COPY7:%[0-9]+]]:sgpr(s32) = COPY $sgpr9 - ; FAST-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; FAST-NEXT: [[COPY8:%[0-9]+]]:sgpr(s32) = COPY $sgpr10 + ; FAST-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; FAST-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr3 + ; FAST-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr4 + ; FAST-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr5 + ; FAST-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr6 + ; FAST-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr7 + ; FAST-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr8 + ; FAST-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr9 + ; FAST-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; FAST-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr10 ; FAST-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF - ; FAST-NEXT: [[COPY9:%[0-9]+]]:vgpr(s32) = COPY [[COPY8]](s32) - ; FAST-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[COPY9]](s32), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<4 x s32>), addrspace 7) - ; FAST-NEXT: 
[[COPY10:%[0-9]+]]:vgpr(p1) = COPY [[DEF]](p1) - ; FAST-NEXT: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[COPY10]](p1) :: (store (<4 x s32>) into `ptr addrspace(1) undef`, addrspace 1) + ; FAST-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr(s32) = PRED_COPY [[PRED_COPY8]](s32) + ; FAST-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[PRED_COPY9]](s32), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<4 x s32>), addrspace 7) + ; FAST-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr(p1) = PRED_COPY [[DEF]](p1) + ; FAST-NEXT: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[PRED_COPY10]](p1) :: (store (<4 x s32>) into `ptr addrspace(1) undef`, addrspace 1) ; FAST-NEXT: S_ENDPGM 0 ; GREEDY-LABEL: name: load_1d_sgpr_vaddr__sgpr_srsrc ; GREEDY: bb.1 (%ir-block.0): ; GREEDY-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 - ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr7 - ; GREEDY-NEXT: [[COPY6:%[0-9]+]]:sgpr(s32) = COPY $sgpr8 - ; GREEDY-NEXT: [[COPY7:%[0-9]+]]:sgpr(s32) = COPY $sgpr9 - ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GREEDY-NEXT: [[COPY8:%[0-9]+]]:sgpr(s32) = COPY $sgpr10 + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr3 + ; GREEDY-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr4 + ; GREEDY-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr5 + ; GREEDY-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr6 + ; GREEDY-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr7 + ; GREEDY-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr8 + ; GREEDY-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr9 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GREEDY-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr10 ; GREEDY-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF - ; GREEDY-NEXT: [[COPY9:%[0-9]+]]:vgpr(s32) = COPY [[COPY8]](s32) - ; GREEDY-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[COPY9]](s32), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<4 x s32>), addrspace 7) - ; GREEDY-NEXT: [[COPY10:%[0-9]+]]:vgpr(p1) = COPY [[DEF]](p1) - ; GREEDY-NEXT: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[COPY10]](p1) :: (store (<4 x s32>) into `ptr addrspace(1) undef`, addrspace 1) + ; GREEDY-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr(s32) = PRED_COPY [[PRED_COPY8]](s32) + ; GREEDY-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[PRED_COPY9]](s32), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<4 x s32>), addrspace 7) + ; GREEDY-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr(p1) = PRED_COPY [[DEF]](p1) + ; 
GREEDY-NEXT: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[PRED_COPY10]](p1) :: (store (<4 x s32>) into `ptr addrspace(1) undef`, addrspace 1) ; GREEDY-NEXT: S_ENDPGM 0 %v = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 0) store <4 x float> %v, ptr addrspace(1) undef @@ -101,16 +101,16 @@ ; FAST-NEXT: successors: %bb.2(0x80000000) ; FAST-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4 - ; FAST-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr5 - ; FAST-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY $vgpr6 - ; FAST-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY $vgpr7 - ; FAST-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; FAST-NEXT: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY $vgpr8 + ; FAST-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 + ; FAST-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr1 + ; FAST-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr2 + ; FAST-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr3 + ; FAST-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr4 + ; FAST-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr5 + ; FAST-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr6 + ; FAST-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr7 + ; FAST-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; FAST-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr8 ; FAST-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF ; FAST-NEXT: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF ; FAST-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec @@ -144,7 +144,7 @@ ; FAST-NEXT: bb.3: ; FAST-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[COPY8]](s32), [[BUILD_VECTOR1]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<4 x s32>), addrspace 7) + ; FAST-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[PRED_COPY8]](s32), [[BUILD_VECTOR1]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<4 x s32>), addrspace 7) ; FAST-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; FAST-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; FAST-NEXT: {{ $}} @@ -154,24 +154,24 @@ ; FAST-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.5: - ; FAST-NEXT: [[COPY9:%[0-9]+]]:vgpr(p1) = COPY [[DEF]](p1) - ; FAST-NEXT: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[COPY9]](p1) :: (store (<4 x s32>) into `ptr addrspace(1) undef`, addrspace 1) + ; FAST-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr(p1) = PRED_COPY [[DEF]](p1) + ; FAST-NEXT: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[PRED_COPY9]](p1) :: (store (<4 x s32>) into `ptr addrspace(1) undef`, addrspace 1) ; 
FAST-NEXT: S_ENDPGM 0 ; GREEDY-LABEL: name: load_1d_vgpr_vaddr__vgpr_srsrc ; GREEDY: bb.1 (%ir-block.0): ; GREEDY-NEXT: successors: %bb.2(0x80000000) ; GREEDY-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4 - ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr5 - ; GREEDY-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY $vgpr6 - ; GREEDY-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY $vgpr7 - ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GREEDY-NEXT: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY $vgpr8 + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr1 + ; GREEDY-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr2 + ; GREEDY-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr3 + ; GREEDY-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr4 + ; GREEDY-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr5 + ; GREEDY-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr6 + ; GREEDY-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr7 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GREEDY-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr8 ; GREEDY-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF ; GREEDY-NEXT: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF ; GREEDY-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec @@ -205,7 +205,7 @@ ; GREEDY-NEXT: bb.3: ; GREEDY-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[COPY8]](s32), [[BUILD_VECTOR1]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<4 x s32>), addrspace 7) + ; GREEDY-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[PRED_COPY8]](s32), [[BUILD_VECTOR1]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GREEDY-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GREEDY-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GREEDY-NEXT: {{ $}} @@ -215,8 +215,8 @@ ; GREEDY-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.5: - ; GREEDY-NEXT: [[COPY9:%[0-9]+]]:vgpr(p1) = COPY [[DEF]](p1) - ; GREEDY-NEXT: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[COPY9]](p1) :: (store (<4 x s32>) into `ptr addrspace(1) undef`, addrspace 1) + ; GREEDY-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr(p1) = PRED_COPY [[DEF]](p1) + ; GREEDY-NEXT: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[PRED_COPY9]](p1) :: (store (<4 x s32>) into `ptr addrspace(1) undef`, addrspace 1) ; GREEDY-NEXT: S_ENDPGM 0 %v = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 0) store <4 x float> %v, ptr 
addrspace(1) undef @@ -230,18 +230,18 @@ ; FAST-NEXT: successors: %bb.2(0x80000000) ; FAST-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4 - ; FAST-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr5 - ; FAST-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY $vgpr6 - ; FAST-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY $vgpr7 - ; FAST-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; FAST-NEXT: [[COPY8:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; FAST-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 + ; FAST-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr1 + ; FAST-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr2 + ; FAST-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr3 + ; FAST-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr4 + ; FAST-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr5 + ; FAST-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr6 + ; FAST-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr7 + ; FAST-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; FAST-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 ; FAST-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF - ; FAST-NEXT: [[COPY9:%[0-9]+]]:vgpr(s32) = COPY [[COPY8]](s32) + ; FAST-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr(s32) = PRED_COPY [[PRED_COPY8]](s32) ; FAST-NEXT: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF ; FAST-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; FAST-NEXT: {{ $}} @@ -274,7 +274,7 @@ ; FAST-NEXT: bb.3: ; FAST-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[COPY9]](s32), [[BUILD_VECTOR1]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<4 x s32>), addrspace 7) + ; FAST-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[PRED_COPY9]](s32), [[BUILD_VECTOR1]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<4 x s32>), addrspace 7) ; FAST-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; FAST-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; FAST-NEXT: {{ $}} @@ -284,26 +284,26 @@ ; FAST-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.5: - ; FAST-NEXT: [[COPY10:%[0-9]+]]:vgpr(p1) = COPY [[DEF]](p1) - ; FAST-NEXT: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[COPY10]](p1) :: (store (<4 x s32>) into `ptr addrspace(1) undef`, addrspace 1) + ; FAST-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr(p1) = PRED_COPY [[DEF]](p1) + ; FAST-NEXT: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[PRED_COPY10]](p1) :: (store (<4 x s32>) into `ptr addrspace(1) undef`, addrspace 1) ; FAST-NEXT: S_ENDPGM 0 ; GREEDY-LABEL: name: load_1d_sgpr_vaddr__vgpr_srsrc ; GREEDY: bb.1 (%ir-block.0): ; GREEDY-NEXT: successors: 
%bb.2(0x80000000) ; GREEDY-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4 - ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr5 - ; GREEDY-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY $vgpr6 - ; GREEDY-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY $vgpr7 - ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GREEDY-NEXT: [[COPY8:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr1 + ; GREEDY-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr2 + ; GREEDY-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr3 + ; GREEDY-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr4 + ; GREEDY-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr5 + ; GREEDY-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr6 + ; GREEDY-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr7 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GREEDY-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 ; GREEDY-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF - ; GREEDY-NEXT: [[COPY9:%[0-9]+]]:vgpr(s32) = COPY [[COPY8]](s32) + ; GREEDY-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr(s32) = PRED_COPY [[PRED_COPY8]](s32) ; GREEDY-NEXT: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF ; GREEDY-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GREEDY-NEXT: {{ $}} @@ -336,7 +336,7 @@ ; GREEDY-NEXT: bb.3: ; GREEDY-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[COPY9]](s32), [[BUILD_VECTOR1]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<4 x s32>), addrspace 7) + ; GREEDY-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[PRED_COPY9]](s32), [[BUILD_VECTOR1]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GREEDY-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GREEDY-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GREEDY-NEXT: {{ $}} @@ -346,8 +346,8 @@ ; GREEDY-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.5: - ; GREEDY-NEXT: [[COPY10:%[0-9]+]]:vgpr(p1) = COPY [[DEF]](p1) - ; GREEDY-NEXT: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[COPY10]](p1) :: (store (<4 x s32>) into `ptr addrspace(1) undef`, addrspace 1) + ; GREEDY-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr(p1) = PRED_COPY [[DEF]](p1) + ; GREEDY-NEXT: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[PRED_COPY10]](p1) :: (store (<4 x s32>) into `ptr addrspace(1) undef`, addrspace 1) ; GREEDY-NEXT: S_ENDPGM 0 %v = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 0, 
i32 0) store <4 x float> %v, ptr addrspace(1) undef diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.image.sample.1d.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.image.sample.1d.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.image.sample.1d.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.image.sample.1d.ll @@ -8,46 +8,46 @@ ; FAST: bb.1 (%ir-block.0): ; FAST-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0 ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; FAST-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; FAST-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 - ; FAST-NEXT: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr7 - ; FAST-NEXT: [[COPY6:%[0-9]+]]:sgpr(s32) = COPY $sgpr8 - ; FAST-NEXT: [[COPY7:%[0-9]+]]:sgpr(s32) = COPY $sgpr9 - ; FAST-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; FAST-NEXT: [[COPY8:%[0-9]+]]:sgpr(s32) = COPY $sgpr10 - ; FAST-NEXT: [[COPY9:%[0-9]+]]:sgpr(s32) = COPY $sgpr11 - ; FAST-NEXT: [[COPY10:%[0-9]+]]:sgpr(s32) = COPY $sgpr12 - ; FAST-NEXT: [[COPY11:%[0-9]+]]:sgpr(s32) = COPY $sgpr13 - ; FAST-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; FAST-NEXT: [[COPY12:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; FAST-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; FAST-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr3 + ; FAST-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr4 + ; FAST-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr5 + ; FAST-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr6 + ; FAST-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr7 + ; FAST-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr8 + ; FAST-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr9 + ; FAST-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; FAST-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr10 + ; FAST-NEXT: [[PRED_COPY9:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr11 + ; FAST-NEXT: [[PRED_COPY10:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr12 + ; FAST-NEXT: [[PRED_COPY11:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr13 + ; FAST-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; FAST-NEXT: [[PRED_COPY12:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 ; FAST-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF - ; FAST-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[COPY12]](s32), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 0 :: (dereferenceable load (<4 x s32>), addrspace 7) + ; FAST-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[PRED_COPY12]](s32), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 0 :: (dereferenceable load (<4 x s32>), addrspace 
7) ; FAST-NEXT: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[DEF]](p1) :: (store (<4 x s32>) into `ptr addrspace(1) undef`, addrspace 1) ; FAST-NEXT: S_ENDPGM 0 ; GREEDY-LABEL: name: sample_1d_vgpr_vaddr__sgpr_rsrc__sgpr_samp ; GREEDY: bb.1 (%ir-block.0): ; GREEDY-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 - ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr7 - ; GREEDY-NEXT: [[COPY6:%[0-9]+]]:sgpr(s32) = COPY $sgpr8 - ; GREEDY-NEXT: [[COPY7:%[0-9]+]]:sgpr(s32) = COPY $sgpr9 - ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GREEDY-NEXT: [[COPY8:%[0-9]+]]:sgpr(s32) = COPY $sgpr10 - ; GREEDY-NEXT: [[COPY9:%[0-9]+]]:sgpr(s32) = COPY $sgpr11 - ; GREEDY-NEXT: [[COPY10:%[0-9]+]]:sgpr(s32) = COPY $sgpr12 - ; GREEDY-NEXT: [[COPY11:%[0-9]+]]:sgpr(s32) = COPY $sgpr13 - ; GREEDY-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GREEDY-NEXT: [[COPY12:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr3 + ; GREEDY-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr4 + ; GREEDY-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr5 + ; GREEDY-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr6 + ; GREEDY-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr7 + ; GREEDY-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr8 + ; GREEDY-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr9 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GREEDY-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr10 + ; GREEDY-NEXT: [[PRED_COPY9:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr11 + ; GREEDY-NEXT: [[PRED_COPY10:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr12 + ; GREEDY-NEXT: [[PRED_COPY11:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr13 + ; GREEDY-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GREEDY-NEXT: [[PRED_COPY12:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 ; GREEDY-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF - ; GREEDY-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[COPY12]](s32), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 0 :: (dereferenceable load (<4 x s32>), addrspace 7) + ; GREEDY-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[PRED_COPY12]](s32), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 0 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GREEDY-NEXT: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[DEF]](p1) :: (store (<4 x 
s32>) into `ptr addrspace(1) undef`, addrspace 1) ; GREEDY-NEXT: S_ENDPGM 0 %v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0) @@ -61,48 +61,48 @@ ; FAST: bb.1 (%ir-block.0): ; FAST-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14 ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; FAST-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; FAST-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 - ; FAST-NEXT: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr7 - ; FAST-NEXT: [[COPY6:%[0-9]+]]:sgpr(s32) = COPY $sgpr8 - ; FAST-NEXT: [[COPY7:%[0-9]+]]:sgpr(s32) = COPY $sgpr9 - ; FAST-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; FAST-NEXT: [[COPY8:%[0-9]+]]:sgpr(s32) = COPY $sgpr10 - ; FAST-NEXT: [[COPY9:%[0-9]+]]:sgpr(s32) = COPY $sgpr11 - ; FAST-NEXT: [[COPY10:%[0-9]+]]:sgpr(s32) = COPY $sgpr12 - ; FAST-NEXT: [[COPY11:%[0-9]+]]:sgpr(s32) = COPY $sgpr13 - ; FAST-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; FAST-NEXT: [[COPY12:%[0-9]+]]:sgpr(s32) = COPY $sgpr14 + ; FAST-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; FAST-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr3 + ; FAST-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr4 + ; FAST-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr5 + ; FAST-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr6 + ; FAST-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr7 + ; FAST-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr8 + ; FAST-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr9 + ; FAST-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; FAST-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr10 + ; FAST-NEXT: [[PRED_COPY9:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr11 + ; FAST-NEXT: [[PRED_COPY10:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr12 + ; FAST-NEXT: [[PRED_COPY11:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr13 + ; FAST-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; FAST-NEXT: [[PRED_COPY12:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr14 ; FAST-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF - ; FAST-NEXT: [[COPY13:%[0-9]+]]:vgpr(s32) = COPY [[COPY12]](s32) - ; FAST-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[COPY13]](s32), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 0 :: (dereferenceable load (<4 x s32>), addrspace 7) + ; FAST-NEXT: [[PRED_COPY13:%[0-9]+]]:vgpr(s32) = PRED_COPY [[PRED_COPY12]](s32) + ; FAST-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[PRED_COPY13]](s32), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 0 :: (dereferenceable load (<4 x s32>), addrspace 7) ; FAST-NEXT: 
G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[DEF]](p1) :: (store (<4 x s32>) into `ptr addrspace(1) undef`, addrspace 1) ; FAST-NEXT: S_ENDPGM 0 ; GREEDY-LABEL: name: sample_1d_sgpr_vaddr__sgpr_rsrc__sgpr_samp ; GREEDY: bb.1 (%ir-block.0): ; GREEDY-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 - ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr7 - ; GREEDY-NEXT: [[COPY6:%[0-9]+]]:sgpr(s32) = COPY $sgpr8 - ; GREEDY-NEXT: [[COPY7:%[0-9]+]]:sgpr(s32) = COPY $sgpr9 - ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GREEDY-NEXT: [[COPY8:%[0-9]+]]:sgpr(s32) = COPY $sgpr10 - ; GREEDY-NEXT: [[COPY9:%[0-9]+]]:sgpr(s32) = COPY $sgpr11 - ; GREEDY-NEXT: [[COPY10:%[0-9]+]]:sgpr(s32) = COPY $sgpr12 - ; GREEDY-NEXT: [[COPY11:%[0-9]+]]:sgpr(s32) = COPY $sgpr13 - ; GREEDY-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GREEDY-NEXT: [[COPY12:%[0-9]+]]:sgpr(s32) = COPY $sgpr14 + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr3 + ; GREEDY-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr4 + ; GREEDY-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr5 + ; GREEDY-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr6 + ; GREEDY-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr7 + ; GREEDY-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr8 + ; GREEDY-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr9 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GREEDY-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr10 + ; GREEDY-NEXT: [[PRED_COPY9:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr11 + ; GREEDY-NEXT: [[PRED_COPY10:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr12 + ; GREEDY-NEXT: [[PRED_COPY11:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr13 + ; GREEDY-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GREEDY-NEXT: [[PRED_COPY12:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr14 ; GREEDY-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF - ; GREEDY-NEXT: [[COPY13:%[0-9]+]]:vgpr(s32) = COPY [[COPY12]](s32) - ; GREEDY-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[COPY13]](s32), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 0 :: (dereferenceable load (<4 x s32>), addrspace 7) + ; GREEDY-NEXT: [[PRED_COPY13:%[0-9]+]]:vgpr(s32) = PRED_COPY [[PRED_COPY12]](s32) + ; GREEDY-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[PRED_COPY13]](s32), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 0 :: 
(dereferenceable load (<4 x s32>), addrspace 7) ; GREEDY-NEXT: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[DEF]](p1) :: (store (<4 x s32>) into `ptr addrspace(1) undef`, addrspace 1) ; GREEDY-NEXT: S_ENDPGM 0 %v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0) @@ -117,21 +117,21 @@ ; FAST-NEXT: successors: %bb.2(0x80000000) ; FAST-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4 - ; FAST-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr5 - ; FAST-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY $vgpr6 - ; FAST-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY $vgpr7 - ; FAST-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; FAST-NEXT: [[COPY8:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; FAST-NEXT: [[COPY9:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; FAST-NEXT: [[COPY10:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; FAST-NEXT: [[COPY11:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; FAST-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; FAST-NEXT: [[COPY12:%[0-9]+]]:vgpr(s32) = COPY $vgpr8 + ; FAST-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 + ; FAST-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr1 + ; FAST-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr2 + ; FAST-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr3 + ; FAST-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr4 + ; FAST-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr5 + ; FAST-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr6 + ; FAST-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr7 + ; FAST-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; FAST-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; FAST-NEXT: [[PRED_COPY9:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr3 + ; FAST-NEXT: [[PRED_COPY10:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr4 + ; FAST-NEXT: [[PRED_COPY11:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr5 + ; FAST-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; FAST-NEXT: [[PRED_COPY12:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr8 ; FAST-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF ; FAST-NEXT: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF ; FAST-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec @@ -165,7 +165,7 @@ ; FAST-NEXT: bb.3: ; FAST-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[COPY12]](s32), [[BUILD_VECTOR2]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 0 :: (dereferenceable load (<4 x s32>), addrspace 7) + ; FAST-NEXT: 
[[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[PRED_COPY12]](s32), [[BUILD_VECTOR2]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 0 :: (dereferenceable load (<4 x s32>), addrspace 7) ; FAST-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; FAST-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; FAST-NEXT: {{ $}} @@ -182,21 +182,21 @@ ; GREEDY-NEXT: successors: %bb.2(0x80000000) ; GREEDY-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4 - ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr5 - ; GREEDY-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY $vgpr6 - ; GREEDY-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY $vgpr7 - ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GREEDY-NEXT: [[COPY8:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY-NEXT: [[COPY9:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GREEDY-NEXT: [[COPY10:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GREEDY-NEXT: [[COPY11:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GREEDY-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GREEDY-NEXT: [[COPY12:%[0-9]+]]:vgpr(s32) = COPY $vgpr8 + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr1 + ; GREEDY-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr2 + ; GREEDY-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr3 + ; GREEDY-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr4 + ; GREEDY-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr5 + ; GREEDY-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr6 + ; GREEDY-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr7 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GREEDY-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; GREEDY-NEXT: [[PRED_COPY9:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr3 + ; GREEDY-NEXT: [[PRED_COPY10:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr4 + ; GREEDY-NEXT: [[PRED_COPY11:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr5 + ; GREEDY-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GREEDY-NEXT: [[PRED_COPY12:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr8 ; GREEDY-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF ; GREEDY-NEXT: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF ; GREEDY-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec @@ -230,7 +230,7 @@ ; GREEDY-NEXT: bb.3: ; GREEDY-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[COPY12]](s32), [[BUILD_VECTOR2]](<8 
x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 0 :: (dereferenceable load (<4 x s32>), addrspace 7) + ; GREEDY-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[PRED_COPY12]](s32), [[BUILD_VECTOR2]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 0 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GREEDY-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GREEDY-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GREEDY-NEXT: {{ $}} @@ -254,21 +254,21 @@ ; FAST-NEXT: successors: %bb.2(0x80000000) ; FAST-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; FAST-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; FAST-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 - ; FAST-NEXT: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr7 - ; FAST-NEXT: [[COPY6:%[0-9]+]]:sgpr(s32) = COPY $sgpr8 - ; FAST-NEXT: [[COPY7:%[0-9]+]]:sgpr(s32) = COPY $sgpr9 - ; FAST-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; FAST-NEXT: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; FAST-NEXT: [[COPY9:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; FAST-NEXT: [[COPY10:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; FAST-NEXT: [[COPY11:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; FAST-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; FAST-NEXT: [[COPY12:%[0-9]+]]:vgpr(s32) = COPY $vgpr4 + ; FAST-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; FAST-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr3 + ; FAST-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr4 + ; FAST-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr5 + ; FAST-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr6 + ; FAST-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr7 + ; FAST-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr8 + ; FAST-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr9 + ; FAST-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; FAST-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 + ; FAST-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr1 + ; FAST-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr2 + ; FAST-NEXT: [[PRED_COPY11:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr3 + ; FAST-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; FAST-NEXT: [[PRED_COPY12:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr4 ; FAST-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF ; FAST-NEXT: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF ; FAST-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec @@ -294,7 +294,7 @@ ; FAST-NEXT: bb.3: ; FAST-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD 
intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[COPY12]](s32), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR2]](<4 x s32>), 0, 0, 0, 0 :: (dereferenceable load (<4 x s32>), addrspace 7) + ; FAST-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[PRED_COPY12]](s32), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR2]](<4 x s32>), 0, 0, 0, 0 :: (dereferenceable load (<4 x s32>), addrspace 7) ; FAST-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; FAST-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; FAST-NEXT: {{ $}} @@ -311,21 +311,21 @@ ; GREEDY-NEXT: successors: %bb.2(0x80000000) ; GREEDY-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 - ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr7 - ; GREEDY-NEXT: [[COPY6:%[0-9]+]]:sgpr(s32) = COPY $sgpr8 - ; GREEDY-NEXT: [[COPY7:%[0-9]+]]:sgpr(s32) = COPY $sgpr9 - ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GREEDY-NEXT: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY-NEXT: [[COPY9:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GREEDY-NEXT: [[COPY10:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; GREEDY-NEXT: [[COPY11:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; GREEDY-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GREEDY-NEXT: [[COPY12:%[0-9]+]]:vgpr(s32) = COPY $vgpr4 + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr3 + ; GREEDY-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr4 + ; GREEDY-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr5 + ; GREEDY-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr6 + ; GREEDY-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr7 + ; GREEDY-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr8 + ; GREEDY-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr9 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GREEDY-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 + ; GREEDY-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr1 + ; GREEDY-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr2 + ; GREEDY-NEXT: [[PRED_COPY11:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr3 + ; GREEDY-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GREEDY-NEXT: [[PRED_COPY12:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr4 ; GREEDY-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF ; GREEDY-NEXT: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF ; GREEDY-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec @@ -351,7 +351,7 @@ ; GREEDY-NEXT: bb.3: ; GREEDY-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; 
GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[COPY12]](s32), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR2]](<4 x s32>), 0, 0, 0, 0 :: (dereferenceable load (<4 x s32>), addrspace 7) + ; GREEDY-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[PRED_COPY12]](s32), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR2]](<4 x s32>), 0, 0, 0, 0 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GREEDY-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GREEDY-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GREEDY-NEXT: {{ $}} @@ -375,21 +375,21 @@ ; FAST-NEXT: successors: %bb.2(0x80000000) ; FAST-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12 ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4 - ; FAST-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr5 - ; FAST-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY $vgpr6 - ; FAST-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY $vgpr7 - ; FAST-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; FAST-NEXT: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY $vgpr8 - ; FAST-NEXT: [[COPY9:%[0-9]+]]:vgpr(s32) = COPY $vgpr9 - ; FAST-NEXT: [[COPY10:%[0-9]+]]:vgpr(s32) = COPY $vgpr10 - ; FAST-NEXT: [[COPY11:%[0-9]+]]:vgpr(s32) = COPY $vgpr11 - ; FAST-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; FAST-NEXT: [[COPY12:%[0-9]+]]:vgpr(s32) = COPY $vgpr12 + ; FAST-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 + ; FAST-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr1 + ; FAST-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr2 + ; FAST-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr3 + ; FAST-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr4 + ; FAST-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr5 + ; FAST-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr6 + ; FAST-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr7 + ; FAST-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; FAST-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr8 + ; FAST-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr9 + ; FAST-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr10 + ; FAST-NEXT: [[PRED_COPY11:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr11 + ; FAST-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; FAST-NEXT: [[PRED_COPY12:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr12 ; FAST-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF ; FAST-NEXT: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF ; FAST-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec @@ -435,7 +435,7 @@ ; FAST-NEXT: bb.3: ; 
FAST-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[COPY12]](s32), [[BUILD_VECTOR2]](<8 x s32>), [[BUILD_VECTOR3]](<4 x s32>), 0, 0, 0, 0 :: (dereferenceable load (<4 x s32>), addrspace 7) + ; FAST-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[PRED_COPY12]](s32), [[BUILD_VECTOR2]](<8 x s32>), [[BUILD_VECTOR3]](<4 x s32>), 0, 0, 0, 0 :: (dereferenceable load (<4 x s32>), addrspace 7) ; FAST-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; FAST-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; FAST-NEXT: {{ $}} @@ -452,21 +452,21 @@ ; GREEDY-NEXT: successors: %bb.2(0x80000000) ; GREEDY-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4 - ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr5 - ; GREEDY-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY $vgpr6 - ; GREEDY-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY $vgpr7 - ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GREEDY-NEXT: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY $vgpr8 - ; GREEDY-NEXT: [[COPY9:%[0-9]+]]:vgpr(s32) = COPY $vgpr9 - ; GREEDY-NEXT: [[COPY10:%[0-9]+]]:vgpr(s32) = COPY $vgpr10 - ; GREEDY-NEXT: [[COPY11:%[0-9]+]]:vgpr(s32) = COPY $vgpr11 - ; GREEDY-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GREEDY-NEXT: [[COPY12:%[0-9]+]]:vgpr(s32) = COPY $vgpr12 + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr1 + ; GREEDY-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr2 + ; GREEDY-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr3 + ; GREEDY-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr4 + ; GREEDY-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr5 + ; GREEDY-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr6 + ; GREEDY-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr7 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GREEDY-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr8 + ; GREEDY-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr9 + ; GREEDY-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr10 + ; GREEDY-NEXT: [[PRED_COPY11:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr11 + ; GREEDY-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GREEDY-NEXT: [[PRED_COPY12:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr12 ; GREEDY-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF ; GREEDY-NEXT: [[DEF1:%[0-9]+]]:sreg_64_xexec = 
IMPLICIT_DEF ; GREEDY-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec @@ -512,7 +512,7 @@ ; GREEDY-NEXT: bb.3: ; GREEDY-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[COPY12]](s32), [[BUILD_VECTOR2]](<8 x s32>), [[BUILD_VECTOR3]](<4 x s32>), 0, 0, 0, 0 :: (dereferenceable load (<4 x s32>), addrspace 7) + ; GREEDY-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[PRED_COPY12]](s32), [[BUILD_VECTOR2]](<8 x s32>), [[BUILD_VECTOR3]](<4 x s32>), 0, 0, 0, 0 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GREEDY-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GREEDY-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GREEDY-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.interp.mov.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.interp.mov.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.interp.mov.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.interp.mov.mir @@ -15,8 +15,8 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.interp.mov), [[COPY2]](s32), 1, 1, [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.interp.mov), [[PRED_COPY]](s32), 1, 1, [[COPY1]](s32) %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 %2:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.interp.mov), %0, 1, 1, %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.interp.p1.f16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.interp.p1.f16.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.interp.p1.f16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.interp.p1.f16.mir @@ -16,8 +16,8 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.interp.p1.f16), [[COPY2]](s32), 1, 1, 1, [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.interp.p1.f16), [[PRED_COPY]](s32), 1, 1, 1, [[COPY1]](s32) %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 %2:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.interp.p1.f16), %0, 1, 1, 1, %1 @@ -36,9 +36,9 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[COPY1]](s32), implicit $exec - ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.interp.p1.f16), [[COPY2]](s32), 1, 1, 1, [[V_READFIRSTLANE_B32_]](s32) + ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = 
G_INTRINSIC intrinsic(@llvm.amdgcn.interp.p1.f16), [[PRED_COPY]](s32), 1, 1, 1, [[V_READFIRSTLANE_B32_]](s32) %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $vgpr0 %2:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.interp.p1.f16), %0, 1, 1, 1, %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.interp.p1.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.interp.p1.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.interp.p1.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.interp.p1.mir @@ -15,8 +15,8 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.interp.p1), [[COPY2]](s32), 1, 1, [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.interp.p1), [[PRED_COPY]](s32), 1, 1, [[COPY1]](s32) %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 %2:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.interp.p1), %0, 1, 1, %1 @@ -54,9 +54,9 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[COPY1]](s32), implicit $exec - ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.interp.p1), [[COPY2]](s32), 1, 1, [[V_READFIRSTLANE_B32_]](s32) + ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.interp.p1), [[PRED_COPY]](s32), 1, 1, [[V_READFIRSTLANE_B32_]](s32) %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $vgpr0 %2:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.interp.p1), %0, 1, 1, %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.interp.p2.f16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.interp.p2.f16.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.interp.p2.f16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.interp.p2.f16.mir @@ -17,9 +17,9 @@ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.interp.p2.f16), [[COPY3]](s32), [[COPY4]](s32), 1, 1, 1, [[COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.interp.p2.f16), [[PRED_COPY]](s32), [[PRED_COPY1]](s32), 1, 1, 1, [[COPY2]](s32) %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 %2:_(s32) = COPY $sgpr2 @@ -40,10 +40,10 @@ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: 
[[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[COPY2]](s32), implicit $exec - ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.interp.p2.f16), [[COPY3]](s32), [[COPY4]](s32), 1, 1, 1, [[V_READFIRSTLANE_B32_]](s32) + ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.interp.p2.f16), [[PRED_COPY]](s32), [[PRED_COPY1]](s32), 1, 1, 1, [[V_READFIRSTLANE_B32_]](s32) %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 %2:_(s32) = COPY $vgpr0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.interp.p2.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.interp.p2.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.interp.p2.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.interp.p2.mir @@ -16,9 +16,9 @@ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.interp.p2), [[COPY3]](s32), [[COPY4]](s32), 1, 1, [[COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.interp.p2), [[PRED_COPY]](s32), [[PRED_COPY1]](s32), 1, 1, [[COPY2]](s32) %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 %2:_(s32) = COPY $sgpr2 @@ -39,10 +39,10 @@ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[COPY2]](s32), implicit $exec - ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.interp.p2), [[COPY3]](s32), [[COPY4]](s32), 1, 1, [[V_READFIRSTLANE_B32_]](s32) + ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.interp.p2), [[PRED_COPY]](s32), [[PRED_COPY1]](s32), 1, 1, [[V_READFIRSTLANE_B32_]](s32) %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 %2:_(s32) = COPY $vgpr0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.kill.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.kill.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.kill.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.kill.mir @@ -12,14 +12,14 @@ ; CHECK-LABEL: name: kill_scc ; CHECK: liveins: $sgpr0, $sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY1]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr1 + ; CHECK-NEXT: 
[[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[PRED_COPY]](s32), [[PRED_COPY1]] ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.kill), [[COPY2]](s1) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) + ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.kill), [[PRED_COPY]](s1) + %0:_(s32) = PRED_COPY $sgpr0 + %1:_(s32) = PRED_COPY $sgpr1 %2:_(s1) = G_ICMP intpred(eq), %0, %1 G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.kill), %2 ... @@ -34,12 +34,12 @@ ; CHECK-LABEL: name: kill_vcc ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY1]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[PRED_COPY]](s32), [[PRED_COPY1]] ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.kill), [[ICMP]](s1) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 + %0:_(s32) = PRED_COPY $vgpr0 + %1:_(s32) = PRED_COPY $vgpr1 %2:_(s1) = G_ICMP intpred(eq), %0, %1 G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.kill), %2 ... @@ -53,8 +53,8 @@ ; CHECK-LABEL: name: kill_constant_true ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[C]](s32) - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.kill), [[COPY]](s1) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) + ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.kill), [[PRED_COPY]](s1) %0:_(s1) = G_CONSTANT i1 true G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.kill), %0 ... @@ -68,8 +68,8 @@ ; CHECK-LABEL: name: kill_constant_false ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[C]](s32) - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.kill), [[COPY]](s1) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) + ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.kill), [[PRED_COPY]](s1) %0:_(s1) = G_CONSTANT i1 false G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.kill), %0 ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.mfma.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.mfma.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.mfma.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.mfma.mir @@ -39,10 +39,10 @@ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr32 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr33 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(<32 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:agpr(<32 x s32>) = COPY [[COPY2]](<32 x s32>) - ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<32 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x1f32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](<32 x s32>), 0, 0, 0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:agpr(<32 x s32>) = PRED_COPY [[COPY2]](<32 x s32>) + ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<32 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x1f32), [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](<32 x s32>), 0, 0, 0 ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY [[INT]](<32 x s32>) %0:_(s32) = COPY $sgpr32 %1:_(s32) = COPY $sgpr33 @@ -88,10 +88,10 @@ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr32 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr33 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:agpr(<16 x s32>) = COPY [[COPY2]](<16 x s32>) - ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x1f32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](<16 x s32>), 0, 0, 0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:agpr(<16 x s32>) = PRED_COPY [[COPY2]](<16 x s32>) + ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x1f32), [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](<16 x s32>), 0, 0, 0 ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>) %0:_(s32) = COPY $sgpr32 %1:_(s32) = COPY $sgpr33 @@ -137,10 +137,10 @@ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr32 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr33 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:agpr(<4 x s32>) = COPY [[COPY2]](<4 x s32>) - ; CHECK-NEXT: 
[[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.4x4x1f32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](<4 x s32>), 0, 0, 0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:agpr(<4 x s32>) = PRED_COPY [[COPY2]](<4 x s32>) + ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.4x4x1f32), [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](<4 x s32>), 0, 0, 0 ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>) %0:_(s32) = COPY $sgpr32 %1:_(s32) = COPY $sgpr33 @@ -186,10 +186,10 @@ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr32 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr33 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:agpr(<16 x s32>) = COPY [[COPY2]](<16 x s32>) - ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x2f32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](<16 x s32>), 0, 0, 0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:agpr(<16 x s32>) = PRED_COPY [[COPY2]](<16 x s32>) + ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x2f32), [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](<16 x s32>), 0, 0, 0 ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>) %0:_(s32) = COPY $sgpr32 %1:_(s32) = COPY $sgpr33 @@ -235,10 +235,10 @@ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr32 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr33 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:agpr(<4 x s32>) = COPY [[COPY2]](<4 x s32>) - ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x4f32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](<4 x s32>), 0, 0, 0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:agpr(<4 x s32>) = PRED_COPY [[COPY2]](<4 x s32>) + ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x4f32), [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](<4 x s32>), 0, 0, 0 ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>) %0:_(s32) = COPY $sgpr32 %1:_(s32) = COPY $sgpr33 @@ -284,10 +284,10 @@ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<4 x s16>) = COPY $sgpr32_sgpr33 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<4 x s16>) = COPY $sgpr34_sgpr35 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(<32 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 - ; 
CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(<4 x s16>) = COPY [[COPY]](<4 x s16>) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(<4 x s16>) = COPY [[COPY1]](<4 x s16>) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:agpr(<32 x s32>) = COPY [[COPY2]](<32 x s32>) - ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<32 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x4f16), [[COPY3]](<4 x s16>), [[COPY4]](<4 x s16>), [[COPY5]](<32 x s32>), 0, 0, 0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(<4 x s16>) = PRED_COPY [[COPY]](<4 x s16>) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(<4 x s16>) = PRED_COPY [[COPY1]](<4 x s16>) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:agpr(<32 x s32>) = PRED_COPY [[COPY2]](<32 x s32>) + ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<32 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x4f16), [[PRED_COPY]](<4 x s16>), [[PRED_COPY1]](<4 x s16>), [[PRED_COPY2]](<32 x s32>), 0, 0, 0 ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY [[INT]](<32 x s32>) %0:_(<4 x s16>) = COPY $sgpr32_sgpr33 %1:_(<4 x s16>) = COPY $sgpr34_sgpr35 @@ -333,10 +333,10 @@ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<4 x s16>) = COPY $sgpr32_sgpr33 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<4 x s16>) = COPY $sgpr34_sgpr35 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(<4 x s16>) = COPY [[COPY]](<4 x s16>) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(<4 x s16>) = COPY [[COPY1]](<4 x s16>) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:agpr(<16 x s32>) = COPY [[COPY2]](<16 x s32>) - ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x4f16), [[COPY3]](<4 x s16>), [[COPY4]](<4 x s16>), [[COPY5]](<16 x s32>), 0, 0, 0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(<4 x s16>) = PRED_COPY [[COPY]](<4 x s16>) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(<4 x s16>) = PRED_COPY [[COPY1]](<4 x s16>) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:agpr(<16 x s32>) = PRED_COPY [[COPY2]](<16 x s32>) + ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x4f16), [[PRED_COPY]](<4 x s16>), [[PRED_COPY1]](<4 x s16>), [[PRED_COPY2]](<16 x s32>), 0, 0, 0 ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>) %0:_(<4 x s16>) = COPY $sgpr32_sgpr33 %1:_(<4 x s16>) = COPY $sgpr34_sgpr35 @@ -382,10 +382,10 @@ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<4 x s16>) = COPY $sgpr32_sgpr33 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<4 x s16>) = COPY $sgpr34_sgpr35 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(<4 x s16>) = COPY [[COPY]](<4 x s16>) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(<4 x s16>) = COPY [[COPY1]](<4 x s16>) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:agpr(<4 x s32>) = COPY [[COPY2]](<4 x s32>) - ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.4x4x4f16), [[COPY3]](<4 x s16>), [[COPY4]](<4 x s16>), [[COPY5]](<4 x s32>), 0, 0, 0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(<4 x s16>) = PRED_COPY [[COPY]](<4 x s16>) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(<4 x s16>) = PRED_COPY [[COPY1]](<4 x s16>) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:agpr(<4 x s32>) = PRED_COPY [[COPY2]](<4 x s32>) + ; 
CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.4x4x4f16), [[PRED_COPY]](<4 x s16>), [[PRED_COPY1]](<4 x s16>), [[PRED_COPY2]](<4 x s32>), 0, 0, 0 ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>) %0:_(<4 x s16>) = COPY $sgpr32_sgpr33 %1:_(<4 x s16>) = COPY $sgpr34_sgpr35 @@ -431,10 +431,10 @@ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<4 x s16>) = COPY $sgpr32_sgpr33 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<4 x s16>) = COPY $sgpr34_sgpr35 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(<4 x s16>) = COPY [[COPY]](<4 x s16>) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(<4 x s16>) = COPY [[COPY1]](<4 x s16>) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:agpr(<16 x s32>) = COPY [[COPY2]](<16 x s32>) - ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x8f16), [[COPY3]](<4 x s16>), [[COPY4]](<4 x s16>), [[COPY5]](<16 x s32>), 0, 0, 0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(<4 x s16>) = PRED_COPY [[COPY]](<4 x s16>) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(<4 x s16>) = PRED_COPY [[COPY1]](<4 x s16>) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:agpr(<16 x s32>) = PRED_COPY [[COPY2]](<16 x s32>) + ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x8f16), [[PRED_COPY]](<4 x s16>), [[PRED_COPY1]](<4 x s16>), [[PRED_COPY2]](<16 x s32>), 0, 0, 0 ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>) %0:_(<4 x s16>) = COPY $sgpr32_sgpr33 %1:_(<4 x s16>) = COPY $sgpr34_sgpr35 @@ -480,10 +480,10 @@ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<4 x s16>) = COPY $sgpr32_sgpr33 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<4 x s16>) = COPY $sgpr34_sgpr35 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(<4 x s16>) = COPY [[COPY]](<4 x s16>) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(<4 x s16>) = COPY [[COPY1]](<4 x s16>) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:agpr(<4 x s32>) = COPY [[COPY2]](<4 x s32>) - ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x16f16), [[COPY3]](<4 x s16>), [[COPY4]](<4 x s16>), [[COPY5]](<4 x s32>), 0, 0, 0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(<4 x s16>) = PRED_COPY [[COPY]](<4 x s16>) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(<4 x s16>) = PRED_COPY [[COPY1]](<4 x s16>) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:agpr(<4 x s32>) = PRED_COPY [[COPY2]](<4 x s32>) + ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x16f16), [[PRED_COPY]](<4 x s16>), [[PRED_COPY1]](<4 x s16>), [[PRED_COPY2]](<4 x s32>), 0, 0, 0 ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>) %0:_(<4 x s16>) = COPY $sgpr32_sgpr33 %1:_(<4 x s16>) = COPY $sgpr34_sgpr35 @@ -529,10 +529,10 @@ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr32 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr33 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(<32 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; 
CHECK-NEXT: [[COPY5:%[0-9]+]]:agpr(<32 x s32>) = COPY [[COPY2]](<32 x s32>) - ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<32 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.32x32x4i8), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](<32 x s32>), 0, 0, 0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:agpr(<32 x s32>) = PRED_COPY [[COPY2]](<32 x s32>) + ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<32 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.32x32x4i8), [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](<32 x s32>), 0, 0, 0 ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY [[INT]](<32 x s32>) %0:_(s32) = COPY $sgpr32 %1:_(s32) = COPY $sgpr33 @@ -578,10 +578,10 @@ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr32 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr33 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:agpr(<16 x s32>) = COPY [[COPY2]](<16 x s32>) - ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.16x16x4i8), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](<16 x s32>), 0, 0, 0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:agpr(<16 x s32>) = PRED_COPY [[COPY2]](<16 x s32>) + ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.16x16x4i8), [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](<16 x s32>), 0, 0, 0 ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>) %0:_(s32) = COPY $sgpr32 %1:_(s32) = COPY $sgpr33 @@ -627,10 +627,10 @@ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr32 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr33 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:agpr(<4 x s32>) = COPY [[COPY2]](<4 x s32>) - ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.4x4x4i8), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](<4 x s32>), 0, 0, 0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:agpr(<4 x s32>) = PRED_COPY [[COPY2]](<4 x s32>) + ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.4x4x4i8), [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](<4 x s32>), 0, 0, 0 ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>) %0:_(s32) = COPY $sgpr32 %1:_(s32) = COPY $sgpr33 @@ -676,10 +676,10 @@ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr32 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr33 ; CHECK-NEXT: 
[[COPY2:%[0-9]+]]:sgpr(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:agpr(<16 x s32>) = COPY [[COPY2]](<16 x s32>) - ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.32x32x8i8), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](<16 x s32>), 0, 0, 0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:agpr(<16 x s32>) = PRED_COPY [[COPY2]](<16 x s32>) + ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.32x32x8i8), [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](<16 x s32>), 0, 0, 0 ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>) %0:_(s32) = COPY $sgpr32 %1:_(s32) = COPY $sgpr33 @@ -725,10 +725,10 @@ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr32 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr33 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:agpr(<4 x s32>) = COPY [[COPY2]](<4 x s32>) - ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.16x16x16i8), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](<4 x s32>), 0, 0, 0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:agpr(<4 x s32>) = PRED_COPY [[COPY2]](<4 x s32>) + ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.16x16x16i8), [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](<4 x s32>), 0, 0, 0 ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>) %0:_(s32) = COPY $sgpr32 %1:_(s32) = COPY $sgpr33 @@ -774,10 +774,10 @@ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr32 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr33 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(<32 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY]](<2 x s16>) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY1]](<2 x s16>) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:agpr(<32 x s32>) = COPY [[COPY2]](<32 x s32>) - ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<32 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x2bf16), [[COPY3]](<2 x s16>), [[COPY4]](<2 x s16>), [[COPY5]](<32 x s32>), 0, 0, 0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(<2 x s16>) = PRED_COPY [[COPY]](<2 x s16>) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(<2 x s16>) = PRED_COPY [[COPY1]](<2 x s16>) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:agpr(<32 x s32>) = PRED_COPY [[COPY2]](<32 x s32>) + ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<32 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x2bf16), [[PRED_COPY]](<2 x s16>), [[PRED_COPY1]](<2 x s16>), [[PRED_COPY2]](<32 
x s32>), 0, 0, 0 ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY [[INT]](<32 x s32>) %0:_(<2 x s16>) = COPY $sgpr32 %1:_(<2 x s16>) = COPY $sgpr33 @@ -823,10 +823,10 @@ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr32 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr33 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY]](<2 x s16>) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY1]](<2 x s16>) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:agpr(<16 x s32>) = COPY [[COPY2]](<16 x s32>) - ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x2bf16), [[COPY3]](<2 x s16>), [[COPY4]](<2 x s16>), [[COPY5]](<16 x s32>), 0, 0, 0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(<2 x s16>) = PRED_COPY [[COPY]](<2 x s16>) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(<2 x s16>) = PRED_COPY [[COPY1]](<2 x s16>) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:agpr(<16 x s32>) = PRED_COPY [[COPY2]](<16 x s32>) + ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x2bf16), [[PRED_COPY]](<2 x s16>), [[PRED_COPY1]](<2 x s16>), [[PRED_COPY2]](<16 x s32>), 0, 0, 0 ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>) %0:_(<2 x s16>) = COPY $sgpr32 %1:_(<2 x s16>) = COPY $sgpr33 @@ -872,10 +872,10 @@ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr32 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr33 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY]](<2 x s16>) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY1]](<2 x s16>) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:agpr(<4 x s32>) = COPY [[COPY2]](<4 x s32>) - ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.4x4x2bf16), [[COPY3]](<2 x s16>), [[COPY4]](<2 x s16>), [[COPY5]](<4 x s32>), 0, 0, 0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(<2 x s16>) = PRED_COPY [[COPY]](<2 x s16>) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(<2 x s16>) = PRED_COPY [[COPY1]](<2 x s16>) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:agpr(<4 x s32>) = PRED_COPY [[COPY2]](<4 x s32>) + ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.4x4x2bf16), [[PRED_COPY]](<2 x s16>), [[PRED_COPY1]](<2 x s16>), [[PRED_COPY2]](<4 x s32>), 0, 0, 0 ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>) %0:_(<2 x s16>) = COPY $sgpr32 %1:_(<2 x s16>) = COPY $sgpr33 @@ -921,10 +921,10 @@ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr32 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr33 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY]](<2 x s16>) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY1]](<2 x s16>) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:agpr(<16 x s32>) = COPY [[COPY2]](<16 x s32>) - ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC 
intrinsic(@llvm.amdgcn.mfma.f32.32x32x4bf16), [[COPY3]](<2 x s16>), [[COPY4]](<2 x s16>), [[COPY5]](<16 x s32>), 0, 0, 0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(<2 x s16>) = PRED_COPY [[COPY]](<2 x s16>) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(<2 x s16>) = PRED_COPY [[COPY1]](<2 x s16>) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:agpr(<16 x s32>) = PRED_COPY [[COPY2]](<16 x s32>) + ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x4bf16), [[PRED_COPY]](<2 x s16>), [[PRED_COPY1]](<2 x s16>), [[PRED_COPY2]](<16 x s32>), 0, 0, 0 ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>) %0:_(<2 x s16>) = COPY $sgpr32 %1:_(<2 x s16>) = COPY $sgpr33 @@ -970,10 +970,10 @@ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr32 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr33 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY]](<2 x s16>) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY1]](<2 x s16>) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:agpr(<4 x s32>) = COPY [[COPY2]](<4 x s32>) - ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x8bf16), [[COPY3]](<2 x s16>), [[COPY4]](<2 x s16>), [[COPY5]](<4 x s32>), 0, 0, 0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(<2 x s16>) = PRED_COPY [[COPY]](<2 x s16>) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(<2 x s16>) = PRED_COPY [[COPY1]](<2 x s16>) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:agpr(<4 x s32>) = PRED_COPY [[COPY2]](<4 x s32>) + ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x8bf16), [[PRED_COPY]](<2 x s16>), [[PRED_COPY1]](<2 x s16>), [[PRED_COPY2]](<4 x s32>), 0, 0, 0 ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>) %0:_(<2 x s16>) = COPY $sgpr32 %1:_(<2 x s16>) = COPY $sgpr33 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.raw.buffer.load.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.raw.buffer.load.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.raw.buffer.load.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.raw.buffer.load.ll @@ -8,17 +8,17 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr6 ; CHECK-NEXT: 
[[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY6]](s32), [[COPY4]], [[COPY5]], 0, 0, 0 :: (dereferenceable load (s32), align 1, addrspace 7) - ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[PRED_COPY6]](s32), [[PRED_COPY4]], [[PRED_COPY5]], 0, 0, 0 :: (dereferenceable load (s32), align 1, addrspace 7) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_BUFFER_LOAD]](s32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) ret float %val @@ -30,18 +30,18 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr7 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr7 ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[COPY4]](s32) - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY6]](s32), [[COPY7]], [[COPY5]], 0, 0, 0 :: (dereferenceable load (s32), align 1, addrspace 7) - ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr(s32) = PRED_COPY [[PRED_COPY4]](s32) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[PRED_COPY6]](s32), [[PRED_COPY7]], [[PRED_COPY5]], 0, 0, 0 :: (dereferenceable load (s32), align 1, addrspace 7) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_BUFFER_LOAD]](s32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) ret float %val @@ -54,15 +54,15 @@ ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; CHECK-NEXT: 
[[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr4 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} @@ -87,7 +87,7 @@ ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[COPY6]](s32), [[COPY4]], [[COPY5]], 0, 0, 0 :: (dereferenceable load (s32), align 1, addrspace 7) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[PRED_COPY6]](s32), [[PRED_COPY4]], [[PRED_COPY5]], 0, 0, 0 :: (dereferenceable load (s32), align 1, addrspace 7) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -97,7 +97,7 @@ ; CHECK-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.5: - ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_BUFFER_LOAD]](s32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) ret float %val @@ -110,15 +110,15 @@ ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: 
[[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} @@ -126,15 +126,15 @@ ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %15, %bb.3 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[COPY5]](s32), implicit $exec - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[V_READFIRSTLANE_B32_]](s32), [[COPY5]] + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[PRED_COPY5]](s32), implicit $exec + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[V_READFIRSTLANE_B32_]](s32), [[PRED_COPY5]] ; CHECK-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[ICMP]](s1) ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY6]](s32), [[COPY4]], [[V_READFIRSTLANE_B32_]], 0, 0, 0 :: (dereferenceable load (s32), align 1, addrspace 7) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[PRED_COPY6]](s32), [[PRED_COPY4]], [[V_READFIRSTLANE_B32_]], 0, 0, 0 :: (dereferenceable load (s32), align 1, addrspace 7) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -144,7 +144,7 @@ ; CHECK-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.5: - ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_BUFFER_LOAD]](s32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) ret float %val @@ -157,15 +157,15 @@ ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr5 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr4 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr5 ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 
0 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} @@ -184,8 +184,8 @@ ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]] ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]] ; CHECK-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[COPY5]](s32), implicit $exec - ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[V_READFIRSTLANE_B32_4]](s32), [[COPY5]] + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[PRED_COPY5]](s32), implicit $exec + ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[V_READFIRSTLANE_B32_4]](s32), [[PRED_COPY5]] ; CHECK-NEXT: [[AND1:%[0-9]+]]:vcc(s1) = G_AND [[AND]], [[ICMP2]] ; CHECK-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND1]](s1) ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec @@ -193,7 +193,7 @@ ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[COPY6]](s32), [[COPY4]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0 :: (dereferenceable load (s32), align 1, addrspace 7) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[PRED_COPY6]](s32), [[PRED_COPY4]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0 :: (dereferenceable load (s32), align 1, addrspace 7) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -203,7 +203,7 @@ ; CHECK-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.5: - ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_BUFFER_LOAD]](s32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) ret float %val diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.readfirstlane.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.readfirstlane.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.readfirstlane.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.readfirstlane.mir @@ -13,8 +13,8 @@ ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[PRED_COPY]](s32) %0:_(s32) = COPY $sgpr0 %1:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), %0 ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.readlane.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.readlane.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.readlane.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.readlane.mir @@ -14,8 +14,8 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readlane), [[COPY2]](s32), [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readlane), [[PRED_COPY]](s32), [[COPY1]](s32) %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 %2:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readlane), %0, %1 @@ -70,9 +70,9 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[COPY1]](s32), implicit $exec - ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readlane), [[COPY2]](s32), [[V_READFIRSTLANE_B32_]](s32) + ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readlane), [[PRED_COPY]](s32), [[V_READFIRSTLANE_B32_]](s32) %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $vgpr0 %2:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readlane), %0, %1 @@ -90,10 +90,10 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:agpr(s32) = COPY $agpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:agpr(s32) = COPY $agpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[COPY3]](s32), implicit $exec - ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readlane), [[COPY2]](s32), [[V_READFIRSTLANE_B32_]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[PRED_COPY1]](s32), implicit $exec + ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readlane), [[PRED_COPY]](s32), [[V_READFIRSTLANE_B32_]](s32) ; CHECK-NEXT: S_ENDPGM 0, implicit [[INT]](s32) %0:_(s32) = COPY $agpr0 %1:_(s32) = COPY $agpr1 @@ -113,8 +113,8 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:agpr(s32) = COPY $agpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readlane), [[COPY2]](s32), [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readlane), [[PRED_COPY]](s32), [[COPY1]](s32) %0:_(s32) = COPY $agpr0 %1:_(s32) = COPY $sgpr0 %2:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readlane), %0, %1 @@ -132,10 +132,10 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; 
CHECK-NEXT: [[COPY1:%[0-9]+]]:agpr(s32) = COPY $agpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[COPY3]](s32), implicit $exec - ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readlane), [[COPY2]](s32), [[V_READFIRSTLANE_B32_]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[PRED_COPY1]](s32), implicit $exec + ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readlane), [[PRED_COPY]](s32), [[V_READFIRSTLANE_B32_]](s32) %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $agpr0 %2:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readlane), %0, %1 @@ -153,8 +153,8 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:agpr(s32) = COPY $agpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[COPY2]](s32), implicit $exec + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[PRED_COPY]](s32), implicit $exec ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readlane), [[COPY]](s32), [[V_READFIRSTLANE_B32_]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $agpr0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.buffer.load.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.buffer.load.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.buffer.load.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.buffer.load.ll @@ -8,31 +8,31 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 - ; CHECK-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(s32) = G_AMDGPU_S_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), 0 :: (dereferenceable invariant load (s32)) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[AMDGPU_S_BUFFER_LOAD]](s32) - ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY5]](s32) - ; CHECK-NEXT: $sgpr0 = COPY [[INT]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(s32) = G_AMDGPU_S_BUFFER_LOAD 
[[BUILD_VECTOR]](<4 x s32>), [[PRED_COPY4]](s32), 0 :: (dereferenceable invariant load (s32)) + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr(s32) = PRED_COPY [[AMDGPU_S_BUFFER_LOAD]](s32) + ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[PRED_COPY5]](s32) + ; CHECK-NEXT: $sgpr0 = PRED_COPY [[INT]](s32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GREEDY-LABEL: name: s_buffer_load_i32 ; GREEDY: bb.1 (%ir-block.0): ; GREEDY-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 - ; GREEDY-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(s32) = G_AMDGPU_S_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), 0 :: (dereferenceable invariant load (s32)) - ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[AMDGPU_S_BUFFER_LOAD]](s32) - ; GREEDY-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY5]](s32) - ; GREEDY-NEXT: $sgpr0 = COPY [[INT]](s32) + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr3 + ; GREEDY-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr4 + ; GREEDY-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr5 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; GREEDY-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr6 + ; GREEDY-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(s32) = G_AMDGPU_S_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[PRED_COPY4]](s32), 0 :: (dereferenceable invariant load (s32)) + ; GREEDY-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr(s32) = PRED_COPY [[AMDGPU_S_BUFFER_LOAD]](s32) + ; GREEDY-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[PRED_COPY5]](s32) + ; GREEDY-NEXT: $sgpr0 = PRED_COPY [[INT]](s32) ; GREEDY-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 %val = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret i32 %val @@ -43,39 +43,39 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 - ; CHECK-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(<2 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), 0 :: (dereferenceable invariant load (s64), align 4) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) 
= G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(<2 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[PRED_COPY4]](s32), 0 :: (dereferenceable invariant load (s64), align 4) ; CHECK-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[AMDGPU_S_BUFFER_LOAD]](<2 x s32>) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32) - ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY5]](s32) - ; CHECK-NEXT: $sgpr0 = COPY [[INT]](s32) - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[UV1]](s32) - ; CHECK-NEXT: [[INT1:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY6]](s32) - ; CHECK-NEXT: $sgpr1 = COPY [[INT1]](s32) + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[PRED_COPY5]](s32) + ; CHECK-NEXT: $sgpr0 = PRED_COPY [[INT]](s32) + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: [[INT1:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[PRED_COPY6]](s32) + ; CHECK-NEXT: $sgpr1 = PRED_COPY [[INT1]](s32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 ; GREEDY-LABEL: name: s_buffer_load_v2i32 ; GREEDY: bb.1 (%ir-block.0): ; GREEDY-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 - ; GREEDY-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(<2 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), 0 :: (dereferenceable invariant load (s64), align 4) + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr3 + ; GREEDY-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr4 + ; GREEDY-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr5 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; GREEDY-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr6 + ; GREEDY-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(<2 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[PRED_COPY4]](s32), 0 :: (dereferenceable invariant load (s64), align 4) ; GREEDY-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[AMDGPU_S_BUFFER_LOAD]](<2 x s32>) - ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32) - ; GREEDY-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY5]](s32) - ; GREEDY-NEXT: $sgpr0 = COPY [[INT]](s32) - ; GREEDY-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[UV1]](s32) - ; GREEDY-NEXT: [[INT1:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY6]](s32) - ; GREEDY-NEXT: $sgpr1 = COPY [[INT1]](s32) + ; GREEDY-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr(s32) = PRED_COPY 
[[UV]](s32) + ; GREEDY-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[PRED_COPY5]](s32) + ; GREEDY-NEXT: $sgpr0 = PRED_COPY [[INT]](s32) + ; GREEDY-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV1]](s32) + ; GREEDY-NEXT: [[INT1:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[PRED_COPY6]](s32) + ; GREEDY-NEXT: $sgpr1 = PRED_COPY [[INT1]](s32) ; GREEDY-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 %val = call <2 x i32> @llvm.amdgcn.s.buffer.load.v2i32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret <2 x i32> %val @@ -86,45 +86,45 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 - ; CHECK-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(<4 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), 0 :: (dereferenceable invariant load (s96), align 4) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(<4 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[PRED_COPY4]](s32), 0 :: (dereferenceable invariant load (s96), align 4) ; CHECK-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32), [[UV2:%[0-9]+]]:sgpr(s32), [[UV3:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[AMDGPU_S_BUFFER_LOAD]](<4 x s32>) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32) - ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY5]](s32) - ; CHECK-NEXT: $sgpr0 = COPY [[INT]](s32) - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[UV1]](s32) - ; CHECK-NEXT: [[INT1:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY6]](s32) - ; CHECK-NEXT: $sgpr1 = COPY [[INT1]](s32) - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[UV2]](s32) - ; CHECK-NEXT: [[INT2:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY7]](s32) - ; CHECK-NEXT: $sgpr2 = COPY [[INT2]](s32) + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[PRED_COPY5]](s32) + ; CHECK-NEXT: $sgpr0 = PRED_COPY [[INT]](s32) + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: [[INT1:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[PRED_COPY6]](s32) + ; CHECK-NEXT: $sgpr1 = PRED_COPY [[INT1]](s32) + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV2]](s32) + ; CHECK-NEXT: [[INT2:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[PRED_COPY7]](s32) + ; CHECK-NEXT: $sgpr2 = 
PRED_COPY [[INT2]](s32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2 ; GREEDY-LABEL: name: s_buffer_load_v3i32 ; GREEDY: bb.1 (%ir-block.0): ; GREEDY-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 - ; GREEDY-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(<4 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), 0 :: (dereferenceable invariant load (s96), align 4) + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr3 + ; GREEDY-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr4 + ; GREEDY-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr5 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; GREEDY-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr6 + ; GREEDY-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(<4 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[PRED_COPY4]](s32), 0 :: (dereferenceable invariant load (s96), align 4) ; GREEDY-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32), [[UV2:%[0-9]+]]:sgpr(s32), [[UV3:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[AMDGPU_S_BUFFER_LOAD]](<4 x s32>) - ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32) - ; GREEDY-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY5]](s32) - ; GREEDY-NEXT: $sgpr0 = COPY [[INT]](s32) - ; GREEDY-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[UV1]](s32) - ; GREEDY-NEXT: [[INT1:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY6]](s32) - ; GREEDY-NEXT: $sgpr1 = COPY [[INT1]](s32) - ; GREEDY-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[UV2]](s32) - ; GREEDY-NEXT: [[INT2:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY7]](s32) - ; GREEDY-NEXT: $sgpr2 = COPY [[INT2]](s32) + ; GREEDY-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV]](s32) + ; GREEDY-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[PRED_COPY5]](s32) + ; GREEDY-NEXT: $sgpr0 = PRED_COPY [[INT]](s32) + ; GREEDY-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV1]](s32) + ; GREEDY-NEXT: [[INT1:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[PRED_COPY6]](s32) + ; GREEDY-NEXT: $sgpr1 = PRED_COPY [[INT1]](s32) + ; GREEDY-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV2]](s32) + ; GREEDY-NEXT: [[INT2:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[PRED_COPY7]](s32) + ; GREEDY-NEXT: $sgpr2 = PRED_COPY [[INT2]](s32) ; GREEDY-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2 %val = call <3 x i32> @llvm.amdgcn.s.buffer.load.v3i32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret <3 x i32> %val @@ -135,75 +135,75 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: 
[[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 - ; CHECK-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(<8 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), 0 :: (dereferenceable invariant load (s256), align 4) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(<8 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[PRED_COPY4]](s32), 0 :: (dereferenceable invariant load (s256), align 4) ; CHECK-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32), [[UV2:%[0-9]+]]:sgpr(s32), [[UV3:%[0-9]+]]:sgpr(s32), [[UV4:%[0-9]+]]:sgpr(s32), [[UV5:%[0-9]+]]:sgpr(s32), [[UV6:%[0-9]+]]:sgpr(s32), [[UV7:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[AMDGPU_S_BUFFER_LOAD]](<8 x s32>) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32) - ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY5]](s32) - ; CHECK-NEXT: $sgpr0 = COPY [[INT]](s32) - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[UV1]](s32) - ; CHECK-NEXT: [[INT1:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY6]](s32) - ; CHECK-NEXT: $sgpr1 = COPY [[INT1]](s32) - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[UV2]](s32) - ; CHECK-NEXT: [[INT2:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY7]](s32) - ; CHECK-NEXT: $sgpr2 = COPY [[INT2]](s32) - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY [[UV3]](s32) - ; CHECK-NEXT: [[INT3:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY8]](s32) - ; CHECK-NEXT: $sgpr3 = COPY [[INT3]](s32) - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr(s32) = COPY [[UV4]](s32) - ; CHECK-NEXT: [[INT4:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY9]](s32) - ; CHECK-NEXT: $sgpr4 = COPY [[INT4]](s32) - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr(s32) = COPY [[UV5]](s32) - ; CHECK-NEXT: [[INT5:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY10]](s32) - ; CHECK-NEXT: $sgpr5 = COPY [[INT5]](s32) - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vgpr(s32) = COPY [[UV6]](s32) - ; CHECK-NEXT: [[INT6:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY11]](s32) - ; CHECK-NEXT: $sgpr6 = COPY [[INT6]](s32) - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vgpr(s32) = COPY [[UV7]](s32) - ; CHECK-NEXT: [[INT7:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY12]](s32) - ; CHECK-NEXT: $sgpr7 = COPY [[INT7]](s32) + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[PRED_COPY5]](s32) + ; CHECK-NEXT: $sgpr0 = PRED_COPY [[INT]](s32) + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr(s32) = 
PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: [[INT1:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[PRED_COPY6]](s32) + ; CHECK-NEXT: $sgpr1 = PRED_COPY [[INT1]](s32) + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV2]](s32) + ; CHECK-NEXT: [[INT2:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[PRED_COPY7]](s32) + ; CHECK-NEXT: $sgpr2 = PRED_COPY [[INT2]](s32) + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV3]](s32) + ; CHECK-NEXT: [[INT3:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[PRED_COPY8]](s32) + ; CHECK-NEXT: $sgpr3 = PRED_COPY [[INT3]](s32) + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV4]](s32) + ; CHECK-NEXT: [[INT4:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[PRED_COPY9]](s32) + ; CHECK-NEXT: $sgpr4 = PRED_COPY [[INT4]](s32) + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV5]](s32) + ; CHECK-NEXT: [[INT5:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[PRED_COPY10]](s32) + ; CHECK-NEXT: $sgpr5 = PRED_COPY [[INT5]](s32) + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV6]](s32) + ; CHECK-NEXT: [[INT6:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[PRED_COPY11]](s32) + ; CHECK-NEXT: $sgpr6 = PRED_COPY [[INT6]](s32) + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV7]](s32) + ; CHECK-NEXT: [[INT7:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[PRED_COPY12]](s32) + ; CHECK-NEXT: $sgpr7 = PRED_COPY [[INT7]](s32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7 ; GREEDY-LABEL: name: s_buffer_load_v8i32 ; GREEDY: bb.1 (%ir-block.0): ; GREEDY-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 - ; GREEDY-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(<8 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), 0 :: (dereferenceable invariant load (s256), align 4) + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr3 + ; GREEDY-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr4 + ; GREEDY-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr5 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; GREEDY-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr6 + ; GREEDY-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(<8 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[PRED_COPY4]](s32), 0 :: (dereferenceable invariant load (s256), align 4) ; GREEDY-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32), [[UV2:%[0-9]+]]:sgpr(s32), [[UV3:%[0-9]+]]:sgpr(s32), [[UV4:%[0-9]+]]:sgpr(s32), [[UV5:%[0-9]+]]:sgpr(s32), [[UV6:%[0-9]+]]:sgpr(s32), [[UV7:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[AMDGPU_S_BUFFER_LOAD]](<8 x s32>) 
- ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32) - ; GREEDY-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY5]](s32) - ; GREEDY-NEXT: $sgpr0 = COPY [[INT]](s32) - ; GREEDY-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[UV1]](s32) - ; GREEDY-NEXT: [[INT1:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY6]](s32) - ; GREEDY-NEXT: $sgpr1 = COPY [[INT1]](s32) - ; GREEDY-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[UV2]](s32) - ; GREEDY-NEXT: [[INT2:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY7]](s32) - ; GREEDY-NEXT: $sgpr2 = COPY [[INT2]](s32) - ; GREEDY-NEXT: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY [[UV3]](s32) - ; GREEDY-NEXT: [[INT3:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY8]](s32) - ; GREEDY-NEXT: $sgpr3 = COPY [[INT3]](s32) - ; GREEDY-NEXT: [[COPY9:%[0-9]+]]:vgpr(s32) = COPY [[UV4]](s32) - ; GREEDY-NEXT: [[INT4:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY9]](s32) - ; GREEDY-NEXT: $sgpr4 = COPY [[INT4]](s32) - ; GREEDY-NEXT: [[COPY10:%[0-9]+]]:vgpr(s32) = COPY [[UV5]](s32) - ; GREEDY-NEXT: [[INT5:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY10]](s32) - ; GREEDY-NEXT: $sgpr5 = COPY [[INT5]](s32) - ; GREEDY-NEXT: [[COPY11:%[0-9]+]]:vgpr(s32) = COPY [[UV6]](s32) - ; GREEDY-NEXT: [[INT6:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY11]](s32) - ; GREEDY-NEXT: $sgpr6 = COPY [[INT6]](s32) - ; GREEDY-NEXT: [[COPY12:%[0-9]+]]:vgpr(s32) = COPY [[UV7]](s32) - ; GREEDY-NEXT: [[INT7:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY12]](s32) - ; GREEDY-NEXT: $sgpr7 = COPY [[INT7]](s32) + ; GREEDY-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV]](s32) + ; GREEDY-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[PRED_COPY5]](s32) + ; GREEDY-NEXT: $sgpr0 = PRED_COPY [[INT]](s32) + ; GREEDY-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV1]](s32) + ; GREEDY-NEXT: [[INT1:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[PRED_COPY6]](s32) + ; GREEDY-NEXT: $sgpr1 = PRED_COPY [[INT1]](s32) + ; GREEDY-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV2]](s32) + ; GREEDY-NEXT: [[INT2:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[PRED_COPY7]](s32) + ; GREEDY-NEXT: $sgpr2 = PRED_COPY [[INT2]](s32) + ; GREEDY-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV3]](s32) + ; GREEDY-NEXT: [[INT3:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[PRED_COPY8]](s32) + ; GREEDY-NEXT: $sgpr3 = PRED_COPY [[INT3]](s32) + ; GREEDY-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV4]](s32) + ; GREEDY-NEXT: [[INT4:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[PRED_COPY9]](s32) + ; GREEDY-NEXT: $sgpr4 = PRED_COPY [[INT4]](s32) + ; GREEDY-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV5]](s32) + ; GREEDY-NEXT: [[INT5:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[PRED_COPY10]](s32) + ; GREEDY-NEXT: $sgpr5 = PRED_COPY [[INT5]](s32) + ; GREEDY-NEXT: [[PRED_COPY11:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV6]](s32) + ; GREEDY-NEXT: [[INT6:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[PRED_COPY11]](s32) + ; GREEDY-NEXT: $sgpr6 = PRED_COPY [[INT6]](s32) + ; GREEDY-NEXT: [[PRED_COPY12:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV7]](s32) + ; 
GREEDY-NEXT: [[INT7:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[PRED_COPY12]](s32) + ; GREEDY-NEXT: $sgpr7 = PRED_COPY [[INT7]](s32) ; GREEDY-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7 %val = call <8 x i32> @llvm.amdgcn.s.buffer.load.v8i32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret <8 x i32> %val @@ -214,123 +214,123 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 - ; CHECK-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(<16 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), 0 :: (dereferenceable invariant load (s512), align 4) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(<16 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[PRED_COPY4]](s32), 0 :: (dereferenceable invariant load (s512), align 4) ; CHECK-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32), [[UV2:%[0-9]+]]:sgpr(s32), [[UV3:%[0-9]+]]:sgpr(s32), [[UV4:%[0-9]+]]:sgpr(s32), [[UV5:%[0-9]+]]:sgpr(s32), [[UV6:%[0-9]+]]:sgpr(s32), [[UV7:%[0-9]+]]:sgpr(s32), [[UV8:%[0-9]+]]:sgpr(s32), [[UV9:%[0-9]+]]:sgpr(s32), [[UV10:%[0-9]+]]:sgpr(s32), [[UV11:%[0-9]+]]:sgpr(s32), [[UV12:%[0-9]+]]:sgpr(s32), [[UV13:%[0-9]+]]:sgpr(s32), [[UV14:%[0-9]+]]:sgpr(s32), [[UV15:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[AMDGPU_S_BUFFER_LOAD]](<16 x s32>) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32) - ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY5]](s32) - ; CHECK-NEXT: $sgpr0 = COPY [[INT]](s32) - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[UV1]](s32) - ; CHECK-NEXT: [[INT1:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY6]](s32) - ; CHECK-NEXT: $sgpr1 = COPY [[INT1]](s32) - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[UV2]](s32) - ; CHECK-NEXT: [[INT2:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY7]](s32) - ; CHECK-NEXT: $sgpr2 = COPY [[INT2]](s32) - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY [[UV3]](s32) - ; CHECK-NEXT: [[INT3:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY8]](s32) - ; CHECK-NEXT: $sgpr3 = COPY [[INT3]](s32) - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr(s32) = COPY [[UV4]](s32) - ; CHECK-NEXT: [[INT4:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY9]](s32) - ; CHECK-NEXT: $sgpr4 = COPY [[INT4]](s32) - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr(s32) = COPY [[UV5]](s32) - ; 
CHECK-NEXT: [[INT5:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY10]](s32) - ; CHECK-NEXT: $sgpr5 = COPY [[INT5]](s32) - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vgpr(s32) = COPY [[UV6]](s32) - ; CHECK-NEXT: [[INT6:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY11]](s32) - ; CHECK-NEXT: $sgpr6 = COPY [[INT6]](s32) - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vgpr(s32) = COPY [[UV7]](s32) - ; CHECK-NEXT: [[INT7:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY12]](s32) - ; CHECK-NEXT: $sgpr7 = COPY [[INT7]](s32) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vgpr(s32) = COPY [[UV8]](s32) - ; CHECK-NEXT: [[INT8:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY13]](s32) - ; CHECK-NEXT: $sgpr8 = COPY [[INT8]](s32) - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:vgpr(s32) = COPY [[UV9]](s32) - ; CHECK-NEXT: [[INT9:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY14]](s32) - ; CHECK-NEXT: $sgpr9 = COPY [[INT9]](s32) - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:vgpr(s32) = COPY [[UV10]](s32) - ; CHECK-NEXT: [[INT10:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY15]](s32) - ; CHECK-NEXT: $sgpr10 = COPY [[INT10]](s32) - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:vgpr(s32) = COPY [[UV11]](s32) - ; CHECK-NEXT: [[INT11:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY16]](s32) - ; CHECK-NEXT: $sgpr11 = COPY [[INT11]](s32) - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:vgpr(s32) = COPY [[UV12]](s32) - ; CHECK-NEXT: [[INT12:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY17]](s32) - ; CHECK-NEXT: $sgpr12 = COPY [[INT12]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:vgpr(s32) = COPY [[UV13]](s32) - ; CHECK-NEXT: [[INT13:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY18]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[INT13]](s32) - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:vgpr(s32) = COPY [[UV14]](s32) - ; CHECK-NEXT: [[INT14:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY19]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[INT14]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:vgpr(s32) = COPY [[UV15]](s32) - ; CHECK-NEXT: [[INT15:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY20]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[INT15]](s32) + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[PRED_COPY5]](s32) + ; CHECK-NEXT: $sgpr0 = PRED_COPY [[INT]](s32) + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: [[INT1:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[PRED_COPY6]](s32) + ; CHECK-NEXT: $sgpr1 = PRED_COPY [[INT1]](s32) + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV2]](s32) + ; CHECK-NEXT: [[INT2:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[PRED_COPY7]](s32) + ; CHECK-NEXT: $sgpr2 = PRED_COPY [[INT2]](s32) + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV3]](s32) + ; CHECK-NEXT: [[INT3:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[PRED_COPY8]](s32) + ; CHECK-NEXT: $sgpr3 = PRED_COPY [[INT3]](s32) + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV4]](s32) + ; CHECK-NEXT: [[INT4:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[PRED_COPY9]](s32) + ; CHECK-NEXT: $sgpr4 = 
PRED_COPY [[INT4]](s32) + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV5]](s32) + ; CHECK-NEXT: [[INT5:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[PRED_COPY10]](s32) + ; CHECK-NEXT: $sgpr5 = PRED_COPY [[INT5]](s32) + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV6]](s32) + ; CHECK-NEXT: [[INT6:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[PRED_COPY11]](s32) + ; CHECK-NEXT: $sgpr6 = PRED_COPY [[INT6]](s32) + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV7]](s32) + ; CHECK-NEXT: [[INT7:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[PRED_COPY12]](s32) + ; CHECK-NEXT: $sgpr7 = PRED_COPY [[INT7]](s32) + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV8]](s32) + ; CHECK-NEXT: [[INT8:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[PRED_COPY13]](s32) + ; CHECK-NEXT: $sgpr8 = PRED_COPY [[INT8]](s32) + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV9]](s32) + ; CHECK-NEXT: [[INT9:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[PRED_COPY14]](s32) + ; CHECK-NEXT: $sgpr9 = PRED_COPY [[INT9]](s32) + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV10]](s32) + ; CHECK-NEXT: [[INT10:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[PRED_COPY15]](s32) + ; CHECK-NEXT: $sgpr10 = PRED_COPY [[INT10]](s32) + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV11]](s32) + ; CHECK-NEXT: [[INT11:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[PRED_COPY16]](s32) + ; CHECK-NEXT: $sgpr11 = PRED_COPY [[INT11]](s32) + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV12]](s32) + ; CHECK-NEXT: [[INT12:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[PRED_COPY17]](s32) + ; CHECK-NEXT: $sgpr12 = PRED_COPY [[INT12]](s32) + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV13]](s32) + ; CHECK-NEXT: [[INT13:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[PRED_COPY18]](s32) + ; CHECK-NEXT: $sgpr13 = PRED_COPY [[INT13]](s32) + ; CHECK-NEXT: [[PRED_COPY19:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV14]](s32) + ; CHECK-NEXT: [[INT14:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[PRED_COPY19]](s32) + ; CHECK-NEXT: $sgpr14 = PRED_COPY [[INT14]](s32) + ; CHECK-NEXT: [[PRED_COPY20:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV15]](s32) + ; CHECK-NEXT: [[INT15:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[PRED_COPY20]](s32) + ; CHECK-NEXT: $sgpr15 = PRED_COPY [[INT15]](s32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15 ; GREEDY-LABEL: name: s_buffer_load_v16i32 ; GREEDY: bb.1 (%ir-block.0): ; GREEDY-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; 
GREEDY-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 - ; GREEDY-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(<16 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), 0 :: (dereferenceable invariant load (s512), align 4) + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr3 + ; GREEDY-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr4 + ; GREEDY-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr5 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; GREEDY-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr6 + ; GREEDY-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(<16 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[PRED_COPY4]](s32), 0 :: (dereferenceable invariant load (s512), align 4) ; GREEDY-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32), [[UV2:%[0-9]+]]:sgpr(s32), [[UV3:%[0-9]+]]:sgpr(s32), [[UV4:%[0-9]+]]:sgpr(s32), [[UV5:%[0-9]+]]:sgpr(s32), [[UV6:%[0-9]+]]:sgpr(s32), [[UV7:%[0-9]+]]:sgpr(s32), [[UV8:%[0-9]+]]:sgpr(s32), [[UV9:%[0-9]+]]:sgpr(s32), [[UV10:%[0-9]+]]:sgpr(s32), [[UV11:%[0-9]+]]:sgpr(s32), [[UV12:%[0-9]+]]:sgpr(s32), [[UV13:%[0-9]+]]:sgpr(s32), [[UV14:%[0-9]+]]:sgpr(s32), [[UV15:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[AMDGPU_S_BUFFER_LOAD]](<16 x s32>) - ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32) - ; GREEDY-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY5]](s32) - ; GREEDY-NEXT: $sgpr0 = COPY [[INT]](s32) - ; GREEDY-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[UV1]](s32) - ; GREEDY-NEXT: [[INT1:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY6]](s32) - ; GREEDY-NEXT: $sgpr1 = COPY [[INT1]](s32) - ; GREEDY-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[UV2]](s32) - ; GREEDY-NEXT: [[INT2:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY7]](s32) - ; GREEDY-NEXT: $sgpr2 = COPY [[INT2]](s32) - ; GREEDY-NEXT: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY [[UV3]](s32) - ; GREEDY-NEXT: [[INT3:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY8]](s32) - ; GREEDY-NEXT: $sgpr3 = COPY [[INT3]](s32) - ; GREEDY-NEXT: [[COPY9:%[0-9]+]]:vgpr(s32) = COPY [[UV4]](s32) - ; GREEDY-NEXT: [[INT4:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY9]](s32) - ; GREEDY-NEXT: $sgpr4 = COPY [[INT4]](s32) - ; GREEDY-NEXT: [[COPY10:%[0-9]+]]:vgpr(s32) = COPY [[UV5]](s32) - ; GREEDY-NEXT: [[INT5:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY10]](s32) - ; GREEDY-NEXT: $sgpr5 = COPY [[INT5]](s32) - ; GREEDY-NEXT: [[COPY11:%[0-9]+]]:vgpr(s32) = COPY [[UV6]](s32) - ; GREEDY-NEXT: [[INT6:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY11]](s32) - ; GREEDY-NEXT: $sgpr6 = COPY [[INT6]](s32) - ; GREEDY-NEXT: [[COPY12:%[0-9]+]]:vgpr(s32) = COPY [[UV7]](s32) - ; GREEDY-NEXT: [[INT7:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY12]](s32) - ; GREEDY-NEXT: $sgpr7 = COPY [[INT7]](s32) - ; GREEDY-NEXT: [[COPY13:%[0-9]+]]:vgpr(s32) = COPY [[UV8]](s32) - ; GREEDY-NEXT: [[INT8:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY13]](s32) - ; GREEDY-NEXT: $sgpr8 = COPY [[INT8]](s32) - ; GREEDY-NEXT: [[COPY14:%[0-9]+]]:vgpr(s32) = COPY [[UV9]](s32) - ; GREEDY-NEXT: 
[[INT9:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY14]](s32) - ; GREEDY-NEXT: $sgpr9 = COPY [[INT9]](s32) - ; GREEDY-NEXT: [[COPY15:%[0-9]+]]:vgpr(s32) = COPY [[UV10]](s32) - ; GREEDY-NEXT: [[INT10:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY15]](s32) - ; GREEDY-NEXT: $sgpr10 = COPY [[INT10]](s32) - ; GREEDY-NEXT: [[COPY16:%[0-9]+]]:vgpr(s32) = COPY [[UV11]](s32) - ; GREEDY-NEXT: [[INT11:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY16]](s32) - ; GREEDY-NEXT: $sgpr11 = COPY [[INT11]](s32) - ; GREEDY-NEXT: [[COPY17:%[0-9]+]]:vgpr(s32) = COPY [[UV12]](s32) - ; GREEDY-NEXT: [[INT12:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY17]](s32) - ; GREEDY-NEXT: $sgpr12 = COPY [[INT12]](s32) - ; GREEDY-NEXT: [[COPY18:%[0-9]+]]:vgpr(s32) = COPY [[UV13]](s32) - ; GREEDY-NEXT: [[INT13:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY18]](s32) - ; GREEDY-NEXT: $sgpr13 = COPY [[INT13]](s32) - ; GREEDY-NEXT: [[COPY19:%[0-9]+]]:vgpr(s32) = COPY [[UV14]](s32) - ; GREEDY-NEXT: [[INT14:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY19]](s32) - ; GREEDY-NEXT: $sgpr14 = COPY [[INT14]](s32) - ; GREEDY-NEXT: [[COPY20:%[0-9]+]]:vgpr(s32) = COPY [[UV15]](s32) - ; GREEDY-NEXT: [[INT15:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY20]](s32) - ; GREEDY-NEXT: $sgpr15 = COPY [[INT15]](s32) + ; GREEDY-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV]](s32) + ; GREEDY-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[PRED_COPY5]](s32) + ; GREEDY-NEXT: $sgpr0 = PRED_COPY [[INT]](s32) + ; GREEDY-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV1]](s32) + ; GREEDY-NEXT: [[INT1:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[PRED_COPY6]](s32) + ; GREEDY-NEXT: $sgpr1 = PRED_COPY [[INT1]](s32) + ; GREEDY-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV2]](s32) + ; GREEDY-NEXT: [[INT2:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[PRED_COPY7]](s32) + ; GREEDY-NEXT: $sgpr2 = PRED_COPY [[INT2]](s32) + ; GREEDY-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV3]](s32) + ; GREEDY-NEXT: [[INT3:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[PRED_COPY8]](s32) + ; GREEDY-NEXT: $sgpr3 = PRED_COPY [[INT3]](s32) + ; GREEDY-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV4]](s32) + ; GREEDY-NEXT: [[INT4:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[PRED_COPY9]](s32) + ; GREEDY-NEXT: $sgpr4 = PRED_COPY [[INT4]](s32) + ; GREEDY-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV5]](s32) + ; GREEDY-NEXT: [[INT5:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[PRED_COPY10]](s32) + ; GREEDY-NEXT: $sgpr5 = PRED_COPY [[INT5]](s32) + ; GREEDY-NEXT: [[PRED_COPY11:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV6]](s32) + ; GREEDY-NEXT: [[INT6:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[PRED_COPY11]](s32) + ; GREEDY-NEXT: $sgpr6 = PRED_COPY [[INT6]](s32) + ; GREEDY-NEXT: [[PRED_COPY12:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV7]](s32) + ; GREEDY-NEXT: [[INT7:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[PRED_COPY12]](s32) + ; GREEDY-NEXT: $sgpr7 = PRED_COPY [[INT7]](s32) + ; GREEDY-NEXT: [[PRED_COPY13:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV8]](s32) + ; GREEDY-NEXT: 
[[INT8:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[PRED_COPY13]](s32) + ; GREEDY-NEXT: $sgpr8 = PRED_COPY [[INT8]](s32) + ; GREEDY-NEXT: [[PRED_COPY14:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV9]](s32) + ; GREEDY-NEXT: [[INT9:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[PRED_COPY14]](s32) + ; GREEDY-NEXT: $sgpr9 = PRED_COPY [[INT9]](s32) + ; GREEDY-NEXT: [[PRED_COPY15:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV10]](s32) + ; GREEDY-NEXT: [[INT10:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[PRED_COPY15]](s32) + ; GREEDY-NEXT: $sgpr10 = PRED_COPY [[INT10]](s32) + ; GREEDY-NEXT: [[PRED_COPY16:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV11]](s32) + ; GREEDY-NEXT: [[INT11:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[PRED_COPY16]](s32) + ; GREEDY-NEXT: $sgpr11 = PRED_COPY [[INT11]](s32) + ; GREEDY-NEXT: [[PRED_COPY17:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV12]](s32) + ; GREEDY-NEXT: [[INT12:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[PRED_COPY17]](s32) + ; GREEDY-NEXT: $sgpr12 = PRED_COPY [[INT12]](s32) + ; GREEDY-NEXT: [[PRED_COPY18:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV13]](s32) + ; GREEDY-NEXT: [[INT13:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[PRED_COPY18]](s32) + ; GREEDY-NEXT: $sgpr13 = PRED_COPY [[INT13]](s32) + ; GREEDY-NEXT: [[PRED_COPY19:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV14]](s32) + ; GREEDY-NEXT: [[INT14:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[PRED_COPY19]](s32) + ; GREEDY-NEXT: $sgpr14 = PRED_COPY [[INT14]](s32) + ; GREEDY-NEXT: [[PRED_COPY20:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV15]](s32) + ; GREEDY-NEXT: [[INT15:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[PRED_COPY20]](s32) + ; GREEDY-NEXT: $sgpr15 = PRED_COPY [[INT15]](s32) ; GREEDY-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15 %val = call <16 x i32> @llvm.amdgcn.s.buffer.load.v16i32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret <16 x i32> %val @@ -342,31 +342,31 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = 
G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s32)) - ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s32)) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_BUFFER_LOAD]](s32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GREEDY-LABEL: name: s_buffer_load_f32_vgpr_offset ; GREEDY: bb.1 (%ir-block.0): ; GREEDY-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr3 + ; GREEDY-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr4 + ; GREEDY-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr5 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; GREEDY-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; GREEDY-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s32)) - ; GREEDY-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s32)) + ; GREEDY-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_BUFFER_LOAD]](s32) ; GREEDY-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret float %val @@ -377,35 +377,35 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; 
CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<2 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s64), align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<2 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s64), align 4) ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[AMDGPU_BUFFER_LOAD]](<2 x s32>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 ; GREEDY-LABEL: name: s_buffer_load_v2f32_vgpr_offset ; GREEDY: bb.1 (%ir-block.0): ; GREEDY-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr3 + ; GREEDY-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr4 + ; GREEDY-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr5 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; GREEDY-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; GREEDY-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<2 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s64), align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<2 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s64), align 4) ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[AMDGPU_BUFFER_LOAD]](<2 x s32>) - ; GREEDY-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GREEDY-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GREEDY-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GREEDY-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) ; GREEDY-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %val = call <2 x float> @llvm.amdgcn.s.buffer.load.v2f32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret <2 x float> %val @@ -416,37 +416,37 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; CHECK-NEXT: 
[[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[AMDGPU_BUFFER_LOAD]](<4 x s32>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 ; GREEDY-LABEL: name: s_buffer_load_v3f32_vgpr_offset ; GREEDY: bb.1 (%ir-block.0): ; GREEDY-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr3 + ; GREEDY-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr4 + ; GREEDY-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr5 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; GREEDY-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; GREEDY-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[AMDGPU_BUFFER_LOAD]](<4 x s32>) - ; GREEDY-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GREEDY-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GREEDY-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; GREEDY-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GREEDY-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GREEDY-NEXT: $vgpr2 = PRED_COPY 
[[UV2]](s32) ; GREEDY-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 %val = call <3 x float> @llvm.amdgcn.s.buffer.load.v3f32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret <3 x float> %val @@ -457,39 +457,39 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[AMDGPU_BUFFER_LOAD]](<4 x s32>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GREEDY-LABEL: name: s_buffer_load_v4f32_vgpr_offset ; GREEDY: bb.1 (%ir-block.0): ; GREEDY-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr3 + ; GREEDY-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr4 + ; GREEDY-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr5 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; GREEDY-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 ; GREEDY-NEXT: 
[[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; GREEDY-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[AMDGPU_BUFFER_LOAD]](<4 x s32>) - ; GREEDY-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GREEDY-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GREEDY-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GREEDY-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GREEDY-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GREEDY-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GREEDY-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GREEDY-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GREEDY-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 %val = call <4 x float> @llvm.amdgcn.s.buffer.load.v4f32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret <4 x float> %val @@ -500,51 +500,51 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), 
[[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](s32) - ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) + ; CHECK-NEXT: $vgpr4 = PRED_COPY [[UV4]](s32) + ; CHECK-NEXT: $vgpr5 = PRED_COPY [[UV5]](s32) + ; CHECK-NEXT: $vgpr6 = PRED_COPY [[UV6]](s32) + ; CHECK-NEXT: $vgpr7 = PRED_COPY [[UV7]](s32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GREEDY-LABEL: name: s_buffer_load_v8f32_vgpr_offset ; GREEDY: bb.1 (%ir-block.0): ; GREEDY-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr3 + ; GREEDY-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr4 + ; GREEDY-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr5 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; GREEDY-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; GREEDY-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) ; GREEDY-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 
x s32>) - ; GREEDY-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GREEDY-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GREEDY-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GREEDY-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; GREEDY-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; GREEDY-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; GREEDY-NEXT: $vgpr6 = COPY [[UV6]](s32) - ; GREEDY-NEXT: $vgpr7 = COPY [[UV7]](s32) + ; GREEDY-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GREEDY-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GREEDY-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GREEDY-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) + ; GREEDY-NEXT: $vgpr4 = PRED_COPY [[UV4]](s32) + ; GREEDY-NEXT: $vgpr5 = PRED_COPY [[UV5]](s32) + ; GREEDY-NEXT: $vgpr6 = PRED_COPY [[UV6]](s32) + ; GREEDY-NEXT: $vgpr7 = PRED_COPY [[UV7]](s32) ; GREEDY-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret <8 x float> %val @@ -555,71 +555,71 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 32, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 48, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK-NEXT: 
[[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 32, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 48, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<16 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>), [[AMDGPU_BUFFER_LOAD2]](<4 x s32>), [[AMDGPU_BUFFER_LOAD3]](<4 x s32>) ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<16 x s32>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](s32) - ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](s32) - ; CHECK-NEXT: $vgpr8 = COPY [[UV8]](s32) - ; CHECK-NEXT: $vgpr9 = COPY [[UV9]](s32) - ; CHECK-NEXT: $vgpr10 = COPY [[UV10]](s32) - ; CHECK-NEXT: $vgpr11 = COPY [[UV11]](s32) - ; CHECK-NEXT: $vgpr12 = COPY [[UV12]](s32) - ; CHECK-NEXT: $vgpr13 = COPY [[UV13]](s32) - ; CHECK-NEXT: $vgpr14 = COPY [[UV14]](s32) - ; CHECK-NEXT: $vgpr15 = COPY [[UV15]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) + ; CHECK-NEXT: $vgpr4 = PRED_COPY [[UV4]](s32) + ; CHECK-NEXT: $vgpr5 = PRED_COPY [[UV5]](s32) + ; CHECK-NEXT: $vgpr6 = PRED_COPY [[UV6]](s32) + ; CHECK-NEXT: $vgpr7 = PRED_COPY [[UV7]](s32) + ; CHECK-NEXT: $vgpr8 = PRED_COPY [[UV8]](s32) + ; CHECK-NEXT: $vgpr9 = PRED_COPY [[UV9]](s32) + ; CHECK-NEXT: $vgpr10 = PRED_COPY [[UV10]](s32) + ; CHECK-NEXT: $vgpr11 = PRED_COPY [[UV11]](s32) + ; CHECK-NEXT: $vgpr12 = PRED_COPY [[UV12]](s32) + ; CHECK-NEXT: $vgpr13 = PRED_COPY [[UV13]](s32) + ; CHECK-NEXT: $vgpr14 = PRED_COPY [[UV14]](s32) + ; CHECK-NEXT: $vgpr15 = PRED_COPY [[UV15]](s32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 ; GREEDY-LABEL: name: s_buffer_load_v16f32_vgpr_offset ; GREEDY: bb.1 (%ir-block.0): ; GREEDY-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GREEDY-NEXT: 
[[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr3 + ; GREEDY-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr4 + ; GREEDY-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr5 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; GREEDY-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; GREEDY-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 32, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) - ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 48, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 32, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 48, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) ; GREEDY-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<16 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>), [[AMDGPU_BUFFER_LOAD2]](<4 x s32>), [[AMDGPU_BUFFER_LOAD3]](<4 x s32>) ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<16 x s32>) - ; GREEDY-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GREEDY-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GREEDY-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GREEDY-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; GREEDY-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; GREEDY-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; GREEDY-NEXT: $vgpr6 = COPY [[UV6]](s32) - ; GREEDY-NEXT: $vgpr7 = COPY [[UV7]](s32) - ; GREEDY-NEXT: $vgpr8 = COPY 
[[UV8]](s32) - ; GREEDY-NEXT: $vgpr9 = COPY [[UV9]](s32) - ; GREEDY-NEXT: $vgpr10 = COPY [[UV10]](s32) - ; GREEDY-NEXT: $vgpr11 = COPY [[UV11]](s32) - ; GREEDY-NEXT: $vgpr12 = COPY [[UV12]](s32) - ; GREEDY-NEXT: $vgpr13 = COPY [[UV13]](s32) - ; GREEDY-NEXT: $vgpr14 = COPY [[UV14]](s32) - ; GREEDY-NEXT: $vgpr15 = COPY [[UV15]](s32) + ; GREEDY-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GREEDY-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GREEDY-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GREEDY-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) + ; GREEDY-NEXT: $vgpr4 = PRED_COPY [[UV4]](s32) + ; GREEDY-NEXT: $vgpr5 = PRED_COPY [[UV5]](s32) + ; GREEDY-NEXT: $vgpr6 = PRED_COPY [[UV6]](s32) + ; GREEDY-NEXT: $vgpr7 = PRED_COPY [[UV7]](s32) + ; GREEDY-NEXT: $vgpr8 = PRED_COPY [[UV8]](s32) + ; GREEDY-NEXT: $vgpr9 = PRED_COPY [[UV9]](s32) + ; GREEDY-NEXT: $vgpr10 = PRED_COPY [[UV10]](s32) + ; GREEDY-NEXT: $vgpr11 = PRED_COPY [[UV11]](s32) + ; GREEDY-NEXT: $vgpr12 = PRED_COPY [[UV12]](s32) + ; GREEDY-NEXT: $vgpr13 = PRED_COPY [[UV13]](s32) + ; GREEDY-NEXT: $vgpr14 = PRED_COPY [[UV14]](s32) + ; GREEDY-NEXT: $vgpr15 = PRED_COPY [[UV15]](s32) ; GREEDY-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 %val = call <16 x float> @llvm.amdgcn.s.buffer.load.v16f32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret <16 x float> %val @@ -630,16 +630,16 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s96) = G_TRUNC [[AMDGPU_BUFFER_LOAD]](s128) ; CHECK-NEXT: G_STORE [[TRUNC]](s96), [[DEF]](p1) :: (store (s96) into `ptr addrspace(1) undef`, align 8, addrspace 1) ; CHECK-NEXT: S_ENDPGM 0 @@ -647,16 +647,16 @@ ; GREEDY: bb.1 (%ir-block.0): ; GREEDY-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; GREEDY-NEXT: {{ $}} - 
; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr3 + ; GREEDY-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr4 + ; GREEDY-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr5 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; GREEDY-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 ; GREEDY-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; GREEDY-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s96) = G_TRUNC [[AMDGPU_BUFFER_LOAD]](s128) ; GREEDY-NEXT: G_STORE [[TRUNC]](s96), [[DEF]](p1) :: (store (s96) into `ptr addrspace(1) undef`, align 8, addrspace 1) ; GREEDY-NEXT: S_ENDPGM 0 @@ -671,17 +671,17 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant 
load (s128), align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s256) = G_MERGE_VALUES [[AMDGPU_BUFFER_LOAD]](s128), [[AMDGPU_BUFFER_LOAD1]](s128) ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s128), [[UV1:%[0-9]+]]:vgpr(s128) = G_UNMERGE_VALUES [[MV]](s256) ; CHECK-NEXT: G_STORE [[UV]](s128), [[DEF]](p1) :: (store (s128) into `ptr addrspace(1) undef`, align 8, addrspace 1) @@ -693,17 +693,17 @@ ; GREEDY: bb.1 (%ir-block.0): ; GREEDY-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr3 + ; GREEDY-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr4 + ; GREEDY-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr5 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; GREEDY-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 ; GREEDY-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; GREEDY-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) ; GREEDY-NEXT: [[MV:%[0-9]+]]:vgpr(s256) = G_MERGE_VALUES [[AMDGPU_BUFFER_LOAD]](s128), [[AMDGPU_BUFFER_LOAD1]](s128) ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr(s128), [[UV1:%[0-9]+]]:vgpr(s128) = G_UNMERGE_VALUES [[MV]](s256) ; GREEDY-NEXT: G_STORE [[UV]](s128), [[DEF]](p1) :: (store (s128) into `ptr addrspace(1) undef`, align 8, addrspace 1) @@ -722,19 +722,19 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: 
[[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 32, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 48, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 32, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 48, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s512) = G_MERGE_VALUES [[AMDGPU_BUFFER_LOAD]](s128), [[AMDGPU_BUFFER_LOAD1]](s128), [[AMDGPU_BUFFER_LOAD2]](s128), [[AMDGPU_BUFFER_LOAD3]](s128) ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s128), [[UV1:%[0-9]+]]:vgpr(s128), [[UV2:%[0-9]+]]:vgpr(s128), [[UV3:%[0-9]+]]:vgpr(s128) = G_UNMERGE_VALUES [[MV]](s512) ; CHECK-NEXT: G_STORE [[UV]](s128), [[DEF]](p1) :: (store (s128) into `ptr addrspace(1) undef`, align 8, addrspace 1) @@ -752,19 +752,19 @@ ; GREEDY: bb.1 (%ir-block.0): ; GREEDY-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr3 + 
; GREEDY-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr4 + ; GREEDY-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr5 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; GREEDY-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 ; GREEDY-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; GREEDY-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 32, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) - ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 48, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 32, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 48, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) ; GREEDY-NEXT: [[MV:%[0-9]+]]:vgpr(s512) = G_MERGE_VALUES [[AMDGPU_BUFFER_LOAD]](s128), [[AMDGPU_BUFFER_LOAD1]](s128), [[AMDGPU_BUFFER_LOAD2]](s128), [[AMDGPU_BUFFER_LOAD3]](s128) ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr(s128), [[UV1:%[0-9]+]]:vgpr(s128), [[UV2:%[0-9]+]]:vgpr(s128), [[UV3:%[0-9]+]]:vgpr(s128) = G_UNMERGE_VALUES [[MV]](s512) ; GREEDY-NEXT: G_STORE [[UV]](s128), [[DEF]](p1) :: (store (s128) into `ptr addrspace(1) undef`, align 8, addrspace 1) @@ -789,17 +789,17 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr(s32) 
= PRED_COPY $sgpr5 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<8 x s16>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<8 x s16>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<8 x s16>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<8 x s16>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<16 x s16>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<8 x s16>), [[AMDGPU_BUFFER_LOAD1]](<8 x s16>) ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(<8 x s16>), [[UV1:%[0-9]+]]:vgpr(<8 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<16 x s16>) ; CHECK-NEXT: G_STORE [[UV]](<8 x s16>), [[DEF]](p1) :: (store (<8 x s16>) into `ptr addrspace(1) undef`, align 32, addrspace 1) @@ -811,17 +811,17 @@ ; GREEDY: bb.1 (%ir-block.0): ; GREEDY-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr3 + ; GREEDY-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr4 + ; GREEDY-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr5 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; GREEDY-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 ; GREEDY-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; GREEDY-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<8 x s16>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<8 x s16>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<8 x s16>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY-NEXT: 
[[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<8 x s16>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) ; GREEDY-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<16 x s16>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<8 x s16>), [[AMDGPU_BUFFER_LOAD1]](<8 x s16>) ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr(<8 x s16>), [[UV1:%[0-9]+]]:vgpr(<8 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<16 x s16>) ; GREEDY-NEXT: G_STORE [[UV]](<8 x s16>), [[DEF]](p1) :: (store (<8 x s16>) into `ptr addrspace(1) undef`, align 32, addrspace 1) @@ -840,19 +840,19 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<8 x s16>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<8 x s16>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(<8 x s16>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 32, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(<8 x s16>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 48, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<8 x s16>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<8 x s16>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(<8 x s16>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 32, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(<8 x s16>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 48, 0, 0 :: (dereferenceable 
invariant load (s128) from unknown-address + 48, align 4) ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<32 x s16>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<8 x s16>), [[AMDGPU_BUFFER_LOAD1]](<8 x s16>), [[AMDGPU_BUFFER_LOAD2]](<8 x s16>), [[AMDGPU_BUFFER_LOAD3]](<8 x s16>) ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(<8 x s16>), [[UV1:%[0-9]+]]:vgpr(<8 x s16>), [[UV2:%[0-9]+]]:vgpr(<8 x s16>), [[UV3:%[0-9]+]]:vgpr(<8 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<32 x s16>) ; CHECK-NEXT: G_STORE [[UV]](<8 x s16>), [[DEF]](p1) :: (store (<8 x s16>) into `ptr addrspace(1) undef`, align 64, addrspace 1) @@ -870,19 +870,19 @@ ; GREEDY: bb.1 (%ir-block.0): ; GREEDY-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr3 + ; GREEDY-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr4 + ; GREEDY-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr5 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; GREEDY-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 ; GREEDY-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; GREEDY-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<8 x s16>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<8 x s16>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(<8 x s16>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 32, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) - ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(<8 x s16>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 48, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<8 x s16>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<8 x s16>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(<8 x s16>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 32, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(<8 x s16>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 48, 0, 
0 :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) ; GREEDY-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<32 x s16>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<8 x s16>), [[AMDGPU_BUFFER_LOAD1]](<8 x s16>), [[AMDGPU_BUFFER_LOAD2]](<8 x s16>), [[AMDGPU_BUFFER_LOAD3]](<8 x s16>) ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr(<8 x s16>), [[UV1:%[0-9]+]]:vgpr(<8 x s16>), [[UV2:%[0-9]+]]:vgpr(<8 x s16>), [[UV3:%[0-9]+]]:vgpr(<8 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<32 x s16>) ; GREEDY-NEXT: G_STORE [[UV]](<8 x s16>), [[DEF]](p1) :: (store (<8 x s16>) into `ptr addrspace(1) undef`, align 64, addrspace 1) @@ -907,17 +907,17 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<2 x s64>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<2 x s64>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<2 x s64>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<2 x s64>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<4 x s64>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<2 x s64>), [[AMDGPU_BUFFER_LOAD1]](<2 x s64>) ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(<2 x s64>), [[UV1:%[0-9]+]]:vgpr(<2 x s64>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<4 x s64>) ; CHECK-NEXT: G_STORE [[UV]](<2 x s64>), [[DEF]](p1) :: (store (<2 x s64>) into `ptr addrspace(1) undef`, align 32, addrspace 1) @@ -929,17 +929,17 @@ ; GREEDY: bb.1 (%ir-block.0): ; GREEDY-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), 
[[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr3 + ; GREEDY-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr4 + ; GREEDY-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr5 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; GREEDY-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 ; GREEDY-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; GREEDY-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<2 x s64>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<2 x s64>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<2 x s64>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<2 x s64>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) ; GREEDY-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<4 x s64>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<2 x s64>), [[AMDGPU_BUFFER_LOAD1]](<2 x s64>) ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr(<2 x s64>), [[UV1:%[0-9]+]]:vgpr(<2 x s64>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<4 x s64>) ; GREEDY-NEXT: G_STORE [[UV]](<2 x s64>), [[DEF]](p1) :: (store (<2 x s64>) into `ptr addrspace(1) undef`, align 32, addrspace 1) @@ -958,19 +958,19 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<2 x s64>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<2 x s64>) = G_AMDGPU_BUFFER_LOAD 
[[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(<2 x s64>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 32, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(<2 x s64>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 48, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<2 x s64>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<2 x s64>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(<2 x s64>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 32, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(<2 x s64>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 48, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s64>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<2 x s64>), [[AMDGPU_BUFFER_LOAD1]](<2 x s64>), [[AMDGPU_BUFFER_LOAD2]](<2 x s64>), [[AMDGPU_BUFFER_LOAD3]](<2 x s64>) ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(<2 x s64>), [[UV1:%[0-9]+]]:vgpr(<2 x s64>), [[UV2:%[0-9]+]]:vgpr(<2 x s64>), [[UV3:%[0-9]+]]:vgpr(<2 x s64>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s64>) ; CHECK-NEXT: G_STORE [[UV]](<2 x s64>), [[DEF]](p1) :: (store (<2 x s64>) into `ptr addrspace(1) undef`, align 64, addrspace 1) @@ -988,19 +988,19 @@ ; GREEDY: bb.1 (%ir-block.0): ; GREEDY-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr3 + ; GREEDY-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr4 + ; GREEDY-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr5 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; GREEDY-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 ; GREEDY-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; GREEDY-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<2 x s64>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<2 x s64>) = 
G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(<2 x s64>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 32, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) - ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(<2 x s64>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 48, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<2 x s64>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<2 x s64>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(<2 x s64>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 32, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(<2 x s64>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 48, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) ; GREEDY-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s64>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<2 x s64>), [[AMDGPU_BUFFER_LOAD1]](<2 x s64>), [[AMDGPU_BUFFER_LOAD2]](<2 x s64>), [[AMDGPU_BUFFER_LOAD3]](<2 x s64>) ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr(<2 x s64>), [[UV1:%[0-9]+]]:vgpr(<2 x s64>), [[UV2:%[0-9]+]]:vgpr(<2 x s64>), [[UV3:%[0-9]+]]:vgpr(<2 x s64>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s64>) ; GREEDY-NEXT: G_STORE [[UV]](<2 x s64>), [[DEF]](p1) :: (store (<2 x s64>) into `ptr addrspace(1) undef`, align 64, addrspace 1) @@ -1025,17 +1025,17 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<2 x p1>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<2 x 
p1>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<2 x p1>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<2 x p1>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<4 x p1>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<2 x p1>), [[AMDGPU_BUFFER_LOAD1]](<2 x p1>) ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(<2 x p1>), [[UV1:%[0-9]+]]:vgpr(<2 x p1>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<4 x p1>) ; CHECK-NEXT: G_STORE [[UV]](<2 x p1>), [[DEF]](p1) :: (store (<2 x p1>) into `ptr addrspace(1) undef`, align 32, addrspace 1) @@ -1047,17 +1047,17 @@ ; GREEDY: bb.1 (%ir-block.0): ; GREEDY-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr3 + ; GREEDY-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr4 + ; GREEDY-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr5 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; GREEDY-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 ; GREEDY-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; GREEDY-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<2 x p1>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<2 x p1>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<2 x p1>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<2 x p1>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) ; GREEDY-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<4 x p1>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<2 x p1>), [[AMDGPU_BUFFER_LOAD1]](<2 x p1>) ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr(<2 x p1>), [[UV1:%[0-9]+]]:vgpr(<2 x p1>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<4 x p1>) ; GREEDY-NEXT: G_STORE [[UV]](<2 x p1>), [[DEF]](p1) :: (store (<2 x p1>) into `ptr addrspace(1) undef`, align 32, addrspace 1) @@ -1076,19 +1076,19 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, 
$vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<2 x p1>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<2 x p1>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(<2 x p1>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 32, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(<2 x p1>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 48, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<2 x p1>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<2 x p1>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(<2 x p1>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 32, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(<2 x p1>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 48, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x p1>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<2 x p1>), [[AMDGPU_BUFFER_LOAD1]](<2 x p1>), [[AMDGPU_BUFFER_LOAD2]](<2 x p1>), [[AMDGPU_BUFFER_LOAD3]](<2 x p1>) ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(<2 x p1>), [[UV1:%[0-9]+]]:vgpr(<2 x p1>), [[UV2:%[0-9]+]]:vgpr(<2 x p1>), [[UV3:%[0-9]+]]:vgpr(<2 x p1>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x p1>) ; CHECK-NEXT: G_STORE [[UV]](<2 x p1>), [[DEF]](p1) :: (store (<2 x p1>) into `ptr addrspace(1) undef`, align 64, addrspace 1) @@ -1106,19 +1106,19 @@ ; GREEDY: bb.1 (%ir-block.0): ; GREEDY-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; GREEDY-NEXT: 
{{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr3 + ; GREEDY-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr4 + ; GREEDY-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr5 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; GREEDY-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 ; GREEDY-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; GREEDY-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<2 x p1>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<2 x p1>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(<2 x p1>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 32, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) - ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(<2 x p1>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 48, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<2 x p1>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<2 x p1>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(<2 x p1>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 32, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(<2 x p1>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 48, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) ; GREEDY-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x p1>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<2 x p1>), [[AMDGPU_BUFFER_LOAD1]](<2 x p1>), [[AMDGPU_BUFFER_LOAD2]](<2 x p1>), [[AMDGPU_BUFFER_LOAD3]](<2 x p1>) ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr(<2 x p1>), [[UV1:%[0-9]+]]:vgpr(<2 x p1>), [[UV2:%[0-9]+]]:vgpr(<2 x p1>), [[UV3:%[0-9]+]]:vgpr(<2 x p1>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x p1>) ; GREEDY-NEXT: G_STORE [[UV]](<2 x p1>), [[DEF]](p1) :: (store (<2 x p1>) into `ptr addrspace(1) undef`, align 64, addrspace 1) @@ -1142,37 +1142,37 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; 
CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4092 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[PRED_COPY4]], [[PRED_COPY5]] ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4092, 0, 0 :: (dereferenceable invariant load (s32)) - ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[PRED_COPY4]], [[C1]], 4092, 0, 0 :: (dereferenceable invariant load (s32)) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_BUFFER_LOAD]](s32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GREEDY-LABEL: name: s_buffer_load_f32_vgpr_offset_add_4092 ; GREEDY: bb.1 (%ir-block.0): ; GREEDY-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr3 + ; GREEDY-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr4 + ; GREEDY-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr5 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; GREEDY-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4092 - ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] + ; GREEDY-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; GREEDY-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[PRED_COPY4]], [[PRED_COPY5]] ; GREEDY-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; GREEDY-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; 
GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4092, 0, 0 :: (dereferenceable invariant load (s32)) - ; GREEDY-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[PRED_COPY4]], [[C1]], 4092, 0, 0 :: (dereferenceable invariant load (s32)) + ; GREEDY-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_BUFFER_LOAD]](s32) ; GREEDY-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %soffset = add i32 %soffset.base, 4092 %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %soffset, i32 0) @@ -1184,37 +1184,37 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4095 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[PRED_COPY4]], [[PRED_COPY5]] ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4095, 0, 0 :: (dereferenceable invariant load (s32)) - ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[PRED_COPY4]], [[C1]], 4095, 0, 0 :: (dereferenceable invariant load (s32)) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_BUFFER_LOAD]](s32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GREEDY-LABEL: name: s_buffer_load_f32_vgpr_offset_add_4095 ; GREEDY: bb.1 (%ir-block.0): ; GREEDY-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr3 + ; GREEDY-NEXT: 
[[PRED_COPY2:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr4 + ; GREEDY-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr5 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; GREEDY-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4095 - ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] + ; GREEDY-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; GREEDY-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[PRED_COPY4]], [[PRED_COPY5]] ; GREEDY-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; GREEDY-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4095, 0, 0 :: (dereferenceable invariant load (s32)) - ; GREEDY-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[PRED_COPY4]], [[C1]], 4095, 0, 0 :: (dereferenceable invariant load (s32)) + ; GREEDY-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_BUFFER_LOAD]](s32) ; GREEDY-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %soffset = add i32 %soffset.base, 4095 %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %soffset, i32 0) @@ -1226,35 +1226,35 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4096 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[PRED_COPY4]], [[PRED_COPY5]] ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s32)) - ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s32)) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_BUFFER_LOAD]](s32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GREEDY-LABEL: name: 
s_buffer_load_f32_vgpr_offset_add_4096 ; GREEDY: bb.1 (%ir-block.0): ; GREEDY-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr3 + ; GREEDY-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr4 + ; GREEDY-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr5 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; GREEDY-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4096 - ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] + ; GREEDY-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; GREEDY-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[PRED_COPY4]], [[PRED_COPY5]] ; GREEDY-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s32)) - ; GREEDY-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s32)) + ; GREEDY-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_BUFFER_LOAD]](s32) ; GREEDY-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %soffset = add i32 %soffset.base, 4096 %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %soffset, i32 0) @@ -1267,57 +1267,57 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4064 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD 
[[PRED_COPY4]], [[PRED_COPY5]] ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4064, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4080, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[PRED_COPY4]], [[C1]], 4064, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[PRED_COPY4]], [[C1]], 4080, 0, 0 :: (dereferenceable invariant load (s128), align 4) ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](s32) - ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) + ; CHECK-NEXT: $vgpr4 = PRED_COPY [[UV4]](s32) + ; CHECK-NEXT: $vgpr5 = PRED_COPY [[UV5]](s32) + ; CHECK-NEXT: $vgpr6 = PRED_COPY [[UV6]](s32) + ; CHECK-NEXT: $vgpr7 = PRED_COPY [[UV7]](s32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GREEDY-LABEL: name: s_buffer_load_v8f32_vgpr_offset_add_4064 ; GREEDY: bb.1 (%ir-block.0): ; GREEDY-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr3 + ; GREEDY-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr4 + ; GREEDY-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr5 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; GREEDY-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4064 - ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; 
GREEDY-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] + ; GREEDY-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; GREEDY-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[PRED_COPY4]], [[PRED_COPY5]] ; GREEDY-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; GREEDY-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4064, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4080, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[PRED_COPY4]], [[C1]], 4064, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[PRED_COPY4]], [[C1]], 4080, 0, 0 :: (dereferenceable invariant load (s128), align 4) ; GREEDY-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) - ; GREEDY-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GREEDY-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GREEDY-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GREEDY-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; GREEDY-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; GREEDY-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; GREEDY-NEXT: $vgpr6 = COPY [[UV6]](s32) - ; GREEDY-NEXT: $vgpr7 = COPY [[UV7]](s32) + ; GREEDY-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GREEDY-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GREEDY-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GREEDY-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) + ; GREEDY-NEXT: $vgpr4 = PRED_COPY [[UV4]](s32) + ; GREEDY-NEXT: $vgpr5 = PRED_COPY [[UV5]](s32) + ; GREEDY-NEXT: $vgpr6 = PRED_COPY [[UV6]](s32) + ; GREEDY-NEXT: $vgpr7 = PRED_COPY [[UV7]](s32) ; GREEDY-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 %soffset = add i32 %soffset.base, 4064 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0) @@ -1330,55 +1330,55 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x 
s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4068 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[PRED_COPY4]], [[PRED_COPY5]] ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](s32) - ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) + ; CHECK-NEXT: $vgpr4 = PRED_COPY [[UV4]](s32) + ; CHECK-NEXT: $vgpr5 = PRED_COPY [[UV5]](s32) + ; CHECK-NEXT: $vgpr6 = PRED_COPY [[UV6]](s32) + ; CHECK-NEXT: $vgpr7 = PRED_COPY [[UV7]](s32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GREEDY-LABEL: name: s_buffer_load_v8f32_vgpr_offset_add_4068 ; GREEDY: bb.1 (%ir-block.0): ; GREEDY-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr3 + ; GREEDY-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr4 + ; 
GREEDY-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr5 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; GREEDY-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4068 - ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] + ; GREEDY-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; GREEDY-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[PRED_COPY4]], [[PRED_COPY5]] ; GREEDY-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) ; GREEDY-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) - ; GREEDY-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GREEDY-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GREEDY-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GREEDY-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; GREEDY-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; GREEDY-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; GREEDY-NEXT: $vgpr6 = COPY [[UV6]](s32) - ; GREEDY-NEXT: $vgpr7 = COPY [[UV7]](s32) + ; GREEDY-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GREEDY-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GREEDY-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GREEDY-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) + ; GREEDY-NEXT: $vgpr4 = PRED_COPY [[UV4]](s32) + ; GREEDY-NEXT: $vgpr5 = PRED_COPY [[UV5]](s32) + ; GREEDY-NEXT: $vgpr6 = PRED_COPY [[UV6]](s32) + ; GREEDY-NEXT: $vgpr7 = PRED_COPY [[UV7]](s32) ; GREEDY-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 %soffset = add i32 %soffset.base, 4068 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0) @@ -1390,77 +1390,77 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: 
[[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4032 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[PRED_COPY4]], [[PRED_COPY5]] ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4032, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4048, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4064, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4080, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[PRED_COPY4]], [[C1]], 4032, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[PRED_COPY4]], [[C1]], 4048, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[PRED_COPY4]], [[C1]], 4064, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[PRED_COPY4]], [[C1]], 4080, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<16 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>), [[AMDGPU_BUFFER_LOAD2]](<4 x s32>), [[AMDGPU_BUFFER_LOAD3]](<4 x s32>) ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<16 x s32>) - ; CHECK-NEXT: $vgpr0 = 
COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](s32) - ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](s32) - ; CHECK-NEXT: $vgpr8 = COPY [[UV8]](s32) - ; CHECK-NEXT: $vgpr9 = COPY [[UV9]](s32) - ; CHECK-NEXT: $vgpr10 = COPY [[UV10]](s32) - ; CHECK-NEXT: $vgpr11 = COPY [[UV11]](s32) - ; CHECK-NEXT: $vgpr12 = COPY [[UV12]](s32) - ; CHECK-NEXT: $vgpr13 = COPY [[UV13]](s32) - ; CHECK-NEXT: $vgpr14 = COPY [[UV14]](s32) - ; CHECK-NEXT: $vgpr15 = COPY [[UV15]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) + ; CHECK-NEXT: $vgpr4 = PRED_COPY [[UV4]](s32) + ; CHECK-NEXT: $vgpr5 = PRED_COPY [[UV5]](s32) + ; CHECK-NEXT: $vgpr6 = PRED_COPY [[UV6]](s32) + ; CHECK-NEXT: $vgpr7 = PRED_COPY [[UV7]](s32) + ; CHECK-NEXT: $vgpr8 = PRED_COPY [[UV8]](s32) + ; CHECK-NEXT: $vgpr9 = PRED_COPY [[UV9]](s32) + ; CHECK-NEXT: $vgpr10 = PRED_COPY [[UV10]](s32) + ; CHECK-NEXT: $vgpr11 = PRED_COPY [[UV11]](s32) + ; CHECK-NEXT: $vgpr12 = PRED_COPY [[UV12]](s32) + ; CHECK-NEXT: $vgpr13 = PRED_COPY [[UV13]](s32) + ; CHECK-NEXT: $vgpr14 = PRED_COPY [[UV14]](s32) + ; CHECK-NEXT: $vgpr15 = PRED_COPY [[UV15]](s32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 ; GREEDY-LABEL: name: s_buffer_load_v16f32_vgpr_offset_add_4032 ; GREEDY: bb.1 (%ir-block.0): ; GREEDY-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr3 + ; GREEDY-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr4 + ; GREEDY-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr5 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; GREEDY-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4032 - ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] + ; GREEDY-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; GREEDY-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[PRED_COPY4]], [[PRED_COPY5]] ; GREEDY-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; GREEDY-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4032, 0, 0 :: (dereferenceable invariant load (s128), align 4) 
- ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4048, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4064, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) - ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4080, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[PRED_COPY4]], [[C1]], 4032, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[PRED_COPY4]], [[C1]], 4048, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[PRED_COPY4]], [[C1]], 4064, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[PRED_COPY4]], [[C1]], 4080, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) ; GREEDY-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<16 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>), [[AMDGPU_BUFFER_LOAD2]](<4 x s32>), [[AMDGPU_BUFFER_LOAD3]](<4 x s32>) ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<16 x s32>) - ; GREEDY-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GREEDY-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GREEDY-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GREEDY-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; GREEDY-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; GREEDY-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; GREEDY-NEXT: $vgpr6 = COPY [[UV6]](s32) - ; GREEDY-NEXT: $vgpr7 = COPY [[UV7]](s32) - ; GREEDY-NEXT: $vgpr8 = COPY [[UV8]](s32) - ; GREEDY-NEXT: $vgpr9 = COPY [[UV9]](s32) - ; GREEDY-NEXT: $vgpr10 = COPY [[UV10]](s32) - ; GREEDY-NEXT: $vgpr11 = COPY [[UV11]](s32) - ; GREEDY-NEXT: $vgpr12 = COPY [[UV12]](s32) - ; GREEDY-NEXT: $vgpr13 = COPY [[UV13]](s32) - ; GREEDY-NEXT: $vgpr14 = COPY [[UV14]](s32) - ; GREEDY-NEXT: $vgpr15 = COPY [[UV15]](s32) + ; GREEDY-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GREEDY-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GREEDY-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GREEDY-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) + ; GREEDY-NEXT: $vgpr4 = PRED_COPY [[UV4]](s32) + ; GREEDY-NEXT: $vgpr5 = PRED_COPY [[UV5]](s32) + ; GREEDY-NEXT: $vgpr6 = PRED_COPY [[UV6]](s32) + ; GREEDY-NEXT: $vgpr7 = PRED_COPY [[UV7]](s32) + ; GREEDY-NEXT: $vgpr8 = PRED_COPY [[UV8]](s32) + ; GREEDY-NEXT: $vgpr9 = PRED_COPY [[UV9]](s32) + ; GREEDY-NEXT: 
$vgpr10 = PRED_COPY [[UV10]](s32) + ; GREEDY-NEXT: $vgpr11 = PRED_COPY [[UV11]](s32) + ; GREEDY-NEXT: $vgpr12 = PRED_COPY [[UV12]](s32) + ; GREEDY-NEXT: $vgpr13 = PRED_COPY [[UV13]](s32) + ; GREEDY-NEXT: $vgpr14 = PRED_COPY [[UV14]](s32) + ; GREEDY-NEXT: $vgpr15 = PRED_COPY [[UV15]](s32) ; GREEDY-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 %soffset = add i32 %soffset.base, 4032 %val = call <16 x float> @llvm.amdgcn.s.buffer.load.v16f32(<4 x i32> %rsrc, i32 %soffset, i32 0) @@ -1472,75 +1472,75 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4036 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[PRED_COPY4]], [[PRED_COPY5]] ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 32, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 48, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK-NEXT: 
[[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 32, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 48, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<16 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>), [[AMDGPU_BUFFER_LOAD2]](<4 x s32>), [[AMDGPU_BUFFER_LOAD3]](<4 x s32>) ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<16 x s32>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](s32) - ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](s32) - ; CHECK-NEXT: $vgpr8 = COPY [[UV8]](s32) - ; CHECK-NEXT: $vgpr9 = COPY [[UV9]](s32) - ; CHECK-NEXT: $vgpr10 = COPY [[UV10]](s32) - ; CHECK-NEXT: $vgpr11 = COPY [[UV11]](s32) - ; CHECK-NEXT: $vgpr12 = COPY [[UV12]](s32) - ; CHECK-NEXT: $vgpr13 = COPY [[UV13]](s32) - ; CHECK-NEXT: $vgpr14 = COPY [[UV14]](s32) - ; CHECK-NEXT: $vgpr15 = COPY [[UV15]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) + ; CHECK-NEXT: $vgpr4 = PRED_COPY [[UV4]](s32) + ; CHECK-NEXT: $vgpr5 = PRED_COPY [[UV5]](s32) + ; CHECK-NEXT: $vgpr6 = PRED_COPY [[UV6]](s32) + ; CHECK-NEXT: $vgpr7 = PRED_COPY [[UV7]](s32) + ; CHECK-NEXT: $vgpr8 = PRED_COPY [[UV8]](s32) + ; CHECK-NEXT: $vgpr9 = PRED_COPY [[UV9]](s32) + ; CHECK-NEXT: $vgpr10 = PRED_COPY [[UV10]](s32) + ; CHECK-NEXT: $vgpr11 = PRED_COPY [[UV11]](s32) + ; CHECK-NEXT: $vgpr12 = PRED_COPY [[UV12]](s32) + ; CHECK-NEXT: $vgpr13 = PRED_COPY [[UV13]](s32) + ; CHECK-NEXT: $vgpr14 = PRED_COPY [[UV14]](s32) + ; CHECK-NEXT: $vgpr15 = PRED_COPY [[UV15]](s32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 ; GREEDY-LABEL: name: s_buffer_load_v16f32_vgpr_offset_add_4036 ; GREEDY: bb.1 (%ir-block.0): ; GREEDY-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GREEDY-NEXT: 
[[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr3 + ; GREEDY-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr4 + ; GREEDY-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr5 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; GREEDY-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4036 - ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] + ; GREEDY-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; GREEDY-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[PRED_COPY4]], [[PRED_COPY5]] ; GREEDY-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 32, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) - ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 48, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 32, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 48, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) ; GREEDY-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<16 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>), [[AMDGPU_BUFFER_LOAD2]](<4 x s32>), [[AMDGPU_BUFFER_LOAD3]](<4 x s32>) ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<16 x s32>) - ; GREEDY-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GREEDY-NEXT: $vgpr1 = COPY [[UV1]](s32) - 
; GREEDY-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GREEDY-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; GREEDY-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; GREEDY-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; GREEDY-NEXT: $vgpr6 = COPY [[UV6]](s32) - ; GREEDY-NEXT: $vgpr7 = COPY [[UV7]](s32) - ; GREEDY-NEXT: $vgpr8 = COPY [[UV8]](s32) - ; GREEDY-NEXT: $vgpr9 = COPY [[UV9]](s32) - ; GREEDY-NEXT: $vgpr10 = COPY [[UV10]](s32) - ; GREEDY-NEXT: $vgpr11 = COPY [[UV11]](s32) - ; GREEDY-NEXT: $vgpr12 = COPY [[UV12]](s32) - ; GREEDY-NEXT: $vgpr13 = COPY [[UV13]](s32) - ; GREEDY-NEXT: $vgpr14 = COPY [[UV14]](s32) - ; GREEDY-NEXT: $vgpr15 = COPY [[UV15]](s32) + ; GREEDY-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GREEDY-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GREEDY-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GREEDY-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) + ; GREEDY-NEXT: $vgpr4 = PRED_COPY [[UV4]](s32) + ; GREEDY-NEXT: $vgpr5 = PRED_COPY [[UV5]](s32) + ; GREEDY-NEXT: $vgpr6 = PRED_COPY [[UV6]](s32) + ; GREEDY-NEXT: $vgpr7 = PRED_COPY [[UV7]](s32) + ; GREEDY-NEXT: $vgpr8 = PRED_COPY [[UV8]](s32) + ; GREEDY-NEXT: $vgpr9 = PRED_COPY [[UV9]](s32) + ; GREEDY-NEXT: $vgpr10 = PRED_COPY [[UV10]](s32) + ; GREEDY-NEXT: $vgpr11 = PRED_COPY [[UV11]](s32) + ; GREEDY-NEXT: $vgpr12 = PRED_COPY [[UV12]](s32) + ; GREEDY-NEXT: $vgpr13 = PRED_COPY [[UV13]](s32) + ; GREEDY-NEXT: $vgpr14 = PRED_COPY [[UV14]](s32) + ; GREEDY-NEXT: $vgpr15 = PRED_COPY [[UV15]](s32) ; GREEDY-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 %soffset = add i32 %soffset.base, 4036 %val = call <16 x float> @llvm.amdgcn.s.buffer.load.v16f32(<4 x i32> %rsrc, i32 %soffset, i32 0) @@ -1553,13 +1553,13 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY4]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr(s32) = PRED_COPY [[PRED_COPY4]](s32) ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF @@ -1584,7 +1584,7 @@ ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4, %bb.2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[COPY5]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s32)) + ; CHECK-NEXT: 
[[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[PRED_COPY5]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s32)) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -1592,19 +1592,19 @@ ; CHECK-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.5: - ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_BUFFER_LOAD]](s32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GREEDY-LABEL: name: s_buffer_load_f32_vgpr_rsrc ; GREEDY: bb.1 (%ir-block.0): ; GREEDY-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY4]](s32) + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr1 + ; GREEDY-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr2 + ; GREEDY-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr3 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; GREEDY-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; GREEDY-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr(s32) = PRED_COPY [[PRED_COPY4]](s32) ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; GREEDY-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; GREEDY-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF @@ -1629,7 +1629,7 @@ ; GREEDY-NEXT: bb.3: ; GREEDY-NEXT: successors: %bb.4, %bb.2 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[COPY5]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s32)) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[PRED_COPY5]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s32)) ; GREEDY-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GREEDY-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GREEDY-NEXT: {{ $}} @@ -1637,7 +1637,7 @@ ; GREEDY-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.5: - ; GREEDY-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; GREEDY-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_BUFFER_LOAD]](s32) ; GREEDY-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret float %val @@ -1649,14 +1649,14 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR 
[[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4092 - ; CHECK-NEXT: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[COPY4]], [[C]] + ; CHECK-NEXT: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[PRED_COPY4]], [[C]] ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF @@ -1681,7 +1681,7 @@ ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4, %bb.2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[C1]], [[COPY4]], 4092, 0, 0 :: (dereferenceable invariant load (s32)) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[C1]], [[PRED_COPY4]], 4092, 0, 0 :: (dereferenceable invariant load (s32)) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -1689,20 +1689,20 @@ ; CHECK-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.5: - ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_BUFFER_LOAD]](s32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GREEDY-LABEL: name: s_buffer_load_f32_vgpr_rsrc_soffset_add_4092 ; GREEDY: bb.1 (%ir-block.0): ; GREEDY-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr1 + ; GREEDY-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr2 + ; GREEDY-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr3 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; GREEDY-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4092 - ; GREEDY-NEXT: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[COPY4]], [[C]] + ; GREEDY-NEXT: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[PRED_COPY4]], [[C]] ; GREEDY-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; GREEDY-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; GREEDY-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF @@ -1727,7 +1727,7 @@ ; GREEDY-NEXT: bb.3: ; GREEDY-NEXT: successors: %bb.4, %bb.2 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: 
[[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[C1]], [[COPY4]], 4092, 0, 0 :: (dereferenceable invariant load (s32)) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[C1]], [[PRED_COPY4]], 4092, 0, 0 :: (dereferenceable invariant load (s32)) ; GREEDY-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GREEDY-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GREEDY-NEXT: {{ $}} @@ -1735,7 +1735,7 @@ ; GREEDY-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.5: - ; GREEDY-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; GREEDY-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_BUFFER_LOAD]](s32) ; GREEDY-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %soffset = add i32 %soffset.base, 4092 %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %soffset, i32 0) @@ -1748,15 +1748,15 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4096 - ; CHECK-NEXT: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[COPY4]], [[C]] - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[ADD]](s32) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[PRED_COPY4]], [[C]] + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr(s32) = PRED_COPY [[ADD]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF @@ -1781,7 +1781,7 @@ ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4, %bb.2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[COPY5]], [[C1]], 0, 0, 0 :: (dereferenceable invariant load (s32)) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[PRED_COPY5]], [[C1]], 0, 0, 0 :: (dereferenceable invariant load (s32)) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -1789,21 +1789,21 @@ ; CHECK-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.5: - ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_BUFFER_LOAD]](s32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GREEDY-LABEL: name: s_buffer_load_f32_vgpr_rsrc_soffset_add_4096 ; GREEDY: bb.1 
(%ir-block.0): ; GREEDY-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr1 + ; GREEDY-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr2 + ; GREEDY-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr3 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; GREEDY-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4096 - ; GREEDY-NEXT: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[COPY4]], [[C]] - ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[ADD]](s32) + ; GREEDY-NEXT: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[PRED_COPY4]], [[C]] + ; GREEDY-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr(s32) = PRED_COPY [[ADD]](s32) ; GREEDY-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; GREEDY-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; GREEDY-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF @@ -1828,7 +1828,7 @@ ; GREEDY-NEXT: bb.3: ; GREEDY-NEXT: successors: %bb.4, %bb.2 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[COPY5]], [[C1]], 0, 0, 0 :: (dereferenceable invariant load (s32)) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[PRED_COPY5]], [[C1]], 0, 0, 0 :: (dereferenceable invariant load (s32)) ; GREEDY-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GREEDY-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GREEDY-NEXT: {{ $}} @@ -1836,7 +1836,7 @@ ; GREEDY-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.5: - ; GREEDY-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; GREEDY-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_BUFFER_LOAD]](s32) ; GREEDY-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %soffset = add i32 %soffset.base, 4096 %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %soffset, i32 0) @@ -1849,11 +1849,11 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) ; 
CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4095 ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 @@ -1888,17 +1888,17 @@ ; CHECK-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.5: - ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_BUFFER_LOAD]](s32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GREEDY-LABEL: name: s_buffer_load_f32_vgpr_rsrc_offset_4095 ; GREEDY: bb.1 (%ir-block.0): ; GREEDY-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr1 + ; GREEDY-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr2 + ; GREEDY-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr3 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4095 ; GREEDY-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; GREEDY-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 @@ -1933,7 +1933,7 @@ ; GREEDY-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.5: - ; GREEDY-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; GREEDY-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_BUFFER_LOAD]](s32) ; GREEDY-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 4095, i32 0) ret float %val @@ -1945,13 +1945,13 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4096 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF @@ -1976,7 +1976,7 @@ ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4, %bb.2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 0, 0, 0 :: 
(dereferenceable invariant load (s32)) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[PRED_COPY4]], [[C1]], 0, 0, 0 :: (dereferenceable invariant load (s32)) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -1984,19 +1984,19 @@ ; CHECK-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.5: - ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_BUFFER_LOAD]](s32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GREEDY-LABEL: name: s_buffer_load_f32_vgpr_rsrc_offset_4096 ; GREEDY: bb.1 (%ir-block.0): ; GREEDY-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr1 + ; GREEDY-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr2 + ; GREEDY-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr3 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4096 - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) + ; GREEDY-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) ; GREEDY-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; GREEDY-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; GREEDY-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF @@ -2021,7 +2021,7 @@ ; GREEDY-NEXT: bb.3: ; GREEDY-NEXT: successors: %bb.4, %bb.2 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 0, 0, 0 :: (dereferenceable invariant load (s32)) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[PRED_COPY4]], [[C1]], 0, 0, 0 :: (dereferenceable invariant load (s32)) ; GREEDY-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GREEDY-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GREEDY-NEXT: {{ $}} @@ -2029,7 +2029,7 @@ ; GREEDY-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.5: - ; GREEDY-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; GREEDY-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_BUFFER_LOAD]](s32) ; GREEDY-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 4096, i32 0) ret float %val @@ -2042,14 +2042,14 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), 
[[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4064 - ; CHECK-NEXT: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[COPY4]], [[C]] + ; CHECK-NEXT: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[PRED_COPY4]], [[C]] ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF @@ -2074,8 +2074,8 @@ ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4, %bb.2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[C1]], [[COPY4]], 4064, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[C1]], [[COPY4]], 4080, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[C1]], [[PRED_COPY4]], 4064, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[C1]], [[PRED_COPY4]], 4080, 0, 0 :: (dereferenceable invariant load (s128), align 4) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -2085,27 +2085,27 @@ ; CHECK-NEXT: bb.5: ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) ; CHECK-NEXT: [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV8]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV9]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV10]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV11]](s32) - ; CHECK-NEXT: $vgpr4 = COPY [[UV12]](s32) - ; CHECK-NEXT: $vgpr5 = COPY [[UV13]](s32) - ; CHECK-NEXT: $vgpr6 = COPY [[UV14]](s32) - ; CHECK-NEXT: $vgpr7 = COPY [[UV15]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV8]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV9]](s32) + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[UV10]](s32) + ; CHECK-NEXT: $vgpr3 = PRED_COPY [[UV11]](s32) + ; CHECK-NEXT: $vgpr4 = PRED_COPY [[UV12]](s32) + ; CHECK-NEXT: $vgpr5 = PRED_COPY [[UV13]](s32) + ; CHECK-NEXT: $vgpr6 = PRED_COPY [[UV14]](s32) + ; CHECK-NEXT: $vgpr7 = PRED_COPY [[UV15]](s32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GREEDY-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4064 ; GREEDY: bb.1 
(%ir-block.0): ; GREEDY-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr1 + ; GREEDY-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr2 + ; GREEDY-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr3 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; GREEDY-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4064 - ; GREEDY-NEXT: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[COPY4]], [[C]] + ; GREEDY-NEXT: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[PRED_COPY4]], [[C]] ; GREEDY-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; GREEDY-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; GREEDY-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF @@ -2130,8 +2130,8 @@ ; GREEDY-NEXT: bb.3: ; GREEDY-NEXT: successors: %bb.4, %bb.2 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[C1]], [[COPY4]], 4064, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[C1]], [[COPY4]], 4080, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[C1]], [[PRED_COPY4]], 4064, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[C1]], [[PRED_COPY4]], 4080, 0, 0 :: (dereferenceable invariant load (s128), align 4) ; GREEDY-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GREEDY-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GREEDY-NEXT: {{ $}} @@ -2141,14 +2141,14 @@ ; GREEDY-NEXT: bb.5: ; GREEDY-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) ; GREEDY-NEXT: [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) - ; GREEDY-NEXT: $vgpr0 = COPY [[UV8]](s32) - ; GREEDY-NEXT: $vgpr1 = COPY [[UV9]](s32) - ; GREEDY-NEXT: $vgpr2 = COPY [[UV10]](s32) - ; GREEDY-NEXT: $vgpr3 = COPY [[UV11]](s32) - ; GREEDY-NEXT: $vgpr4 = COPY [[UV12]](s32) - ; GREEDY-NEXT: $vgpr5 = COPY [[UV13]](s32) - ; GREEDY-NEXT: $vgpr6 = COPY [[UV14]](s32) - ; GREEDY-NEXT: $vgpr7 = COPY [[UV15]](s32) + ; GREEDY-NEXT: $vgpr0 = PRED_COPY [[UV8]](s32) + ; GREEDY-NEXT: $vgpr1 = PRED_COPY [[UV9]](s32) + ; GREEDY-NEXT: $vgpr2 = PRED_COPY [[UV10]](s32) + ; GREEDY-NEXT: $vgpr3 = 
PRED_COPY [[UV11]](s32) + ; GREEDY-NEXT: $vgpr4 = PRED_COPY [[UV12]](s32) + ; GREEDY-NEXT: $vgpr5 = PRED_COPY [[UV13]](s32) + ; GREEDY-NEXT: $vgpr6 = PRED_COPY [[UV14]](s32) + ; GREEDY-NEXT: $vgpr7 = PRED_COPY [[UV15]](s32) ; GREEDY-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 %soffset = add i32 %soffset.base, 4064 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0) @@ -2162,15 +2162,15 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4068 - ; CHECK-NEXT: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[COPY4]], [[C]] - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[ADD]](s32) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[PRED_COPY4]], [[C]] + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr(s32) = PRED_COPY [[ADD]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF @@ -2195,8 +2195,8 @@ ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4, %bb.2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[COPY5]], [[C1]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[COPY5]], [[C1]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[PRED_COPY5]], [[C1]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[PRED_COPY5]], [[C1]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -2206,28 +2206,28 @@ ; CHECK-NEXT: bb.5: ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) ; CHECK-NEXT: [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), 
[[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV8]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV9]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV10]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV11]](s32) - ; CHECK-NEXT: $vgpr4 = COPY [[UV12]](s32) - ; CHECK-NEXT: $vgpr5 = COPY [[UV13]](s32) - ; CHECK-NEXT: $vgpr6 = COPY [[UV14]](s32) - ; CHECK-NEXT: $vgpr7 = COPY [[UV15]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV8]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV9]](s32) + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[UV10]](s32) + ; CHECK-NEXT: $vgpr3 = PRED_COPY [[UV11]](s32) + ; CHECK-NEXT: $vgpr4 = PRED_COPY [[UV12]](s32) + ; CHECK-NEXT: $vgpr5 = PRED_COPY [[UV13]](s32) + ; CHECK-NEXT: $vgpr6 = PRED_COPY [[UV14]](s32) + ; CHECK-NEXT: $vgpr7 = PRED_COPY [[UV15]](s32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GREEDY-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4068 ; GREEDY: bb.1 (%ir-block.0): ; GREEDY-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr1 + ; GREEDY-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr2 + ; GREEDY-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr3 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; GREEDY-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4068 - ; GREEDY-NEXT: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[COPY4]], [[C]] - ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[ADD]](s32) + ; GREEDY-NEXT: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[PRED_COPY4]], [[C]] + ; GREEDY-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr(s32) = PRED_COPY [[ADD]](s32) ; GREEDY-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; GREEDY-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; GREEDY-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF @@ -2252,8 +2252,8 @@ ; GREEDY-NEXT: bb.3: ; GREEDY-NEXT: successors: %bb.4, %bb.2 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[COPY5]], [[C1]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[COPY5]], [[C1]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[PRED_COPY5]], [[C1]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[PRED_COPY5]], 
[[C1]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) ; GREEDY-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GREEDY-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GREEDY-NEXT: {{ $}} @@ -2263,14 +2263,14 @@ ; GREEDY-NEXT: bb.5: ; GREEDY-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) ; GREEDY-NEXT: [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) - ; GREEDY-NEXT: $vgpr0 = COPY [[UV8]](s32) - ; GREEDY-NEXT: $vgpr1 = COPY [[UV9]](s32) - ; GREEDY-NEXT: $vgpr2 = COPY [[UV10]](s32) - ; GREEDY-NEXT: $vgpr3 = COPY [[UV11]](s32) - ; GREEDY-NEXT: $vgpr4 = COPY [[UV12]](s32) - ; GREEDY-NEXT: $vgpr5 = COPY [[UV13]](s32) - ; GREEDY-NEXT: $vgpr6 = COPY [[UV14]](s32) - ; GREEDY-NEXT: $vgpr7 = COPY [[UV15]](s32) + ; GREEDY-NEXT: $vgpr0 = PRED_COPY [[UV8]](s32) + ; GREEDY-NEXT: $vgpr1 = PRED_COPY [[UV9]](s32) + ; GREEDY-NEXT: $vgpr2 = PRED_COPY [[UV10]](s32) + ; GREEDY-NEXT: $vgpr3 = PRED_COPY [[UV11]](s32) + ; GREEDY-NEXT: $vgpr4 = PRED_COPY [[UV12]](s32) + ; GREEDY-NEXT: $vgpr5 = PRED_COPY [[UV13]](s32) + ; GREEDY-NEXT: $vgpr6 = PRED_COPY [[UV14]](s32) + ; GREEDY-NEXT: $vgpr7 = PRED_COPY [[UV15]](s32) ; GREEDY-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 %soffset = add i32 %soffset.base, 4068 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0) @@ -2282,15 +2282,15 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4096 - ; CHECK-NEXT: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[COPY4]], [[C]] - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[ADD]](s32) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[PRED_COPY4]], [[C]] + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr(s32) = PRED_COPY [[ADD]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF @@ -2315,8 +2315,8 @@ ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4, %bb.2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x 
s32>), [[C2]](s32), [[COPY5]], [[C1]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[COPY5]], [[C1]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[PRED_COPY5]], [[C1]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[PRED_COPY5]], [[C1]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -2326,28 +2326,28 @@ ; CHECK-NEXT: bb.5: ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) ; CHECK-NEXT: [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV8]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV9]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV10]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV11]](s32) - ; CHECK-NEXT: $vgpr4 = COPY [[UV12]](s32) - ; CHECK-NEXT: $vgpr5 = COPY [[UV13]](s32) - ; CHECK-NEXT: $vgpr6 = COPY [[UV14]](s32) - ; CHECK-NEXT: $vgpr7 = COPY [[UV15]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV8]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV9]](s32) + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[UV10]](s32) + ; CHECK-NEXT: $vgpr3 = PRED_COPY [[UV11]](s32) + ; CHECK-NEXT: $vgpr4 = PRED_COPY [[UV12]](s32) + ; CHECK-NEXT: $vgpr5 = PRED_COPY [[UV13]](s32) + ; CHECK-NEXT: $vgpr6 = PRED_COPY [[UV14]](s32) + ; CHECK-NEXT: $vgpr7 = PRED_COPY [[UV15]](s32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GREEDY-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4096 ; GREEDY: bb.1 (%ir-block.0): ; GREEDY-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr1 + ; GREEDY-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr2 + ; GREEDY-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr3 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; GREEDY-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4096 - ; GREEDY-NEXT: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[COPY4]], [[C]] - ; GREEDY-NEXT: 
[[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[ADD]](s32) + ; GREEDY-NEXT: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[PRED_COPY4]], [[C]] + ; GREEDY-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr(s32) = PRED_COPY [[ADD]](s32) ; GREEDY-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; GREEDY-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; GREEDY-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF @@ -2372,8 +2372,8 @@ ; GREEDY-NEXT: bb.3: ; GREEDY-NEXT: successors: %bb.4, %bb.2 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[COPY5]], [[C1]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[COPY5]], [[C1]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[PRED_COPY5]], [[C1]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[PRED_COPY5]], [[C1]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) ; GREEDY-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GREEDY-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GREEDY-NEXT: {{ $}} @@ -2383,14 +2383,14 @@ ; GREEDY-NEXT: bb.5: ; GREEDY-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) ; GREEDY-NEXT: [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) - ; GREEDY-NEXT: $vgpr0 = COPY [[UV8]](s32) - ; GREEDY-NEXT: $vgpr1 = COPY [[UV9]](s32) - ; GREEDY-NEXT: $vgpr2 = COPY [[UV10]](s32) - ; GREEDY-NEXT: $vgpr3 = COPY [[UV11]](s32) - ; GREEDY-NEXT: $vgpr4 = COPY [[UV12]](s32) - ; GREEDY-NEXT: $vgpr5 = COPY [[UV13]](s32) - ; GREEDY-NEXT: $vgpr6 = COPY [[UV14]](s32) - ; GREEDY-NEXT: $vgpr7 = COPY [[UV15]](s32) + ; GREEDY-NEXT: $vgpr0 = PRED_COPY [[UV8]](s32) + ; GREEDY-NEXT: $vgpr1 = PRED_COPY [[UV9]](s32) + ; GREEDY-NEXT: $vgpr2 = PRED_COPY [[UV10]](s32) + ; GREEDY-NEXT: $vgpr3 = PRED_COPY [[UV11]](s32) + ; GREEDY-NEXT: $vgpr4 = PRED_COPY [[UV12]](s32) + ; GREEDY-NEXT: $vgpr5 = PRED_COPY [[UV13]](s32) + ; GREEDY-NEXT: $vgpr6 = PRED_COPY [[UV14]](s32) + ; GREEDY-NEXT: $vgpr7 = PRED_COPY [[UV15]](s32) ; GREEDY-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 %soffset = add i32 %soffset.base, 4096 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0) @@ -2402,15 +2402,15 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: 
[[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr4 ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 5000 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[PRED_COPY4]], [[PRED_COPY5]] ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec @@ -2434,8 +2434,8 @@ ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4, %bb.2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -2445,28 +2445,28 @@ ; CHECK-NEXT: bb.5: ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) ; CHECK-NEXT: [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV8]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV9]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV10]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV11]](s32) - ; CHECK-NEXT: $vgpr4 = COPY [[UV12]](s32) - ; CHECK-NEXT: $vgpr5 = COPY [[UV13]](s32) - ; CHECK-NEXT: $vgpr6 = COPY [[UV14]](s32) - ; CHECK-NEXT: $vgpr7 = COPY [[UV15]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV8]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV9]](s32) + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[UV10]](s32) + ; CHECK-NEXT: $vgpr3 = PRED_COPY [[UV11]](s32) + ; CHECK-NEXT: $vgpr4 = PRED_COPY [[UV12]](s32) + ; CHECK-NEXT: $vgpr5 = PRED_COPY [[UV13]](s32) + ; CHECK-NEXT: $vgpr6 = PRED_COPY [[UV14]](s32) + ; CHECK-NEXT: $vgpr7 = PRED_COPY [[UV15]](s32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit 
$vgpr7 ; GREEDY-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_5000 ; GREEDY: bb.1 (%ir-block.0): ; GREEDY-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4 + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr1 + ; GREEDY-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr2 + ; GREEDY-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr3 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; GREEDY-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr4 ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 5000 - ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] + ; GREEDY-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; GREEDY-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[PRED_COPY4]], [[PRED_COPY5]] ; GREEDY-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; GREEDY-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF ; GREEDY-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec @@ -2490,8 +2490,8 @@ ; GREEDY-NEXT: bb.3: ; GREEDY-NEXT: successors: %bb.4, %bb.2 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) ; GREEDY-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GREEDY-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GREEDY-NEXT: {{ $}} @@ -2501,14 +2501,14 @@ ; GREEDY-NEXT: bb.5: ; GREEDY-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) ; GREEDY-NEXT: [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) - ; GREEDY-NEXT: $vgpr0 = COPY [[UV8]](s32) - ; GREEDY-NEXT: $vgpr1 = COPY [[UV9]](s32) - ; GREEDY-NEXT: $vgpr2 = COPY [[UV10]](s32) - ; GREEDY-NEXT: $vgpr3 = COPY [[UV11]](s32) - ; GREEDY-NEXT: $vgpr4 = COPY [[UV12]](s32) - ; GREEDY-NEXT: $vgpr5 = COPY [[UV13]](s32) - ; GREEDY-NEXT: $vgpr6 = 
COPY [[UV14]](s32) - ; GREEDY-NEXT: $vgpr7 = COPY [[UV15]](s32) + ; GREEDY-NEXT: $vgpr0 = PRED_COPY [[UV8]](s32) + ; GREEDY-NEXT: $vgpr1 = PRED_COPY [[UV9]](s32) + ; GREEDY-NEXT: $vgpr2 = PRED_COPY [[UV10]](s32) + ; GREEDY-NEXT: $vgpr3 = PRED_COPY [[UV11]](s32) + ; GREEDY-NEXT: $vgpr4 = PRED_COPY [[UV12]](s32) + ; GREEDY-NEXT: $vgpr5 = PRED_COPY [[UV13]](s32) + ; GREEDY-NEXT: $vgpr6 = PRED_COPY [[UV14]](s32) + ; GREEDY-NEXT: $vgpr7 = PRED_COPY [[UV15]](s32) ; GREEDY-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 %soffset = add i32 %offset.base, 5000 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0) @@ -2520,15 +2520,15 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr4 ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4076 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[PRED_COPY4]], [[PRED_COPY5]] ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec @@ -2552,8 +2552,8 @@ ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4, %bb.2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -2563,28 +2563,28 @@ ; CHECK-NEXT: bb.5: ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x 
s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) ; CHECK-NEXT: [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV8]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV9]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV10]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV11]](s32) - ; CHECK-NEXT: $vgpr4 = COPY [[UV12]](s32) - ; CHECK-NEXT: $vgpr5 = COPY [[UV13]](s32) - ; CHECK-NEXT: $vgpr6 = COPY [[UV14]](s32) - ; CHECK-NEXT: $vgpr7 = COPY [[UV15]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV8]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV9]](s32) + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[UV10]](s32) + ; CHECK-NEXT: $vgpr3 = PRED_COPY [[UV11]](s32) + ; CHECK-NEXT: $vgpr4 = PRED_COPY [[UV12]](s32) + ; CHECK-NEXT: $vgpr5 = PRED_COPY [[UV13]](s32) + ; CHECK-NEXT: $vgpr6 = PRED_COPY [[UV14]](s32) + ; CHECK-NEXT: $vgpr7 = PRED_COPY [[UV15]](s32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GREEDY-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4076 ; GREEDY: bb.1 (%ir-block.0): ; GREEDY-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4 + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr1 + ; GREEDY-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr2 + ; GREEDY-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr3 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; GREEDY-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr4 ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4076 - ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] + ; GREEDY-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; GREEDY-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[PRED_COPY4]], [[PRED_COPY5]] ; GREEDY-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; GREEDY-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF ; GREEDY-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec @@ -2608,8 +2608,8 @@ ; GREEDY-NEXT: bb.3: ; GREEDY-NEXT: successors: %bb.4, %bb.2 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = 
G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) ; GREEDY-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GREEDY-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GREEDY-NEXT: {{ $}} @@ -2619,14 +2619,14 @@ ; GREEDY-NEXT: bb.5: ; GREEDY-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) ; GREEDY-NEXT: [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) - ; GREEDY-NEXT: $vgpr0 = COPY [[UV8]](s32) - ; GREEDY-NEXT: $vgpr1 = COPY [[UV9]](s32) - ; GREEDY-NEXT: $vgpr2 = COPY [[UV10]](s32) - ; GREEDY-NEXT: $vgpr3 = COPY [[UV11]](s32) - ; GREEDY-NEXT: $vgpr4 = COPY [[UV12]](s32) - ; GREEDY-NEXT: $vgpr5 = COPY [[UV13]](s32) - ; GREEDY-NEXT: $vgpr6 = COPY [[UV14]](s32) - ; GREEDY-NEXT: $vgpr7 = COPY [[UV15]](s32) + ; GREEDY-NEXT: $vgpr0 = PRED_COPY [[UV8]](s32) + ; GREEDY-NEXT: $vgpr1 = PRED_COPY [[UV9]](s32) + ; GREEDY-NEXT: $vgpr2 = PRED_COPY [[UV10]](s32) + ; GREEDY-NEXT: $vgpr3 = PRED_COPY [[UV11]](s32) + ; GREEDY-NEXT: $vgpr4 = PRED_COPY [[UV12]](s32) + ; GREEDY-NEXT: $vgpr5 = PRED_COPY [[UV13]](s32) + ; GREEDY-NEXT: $vgpr6 = PRED_COPY [[UV14]](s32) + ; GREEDY-NEXT: $vgpr7 = PRED_COPY [[UV15]](s32) ; GREEDY-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 %soffset = add i32 %offset.base, 4076 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0) @@ -2638,15 +2638,15 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr4 ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4080 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[PRED_COPY4]], [[PRED_COPY5]] ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: 
[[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec @@ -2670,8 +2670,8 @@ ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4, %bb.2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -2681,28 +2681,28 @@ ; CHECK-NEXT: bb.5: ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) ; CHECK-NEXT: [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV8]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV9]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV10]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV11]](s32) - ; CHECK-NEXT: $vgpr4 = COPY [[UV12]](s32) - ; CHECK-NEXT: $vgpr5 = COPY [[UV13]](s32) - ; CHECK-NEXT: $vgpr6 = COPY [[UV14]](s32) - ; CHECK-NEXT: $vgpr7 = COPY [[UV15]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV8]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV9]](s32) + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[UV10]](s32) + ; CHECK-NEXT: $vgpr3 = PRED_COPY [[UV11]](s32) + ; CHECK-NEXT: $vgpr4 = PRED_COPY [[UV12]](s32) + ; CHECK-NEXT: $vgpr5 = PRED_COPY [[UV13]](s32) + ; CHECK-NEXT: $vgpr6 = PRED_COPY [[UV14]](s32) + ; CHECK-NEXT: $vgpr7 = PRED_COPY [[UV15]](s32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GREEDY-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4080 ; GREEDY: bb.1 (%ir-block.0): ; GREEDY-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4 + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr1 + ; GREEDY-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr2 + ; GREEDY-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr3 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) 
= G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; GREEDY-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr4 ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4080 - ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] + ; GREEDY-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; GREEDY-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[PRED_COPY4]], [[PRED_COPY5]] ; GREEDY-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; GREEDY-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF ; GREEDY-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec @@ -2726,8 +2726,8 @@ ; GREEDY-NEXT: bb.3: ; GREEDY-NEXT: successors: %bb.4, %bb.2 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) ; GREEDY-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GREEDY-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GREEDY-NEXT: {{ $}} @@ -2737,14 +2737,14 @@ ; GREEDY-NEXT: bb.5: ; GREEDY-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) ; GREEDY-NEXT: [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) - ; GREEDY-NEXT: $vgpr0 = COPY [[UV8]](s32) - ; GREEDY-NEXT: $vgpr1 = COPY [[UV9]](s32) - ; GREEDY-NEXT: $vgpr2 = COPY [[UV10]](s32) - ; GREEDY-NEXT: $vgpr3 = COPY [[UV11]](s32) - ; GREEDY-NEXT: $vgpr4 = COPY [[UV12]](s32) - ; GREEDY-NEXT: $vgpr5 = COPY [[UV13]](s32) - ; GREEDY-NEXT: $vgpr6 = COPY [[UV14]](s32) - ; GREEDY-NEXT: $vgpr7 = COPY [[UV15]](s32) + ; GREEDY-NEXT: $vgpr0 = PRED_COPY [[UV8]](s32) + ; GREEDY-NEXT: $vgpr1 = PRED_COPY [[UV9]](s32) + ; GREEDY-NEXT: $vgpr2 = PRED_COPY [[UV10]](s32) + ; GREEDY-NEXT: $vgpr3 = PRED_COPY [[UV11]](s32) + ; GREEDY-NEXT: $vgpr4 = PRED_COPY [[UV12]](s32) + ; GREEDY-NEXT: $vgpr5 = PRED_COPY [[UV13]](s32) + ; GREEDY-NEXT: $vgpr6 = PRED_COPY [[UV14]](s32) + ; GREEDY-NEXT: $vgpr7 = PRED_COPY [[UV15]](s32) ; GREEDY-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 %soffset = add i32 %offset.base, 4080 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0) @@ -2756,11 +2756,11 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; 
CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4064 ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 @@ -2798,24 +2798,24 @@ ; CHECK-NEXT: bb.5: ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) ; CHECK-NEXT: [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV8]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV9]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV10]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV11]](s32) - ; CHECK-NEXT: $vgpr4 = COPY [[UV12]](s32) - ; CHECK-NEXT: $vgpr5 = COPY [[UV13]](s32) - ; CHECK-NEXT: $vgpr6 = COPY [[UV14]](s32) - ; CHECK-NEXT: $vgpr7 = COPY [[UV15]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV8]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV9]](s32) + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[UV10]](s32) + ; CHECK-NEXT: $vgpr3 = PRED_COPY [[UV11]](s32) + ; CHECK-NEXT: $vgpr4 = PRED_COPY [[UV12]](s32) + ; CHECK-NEXT: $vgpr5 = PRED_COPY [[UV13]](s32) + ; CHECK-NEXT: $vgpr6 = PRED_COPY [[UV14]](s32) + ; CHECK-NEXT: $vgpr7 = PRED_COPY [[UV15]](s32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GREEDY-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_offset_4064 ; GREEDY: bb.1 (%ir-block.0): ; GREEDY-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr1 + ; GREEDY-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr2 + ; GREEDY-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr3 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4064 ; GREEDY-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; GREEDY-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 @@ -2853,14 +2853,14 @@ ; GREEDY-NEXT: bb.5: ; GREEDY-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x 
s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) ; GREEDY-NEXT: [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) - ; GREEDY-NEXT: $vgpr0 = COPY [[UV8]](s32) - ; GREEDY-NEXT: $vgpr1 = COPY [[UV9]](s32) - ; GREEDY-NEXT: $vgpr2 = COPY [[UV10]](s32) - ; GREEDY-NEXT: $vgpr3 = COPY [[UV11]](s32) - ; GREEDY-NEXT: $vgpr4 = COPY [[UV12]](s32) - ; GREEDY-NEXT: $vgpr5 = COPY [[UV13]](s32) - ; GREEDY-NEXT: $vgpr6 = COPY [[UV14]](s32) - ; GREEDY-NEXT: $vgpr7 = COPY [[UV15]](s32) + ; GREEDY-NEXT: $vgpr0 = PRED_COPY [[UV8]](s32) + ; GREEDY-NEXT: $vgpr1 = PRED_COPY [[UV9]](s32) + ; GREEDY-NEXT: $vgpr2 = PRED_COPY [[UV10]](s32) + ; GREEDY-NEXT: $vgpr3 = PRED_COPY [[UV11]](s32) + ; GREEDY-NEXT: $vgpr4 = PRED_COPY [[UV12]](s32) + ; GREEDY-NEXT: $vgpr5 = PRED_COPY [[UV13]](s32) + ; GREEDY-NEXT: $vgpr6 = PRED_COPY [[UV14]](s32) + ; GREEDY-NEXT: $vgpr7 = PRED_COPY [[UV15]](s32) ; GREEDY-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 4064, i32 0) ret <8 x float> %val @@ -2871,35 +2871,35 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY5]](s32) - ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY6]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr(s32) = PRED_COPY [[PRED_COPY5]](s32) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[PRED_COPY4]], [[PRED_COPY6]] ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C]](s32), [[COPY4]], [[COPY5]], 0, 0, 0 :: (dereferenceable invariant load (s32)) - ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C]](s32), [[PRED_COPY4]], [[PRED_COPY5]], 0, 0, 0 :: (dereferenceable invariant load (s32)) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_BUFFER_LOAD]](s32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GREEDY-LABEL: name: 
s_buffer_load_f32_offset_add_vgpr_sgpr ; GREEDY: bb.1 (%ir-block.0): ; GREEDY-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 - ; GREEDY-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY5]](s32) - ; GREEDY-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY6]] + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr3 + ; GREEDY-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr4 + ; GREEDY-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr5 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; GREEDY-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 + ; GREEDY-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr6 + ; GREEDY-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr(s32) = PRED_COPY [[PRED_COPY5]](s32) + ; GREEDY-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[PRED_COPY4]], [[PRED_COPY6]] ; GREEDY-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C]](s32), [[COPY4]], [[COPY5]], 0, 0, 0 :: (dereferenceable invariant load (s32)) - ; GREEDY-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C]](s32), [[PRED_COPY4]], [[PRED_COPY5]], 0, 0, 0 :: (dereferenceable invariant load (s32)) + ; GREEDY-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_BUFFER_LOAD]](s32) ; GREEDY-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %offset = add i32 %offset.v, %offset.s %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %offset, i32 0) @@ -2911,35 +2911,35 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY5]](s32) - ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY6]], [[COPY4]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: 
[[PRED_COPY5:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr(s32) = PRED_COPY [[PRED_COPY5]](s32) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[PRED_COPY6]], [[PRED_COPY4]] ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C]](s32), [[COPY4]], [[COPY5]], 0, 0, 0 :: (dereferenceable invariant load (s32)) - ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C]](s32), [[PRED_COPY4]], [[PRED_COPY5]], 0, 0, 0 :: (dereferenceable invariant load (s32)) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_BUFFER_LOAD]](s32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GREEDY-LABEL: name: s_buffer_load_f32_offset_add_sgpr_vgpr ; GREEDY: bb.1 (%ir-block.0): ; GREEDY-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 - ; GREEDY-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY5]](s32) - ; GREEDY-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY6]], [[COPY4]] + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr3 + ; GREEDY-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr4 + ; GREEDY-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr5 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; GREEDY-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 + ; GREEDY-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr6 + ; GREEDY-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr(s32) = PRED_COPY [[PRED_COPY5]](s32) + ; GREEDY-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[PRED_COPY6]], [[PRED_COPY4]] ; GREEDY-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C]](s32), [[COPY4]], [[COPY5]], 0, 0, 0 :: (dereferenceable invariant load (s32)) - ; GREEDY-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C]](s32), [[PRED_COPY4]], [[PRED_COPY5]], 0, 0, 0 :: (dereferenceable invariant load (s32)) + ; GREEDY-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_BUFFER_LOAD]](s32) ; GREEDY-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %offset = add i32 %offset.s, %offset.v %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %offset, i32 0) @@ -2951,43 +2951,43 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK-NEXT: 
[[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY5]](s32) - ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY6]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr(s32) = PRED_COPY [[PRED_COPY5]](s32) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[PRED_COPY4]], [[PRED_COPY6]] ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1024 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[ADD1:%[0-9]+]]:vgpr(s32) = G_ADD [[ADD]], [[COPY7]] + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:vgpr(s32) = G_ADD [[ADD]], [[PRED_COPY7]] ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[ADD]], [[C1]], 1024, 0, 0 :: (dereferenceable invariant load (s32)) - ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_BUFFER_LOAD]](s32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GREEDY-LABEL: name: s_buffer_load_f32_offset_add_vgpr_sgpr_imm ; GREEDY: bb.1 (%ir-block.0): ; GREEDY-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 - ; GREEDY-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY5]](s32) - ; GREEDY-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY6]] + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr3 + ; GREEDY-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr4 + ; GREEDY-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr5 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; GREEDY-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 + ; GREEDY-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr6 + ; GREEDY-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr(s32) = PRED_COPY [[PRED_COPY5]](s32) + ; GREEDY-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[PRED_COPY4]], [[PRED_COPY6]] ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1024 - ; GREEDY-NEXT: 
[[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY-NEXT: [[ADD1:%[0-9]+]]:vgpr(s32) = G_ADD [[ADD]], [[COPY7]] + ; GREEDY-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; GREEDY-NEXT: [[ADD1:%[0-9]+]]:vgpr(s32) = G_ADD [[ADD]], [[PRED_COPY7]] ; GREEDY-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; GREEDY-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[ADD]], [[C1]], 1024, 0, 0 :: (dereferenceable invariant load (s32)) - ; GREEDY-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; GREEDY-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_BUFFER_LOAD]](s32) ; GREEDY-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %offset.base = add i32 %offset.v, %offset.s %offset = add i32 %offset.base, 1024 @@ -3000,43 +3000,43 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY5]](s32) - ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY6]], [[COPY4]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr(s32) = PRED_COPY [[PRED_COPY5]](s32) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[PRED_COPY6]], [[PRED_COPY4]] ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1024 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[ADD1:%[0-9]+]]:vgpr(s32) = G_ADD [[ADD]], [[COPY7]] + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:vgpr(s32) = G_ADD [[ADD]], [[PRED_COPY7]] ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[ADD]], [[C1]], 1024, 0, 0 :: (dereferenceable invariant load (s32)) - ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_BUFFER_LOAD]](s32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GREEDY-LABEL: name: s_buffer_load_f32_offset_add_sgpr_vgpr_imm ; GREEDY: bb.1 (%ir-block.0): ; GREEDY-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY 
$sgpr5 - ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 - ; GREEDY-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY5]](s32) - ; GREEDY-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY6]], [[COPY4]] + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr3 + ; GREEDY-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr4 + ; GREEDY-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr5 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; GREEDY-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 + ; GREEDY-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr6 + ; GREEDY-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr(s32) = PRED_COPY [[PRED_COPY5]](s32) + ; GREEDY-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[PRED_COPY6]], [[PRED_COPY4]] ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1024 - ; GREEDY-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY-NEXT: [[ADD1:%[0-9]+]]:vgpr(s32) = G_ADD [[ADD]], [[COPY7]] + ; GREEDY-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; GREEDY-NEXT: [[ADD1:%[0-9]+]]:vgpr(s32) = G_ADD [[ADD]], [[PRED_COPY7]] ; GREEDY-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; GREEDY-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[ADD]], [[C1]], 1024, 0, 0 :: (dereferenceable invariant load (s32)) - ; GREEDY-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; GREEDY-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_BUFFER_LOAD]](s32) ; GREEDY-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %offset.base = add i32 %offset.s, %offset.v %offset = add i32 %offset.base, 1024 @@ -3050,39 +3050,39 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr6 ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1024 - ; CHECK-NEXT: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[COPY5]], [[C]] - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[ADD]](s32) - ; CHECK-NEXT: [[ADD1:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY6]], [[COPY4]] + ; CHECK-NEXT: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD 
[[PRED_COPY5]], [[C]] + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr(s32) = PRED_COPY [[ADD]](s32) + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:vgpr(s32) = G_ADD [[PRED_COPY6]], [[PRED_COPY4]] ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[ADD]], 0, 0, 0 :: (dereferenceable invariant load (s32)) - ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[ADD]], 0, 0, 0 :: (dereferenceable invariant load (s32)) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_BUFFER_LOAD]](s32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GREEDY-LABEL: name: s_buffer_load_f32_offset_add_imm_sgpr_vgpr ; GREEDY: bb.1 (%ir-block.0): ; GREEDY-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr3 + ; GREEDY-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr4 + ; GREEDY-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr5 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; GREEDY-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 + ; GREEDY-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr6 ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1024 - ; GREEDY-NEXT: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[COPY5]], [[C]] - ; GREEDY-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[ADD]](s32) - ; GREEDY-NEXT: [[ADD1:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY6]], [[COPY4]] + ; GREEDY-NEXT: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[PRED_COPY5]], [[C]] + ; GREEDY-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr(s32) = PRED_COPY [[ADD]](s32) + ; GREEDY-NEXT: [[ADD1:%[0-9]+]]:vgpr(s32) = G_ADD [[PRED_COPY6]], [[PRED_COPY4]] ; GREEDY-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[ADD]], 0, 0, 0 :: (dereferenceable invariant load (s32)) - ; GREEDY-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[ADD]], 0, 0, 0 :: (dereferenceable invariant load (s32)) + ; GREEDY-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_BUFFER_LOAD]](s32) ; GREEDY-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %offset.base = add i32 %offset.s, 1024 %offset = add i32 %offset.base, %offset.v @@ -3095,41 +3095,41 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = 
COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr6 ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1024 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY6]] - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[COPY5]](s32) - ; CHECK-NEXT: [[ADD1:%[0-9]+]]:vgpr(s32) = G_ADD [[ADD]], [[COPY7]] + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[PRED_COPY4]], [[PRED_COPY6]] + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr(s32) = PRED_COPY [[PRED_COPY5]](s32) + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:vgpr(s32) = G_ADD [[ADD]], [[PRED_COPY7]] ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[ADD]], [[COPY5]], 0, 0, 0 :: (dereferenceable invariant load (s32)) - ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[ADD]], [[PRED_COPY5]], 0, 0, 0 :: (dereferenceable invariant load (s32)) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_BUFFER_LOAD]](s32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GREEDY-LABEL: name: s_buffer_load_f32_offset_add_imm_vgpr_sgpr ; GREEDY: bb.1 (%ir-block.0): ; GREEDY-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr3 + ; GREEDY-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr4 + ; GREEDY-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr5 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; GREEDY-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 + ; GREEDY-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr6 ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1024 - ; GREEDY-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY-NEXT: 
[[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY6]] - ; GREEDY-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[COPY5]](s32) - ; GREEDY-NEXT: [[ADD1:%[0-9]+]]:vgpr(s32) = G_ADD [[ADD]], [[COPY7]] + ; GREEDY-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; GREEDY-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[PRED_COPY4]], [[PRED_COPY6]] + ; GREEDY-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr(s32) = PRED_COPY [[PRED_COPY5]](s32) + ; GREEDY-NEXT: [[ADD1:%[0-9]+]]:vgpr(s32) = G_ADD [[ADD]], [[PRED_COPY7]] ; GREEDY-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[ADD]], [[COPY5]], 0, 0, 0 :: (dereferenceable invariant load (s32)) - ; GREEDY-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[ADD]], [[PRED_COPY5]], 0, 0, 0 :: (dereferenceable invariant load (s32)) + ; GREEDY-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_BUFFER_LOAD]](s32) ; GREEDY-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %offset.base = add i32 %offset.v, 1024 %offset = add i32 %offset.base, %offset.s diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.buffer.load.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.buffer.load.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.buffer.load.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.buffer.load.mir @@ -21,12 +21,12 @@ ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; FAST-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 256 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; FAST-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY3]], [[COPY2]] + ; FAST-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; FAST-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; FAST-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[PRED_COPY1]], [[PRED_COPY]] ; FAST-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; FAST-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; FAST-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[COPY]](<4 x s32>), [[C2]](s32), [[COPY3]], [[C1]], 256, 0, 0 :: (dereferenceable invariant load (s32)) + ; FAST-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[COPY]](<4 x s32>), [[C2]](s32), [[PRED_COPY1]], [[C1]], 256, 0, 0 :: (dereferenceable invariant load (s32)) ; FAST-NEXT: S_ENDPGM 0, implicit [[AMDGPU_BUFFER_LOAD]](s32) ; GREEDY-LABEL: name: s_buffer_load_f32_vgpr_offset_cross_bank_copy_add_offset ; GREEDY: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr0 @@ -34,11 +34,11 @@ ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; GREEDY-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 256 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; GREEDY-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY2]], [[C]] + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; GREEDY-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[PRED_COPY]], [[C]] ; GREEDY-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; GREEDY-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD 
[[COPY]](<4 x s32>), [[C2]](s32), [[COPY2]], [[C1]], 256, 0, 0 :: (dereferenceable invariant load (s32)) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[COPY]](<4 x s32>), [[C2]](s32), [[PRED_COPY]], [[C1]], 256, 0, 0 :: (dereferenceable invariant load (s32)) ; GREEDY-NEXT: S_ENDPGM 0, implicit [[AMDGPU_BUFFER_LOAD]](s32) %0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 %1:_(s32) = COPY $sgpr0 @@ -63,8 +63,8 @@ ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; FAST-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -60 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; FAST-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY1]], [[COPY2]] + ; FAST-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; FAST-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY1]], [[PRED_COPY]] ; FAST-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; FAST-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; FAST-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[COPY]](<4 x s32>), [[C2]](s32), [[ADD]], [[C1]], 0, 0, 0 :: (dereferenceable invariant load (s32)) @@ -75,8 +75,8 @@ ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -60 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY1]], [[COPY2]] + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; GREEDY-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY1]], [[PRED_COPY]] ; GREEDY-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; GREEDY-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[COPY]](<4 x s32>), [[C2]](s32), [[ADD]], [[C1]], 0, 0, 0 :: (dereferenceable invariant load (s32)) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.struct.buffer.load.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.struct.buffer.load.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.struct.buffer.load.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.struct.buffer.load.ll @@ -8,16 +8,16 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), [[COPY5]], [[COPY6]], 0, 0, -1 :: (dereferenceable load (s32), align 1, addrspace 7) - ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr(s32) = 
PRED_COPY $sgpr5 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[PRED_COPY4]](s32), [[PRED_COPY5]], [[PRED_COPY6]], 0, 0, -1 :: (dereferenceable load (s32), align 1, addrspace 7) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_BUFFER_LOAD]](s32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret float %val @@ -29,18 +29,18 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr7 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr(s32) = COPY $sgpr8 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[COPY4]](s32) - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY [[COPY5]](s32) - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY7]](s32), [[COPY8]], [[COPY6]], 0, 0, -1 :: (dereferenceable load (s32), align 1, addrspace 7) - ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr7 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr8 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr(s32) = PRED_COPY [[PRED_COPY4]](s32) + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr(s32) = PRED_COPY [[PRED_COPY5]](s32) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[PRED_COPY7]](s32), [[PRED_COPY8]], [[PRED_COPY6]], 0, 0, -1 :: (dereferenceable load (s32), align 1, addrspace 7) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_BUFFER_LOAD]](s32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret float %val @@ -53,14 +53,14 @@ ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; CHECK-NEXT: 
[[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr5 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr4 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr5 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} @@ -85,7 +85,7 @@ ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[COPY4]](s32), [[COPY5]], [[COPY6]], 0, 0, -1 :: (dereferenceable load (s32), align 1, addrspace 7) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[PRED_COPY4]](s32), [[PRED_COPY5]], [[PRED_COPY6]], 0, 0, -1 :: (dereferenceable load (s32), align 1, addrspace 7) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -95,7 +95,7 @@ ; CHECK-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.5: - ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_BUFFER_LOAD]](s32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret float %val @@ -108,14 +108,14 @@ ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr1 + ; 
CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} @@ -123,15 +123,15 @@ ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %14, %bb.3 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[COPY6]](s32), implicit $exec - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[V_READFIRSTLANE_B32_]](s32), [[COPY6]] + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[PRED_COPY6]](s32), implicit $exec + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[V_READFIRSTLANE_B32_]](s32), [[PRED_COPY6]] ; CHECK-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[ICMP]](s1) ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), [[COPY5]], [[V_READFIRSTLANE_B32_]], 0, 0, -1 :: (dereferenceable load (s32), align 1, addrspace 7) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[PRED_COPY4]](s32), [[PRED_COPY5]], [[V_READFIRSTLANE_B32_]], 0, 0, -1 :: (dereferenceable load (s32), align 1, addrspace 7) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -141,7 +141,7 @@ ; CHECK-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.5: - ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_BUFFER_LOAD]](s32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret float %val @@ -154,14 +154,14 @@ ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr5 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr6 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr4 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr5 + ; CHECK-NEXT: 
[[PRED_COPY6:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr6 ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} @@ -180,8 +180,8 @@ ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]] ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]] ; CHECK-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[COPY6]](s32), implicit $exec - ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[V_READFIRSTLANE_B32_4]](s32), [[COPY6]] + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[PRED_COPY6]](s32), implicit $exec + ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[V_READFIRSTLANE_B32_4]](s32), [[PRED_COPY6]] ; CHECK-NEXT: [[AND1:%[0-9]+]]:vcc(s1) = G_AND [[AND]], [[ICMP2]] ; CHECK-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND1]](s1) ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec @@ -189,7 +189,7 @@ ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[COPY4]](s32), [[COPY5]], [[V_READFIRSTLANE_B32_4]], 0, 0, -1 :: (dereferenceable load (s32), align 1, addrspace 7) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[PRED_COPY4]](s32), [[PRED_COPY5]], [[V_READFIRSTLANE_B32_4]], 0, 0, -1 :: (dereferenceable load (s32), align 1, addrspace 7) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -199,7 +199,7 @@ ; CHECK-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.5: - ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_BUFFER_LOAD]](s32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret float %val diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.struct.buffer.store.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.struct.buffer.store.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.struct.buffer.store.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.struct.buffer.store.ll @@ -8,16 +8,16 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 - ; CHECK-NEXT: G_AMDGPU_BUFFER_STORE 
[[COPY4]](s32), [[BUILD_VECTOR]](<4 x s32>), [[COPY5]](s32), [[COPY6]], [[COPY7]], 0, 0, -1 :: (dereferenceable store (s32), align 1, addrspace 7) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr6 + ; CHECK-NEXT: G_AMDGPU_BUFFER_STORE [[PRED_COPY4]](s32), [[BUILD_VECTOR]](<4 x s32>), [[PRED_COPY5]](s32), [[PRED_COPY6]], [[PRED_COPY7]], 0, 0, -1 :: (dereferenceable store (s32), align 1, addrspace 7) ; CHECK-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.struct.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret void @@ -29,19 +29,19 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr7 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr(s32) = COPY $sgpr8 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr(s32) = COPY $sgpr9 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY [[COPY4]](s32) - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr(s32) = COPY [[COPY5]](s32) - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr(s32) = COPY [[COPY6]](s32) - ; CHECK-NEXT: G_AMDGPU_BUFFER_STORE [[COPY8]](s32), [[BUILD_VECTOR]](<4 x s32>), [[COPY9]](s32), [[COPY10]], [[COPY7]], 0, 0, -1 :: (dereferenceable store (s32), align 1, addrspace 7) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr7 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr8 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr9 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr(s32) = PRED_COPY [[PRED_COPY4]](s32) + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr(s32) = PRED_COPY [[PRED_COPY5]](s32) + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr(s32) = PRED_COPY [[PRED_COPY6]](s32) + ; CHECK-NEXT: G_AMDGPU_BUFFER_STORE [[PRED_COPY8]](s32), [[BUILD_VECTOR]](<4 x s32>), [[PRED_COPY9]](s32), [[PRED_COPY10]], [[PRED_COPY7]], 0, 0, -1 :: (dereferenceable store (s32), align 1, addrspace 7) ; CHECK-NEXT: S_ENDPGM 0 call void 
@llvm.amdgcn.struct.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret void @@ -54,15 +54,15 @@ ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr5 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY $vgpr6 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr4 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr5 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr6 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} @@ -87,7 +87,7 @@ ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: G_AMDGPU_BUFFER_STORE [[COPY4]](s32), [[BUILD_VECTOR1]](<4 x s32>), [[COPY5]](s32), [[COPY6]], [[COPY7]], 0, 0, -1 :: (dereferenceable store (s32), align 1, addrspace 7) + ; CHECK-NEXT: G_AMDGPU_BUFFER_STORE [[PRED_COPY4]](s32), [[BUILD_VECTOR1]](<4 x s32>), [[PRED_COPY5]](s32), [[PRED_COPY6]], [[PRED_COPY7]], 0, 0, -1 :: (dereferenceable store (s32), align 1, addrspace 7) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -109,15 +109,15 @@ ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr3 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR 
[[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr3 ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} @@ -125,15 +125,15 @@ ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %14, %bb.3 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[COPY7]](s32), implicit $exec - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[V_READFIRSTLANE_B32_]](s32), [[COPY7]] + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[PRED_COPY7]](s32), implicit $exec + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[V_READFIRSTLANE_B32_]](s32), [[PRED_COPY7]] ; CHECK-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[ICMP]](s1) ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: G_AMDGPU_BUFFER_STORE [[COPY4]](s32), [[BUILD_VECTOR]](<4 x s32>), [[COPY5]](s32), [[COPY6]], [[V_READFIRSTLANE_B32_]], 0, 0, -1 :: (dereferenceable store (s32), align 1, addrspace 7) + ; CHECK-NEXT: G_AMDGPU_BUFFER_STORE [[PRED_COPY4]](s32), [[BUILD_VECTOR]](<4 x s32>), [[PRED_COPY5]](s32), [[PRED_COPY6]], [[V_READFIRSTLANE_B32_]], 0, 0, -1 :: (dereferenceable store (s32), align 1, addrspace 7) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -155,15 +155,15 @@ ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr5 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY $vgpr6 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr7 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr4 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr5 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr6 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32(s32) = PRED_COPY 
$vgpr7 ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} @@ -182,8 +182,8 @@ ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]] ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]] ; CHECK-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[COPY7]](s32), implicit $exec - ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[V_READFIRSTLANE_B32_4]](s32), [[COPY7]] + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[PRED_COPY7]](s32), implicit $exec + ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[V_READFIRSTLANE_B32_4]](s32), [[PRED_COPY7]] ; CHECK-NEXT: [[AND1:%[0-9]+]]:vcc(s1) = G_AND [[AND]], [[ICMP2]] ; CHECK-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND1]](s1) ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec @@ -191,7 +191,7 @@ ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: G_AMDGPU_BUFFER_STORE [[COPY4]](s32), [[BUILD_VECTOR1]](<4 x s32>), [[COPY5]](s32), [[COPY6]], [[V_READFIRSTLANE_B32_4]], 0, 0, -1 :: (dereferenceable store (s32), align 1, addrspace 7) + ; CHECK-NEXT: G_AMDGPU_BUFFER_STORE [[PRED_COPY4]](s32), [[BUILD_VECTOR1]](<4 x s32>), [[PRED_COPY5]](s32), [[PRED_COPY6]], [[V_READFIRSTLANE_B32_4]], 0, 0, -1 :: (dereferenceable store (s32), align 1, addrspace 7) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.update.dpp.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.update.dpp.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.update.dpp.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.update.dpp.mir @@ -14,9 +14,9 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p3) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p3) = COPY [[COPY]](p3) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.update.dpp), [[COPY2]](p3), [[COPY3]](s32), 0, 0, 0, 0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p3) = PRED_COPY [[COPY]](p3) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.update.dpp), [[PRED_COPY]](p3), [[PRED_COPY1]](s32), 0, 0, 0, 0 %0:_(p3) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 %2:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.update.dpp), %0, %1, 0, 0, 0, 0 @@ -36,8 +36,8 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p3) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p3) = COPY [[COPY]](p3) - ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.update.dpp), [[COPY2]](p3), [[COPY1]](s32), 0, 0, 0, 0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p3) = PRED_COPY [[COPY]](p3) + ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.update.dpp), 
[[PRED_COPY]](p3), [[COPY1]](s32), 0, 0, 0, 0 %0:_(p3) = COPY $sgpr0 %1:_(s32) = COPY $vgpr0 %2:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.update.dpp), %0, %1, 0, 0, 0, 0 @@ -57,8 +57,8 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.update.dpp), [[COPY]](p3), [[COPY2]](s32), 0, 0, 0, 0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.update.dpp), [[COPY]](p3), [[PRED_COPY]](s32), 0, 0, 0, 0 %0:_(p3) = COPY $vgpr0 %1:_(s32) = COPY $sgpr0 %2:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.update.dpp), %0, %1, 0, 0, 0, 0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.wqm.demote.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.wqm.demote.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.wqm.demote.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.wqm.demote.mir @@ -12,14 +12,14 @@ ; CHECK-LABEL: name: wqm_demote_scc ; CHECK: liveins: $sgpr0, $sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY1]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr1 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[PRED_COPY]](s32), [[PRED_COPY1]] ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.wqm.demote), [[COPY2]](s1) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) + ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.wqm.demote), [[PRED_COPY]](s1) + %0:_(s32) = PRED_COPY $sgpr0 + %1:_(s32) = PRED_COPY $sgpr1 %2:_(s1) = G_ICMP intpred(eq), %0, %1 G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.wqm.demote), %2 ... @@ -34,12 +34,12 @@ ; CHECK-LABEL: name: wqm_demote_vcc ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY1]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[PRED_COPY]](s32), [[PRED_COPY1]] ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.wqm.demote), [[ICMP]](s1) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 + %0:_(s32) = PRED_COPY $vgpr0 + %1:_(s32) = PRED_COPY $vgpr1 %2:_(s1) = G_ICMP intpred(eq), %0, %1 G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.wqm.demote), %2 ... 
@@ -53,8 +53,8 @@ ; CHECK-LABEL: name: wqm_demote_constant_true ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[C]](s32) - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.wqm.demote), [[COPY]](s1) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) + ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.wqm.demote), [[PRED_COPY]](s1) %0:_(s1) = G_CONSTANT i1 true G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.wqm.demote), %0 ... @@ -68,8 +68,8 @@ ; CHECK-LABEL: name: wqm_demote_constant_false ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[C]](s32) - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.wqm.demote), [[COPY]](s1) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) + ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.wqm.demote), [[PRED_COPY]](s1) %0:_(s1) = G_CONSTANT i1 false G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.wqm.demote), %0 ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.wqm.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.wqm.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.wqm.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.wqm.mir @@ -13,8 +13,8 @@ ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.wqm), [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.wqm), [[PRED_COPY]](s32) %0:_(s32) = COPY $sgpr0 %1:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.wqm), %0 ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.wqm.vote.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.wqm.vote.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.wqm.vote.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.wqm.vote.mir @@ -16,8 +16,8 @@ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]] ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; CHECK-NEXT: [[INT:%[0-9]+]]:vcc(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.wqm.vote), [[COPY2]](s1) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) + ; CHECK-NEXT: [[INT:%[0-9]+]]:vcc(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.wqm.vote), [[PRED_COPY]](s1) %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 %2:_(s1) = G_ICMP intpred(ne), %0, %1 @@ -56,8 +56,8 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; CHECK-NEXT: [[INT:%[0-9]+]]:vcc(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.wqm.vote), [[COPY1]](s1) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) + ; CHECK-NEXT: [[INT:%[0-9]+]]:vcc(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.wqm.vote), [[PRED_COPY]](s1) %0:_(s32) = COPY $sgpr0 %1:_(s1) = G_TRUNC %0 %2:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.wqm.vote), %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.writelane.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.writelane.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.writelane.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.writelane.mir @@ -15,8 +15,8 @@ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.writelane), [[COPY]](s32), [[COPY1]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY2]](s32) + ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.writelane), [[COPY]](s32), [[COPY1]](s32), [[PRED_COPY]](s32) %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 %2:_(s32) = COPY $sgpr2 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.wwm.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.wwm.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.wwm.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.wwm.mir @@ -13,8 +13,8 @@ ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.strict.wwm), [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.strict.wwm), [[PRED_COPY]](s32) %0:_(s32) = COPY $sgpr0 %1:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.strict.wwm), %0 ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgpu-wave-address.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgpu-wave-address.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgpu-wave-address.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgpu-wave-address.mir @@ -24,8 +24,8 @@ ; CHECK-LABEL: name: amdgpu_wave_address_v ; CHECK: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[AMDGPU_WAVE_ADDRESS:%[0-9]+]]:vgpr(p5) = G_AMDGPU_WAVE_ADDRESS $sgpr32 - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY [[DEF]](p1) - ; CHECK-NEXT: G_STORE [[AMDGPU_WAVE_ADDRESS]](p5), [[COPY]](p1) :: (store (p5), addrspace 1) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p1) = PRED_COPY [[DEF]](p1) + ; CHECK-NEXT: G_STORE [[AMDGPU_WAVE_ADDRESS]](p5), [[PRED_COPY]](p1) :: (store (p5), addrspace 1) %0:_(p1) = G_IMPLICIT_DEF %1:_(p5) = G_AMDGPU_WAVE_ADDRESS $sgpr32 G_STORE %1, %0 :: (store (p5), addrspace 1) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-and-s1.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-and-s1.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-and-s1.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-and-s1.mir @@ -94,10 +94,10 @@ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY2]] - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[COPY3]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[PRED_COPY]] + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[PRED_COPY1]] ; CHECK-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 @@ -173,10 +173,10 @@ ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[COPY2]] - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; CHECK-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[COPY3]], [[ICMP1]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[PRED_COPY]] + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) + ; CHECK-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[PRED_COPY1]], [[ICMP1]] %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $vgpr0 %2:_(s32) = G_CONSTANT i32 0 @@ -198,10 +198,10 @@ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY2]] - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), 
[[COPY3]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[PRED_COPY]] + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[PRED_COPY1]] ; CHECK-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-and.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-and.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-and.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-and.mir @@ -32,8 +32,8 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[COPY2]], [[COPY1]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[PRED_COPY]], [[COPY1]] %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $vgpr0 %2:_(s32) = G_AND %0, %1 @@ -51,8 +51,8 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[COPY]], [[COPY2]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[COPY]], [[PRED_COPY]] %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $sgpr0 %2:_(s32) = G_AND %0, %1 @@ -256,8 +256,8 @@ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY3]](s32), [[COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[PRED_COPY]](s32), [[COPY2]](s32) ; CHECK-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[COPY]](s64) ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[MV]](s64) ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UV]], [[UV2]] @@ -285,8 +285,8 @@ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[PRED_COPY]](s32) ; CHECK-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[COPY]](s64) ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[MV]](s64) ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UV]], [[UV2]] @@ -315,10 +315,10 @@ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: 
[[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY2]](s32) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[MV1:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY5]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[PRED_COPY]](s32), [[COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[MV1:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[PRED_COPY1]](s32), [[COPY3]](s32) ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[MV]](s64) ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[MV1]](s64) ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UV]], [[UV2]] @@ -349,10 +349,10 @@ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY2]](s32) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[MV1:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY3]](s32), [[COPY5]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[PRED_COPY]](s32), [[COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[MV1:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY3]](s32), [[PRED_COPY1]](s32) ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[MV]](s64) ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[MV1]](s64) ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UV]], [[UV2]] @@ -608,8 +608,8 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY]](<2 x s16>) - ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(<2 x s16>) = G_AND [[COPY2]], [[COPY1]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(<2 x s16>) = PRED_COPY [[COPY]](<2 x s16>) + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(<2 x s16>) = G_AND [[PRED_COPY]], [[COPY1]] %0:_(<2 x s16>) = COPY $sgpr0 %1:_(<2 x s16>) = COPY $vgpr0 %2:_(<2 x s16>) = G_AND %0, %1 @@ -627,8 +627,8 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY1]](<2 x s16>) - ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(<2 x s16>) = G_AND [[COPY]], [[COPY2]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(<2 x s16>) = PRED_COPY [[COPY1]](<2 x s16>) + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(<2 x s16>) = G_AND [[COPY]], [[PRED_COPY]] %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s16>) = COPY $sgpr0 %2:_(<2 x s16>) = G_AND %0, %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-anyext.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-anyext.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-anyext.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-anyext.mir @@ -29,9 +29,9 @@ ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: 
[[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY1]](s32), [[DEF]](s32) + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[PRED_COPY]](s32), [[DEF]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s64) = G_ANYEXT %0 ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-ashr.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-ashr.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-ashr.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-ashr.mir @@ -34,8 +34,8 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[ASHR:%[0-9]+]]:vgpr(s32) = G_ASHR [[COPY2]], [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[ASHR:%[0-9]+]]:vgpr(s32) = G_ASHR [[PRED_COPY]], [[COPY1]](s32) ; CHECK-NEXT: S_ENDPGM 0, implicit [[ASHR]](s32) %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $vgpr0 @@ -55,8 +55,8 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[ASHR:%[0-9]+]]:vgpr(s32) = G_ASHR [[COPY]], [[COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[ASHR:%[0-9]+]]:vgpr(s32) = G_ASHR [[COPY]], [[PRED_COPY]](s32) ; CHECK-NEXT: S_ENDPGM 0, implicit [[ASHR]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $sgpr0 @@ -126,8 +126,8 @@ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY1]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s16) = COPY [[TRUNC]](s16) - ; CHECK-NEXT: [[ASHR:%[0-9]+]]:vgpr(s16) = G_ASHR [[COPY2]], [[TRUNC1]](s16) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s16) = PRED_COPY [[TRUNC]](s16) + ; CHECK-NEXT: [[ASHR:%[0-9]+]]:vgpr(s16) = G_ASHR [[PRED_COPY]], [[TRUNC1]](s16) ; CHECK-NEXT: S_ENDPGM 0, implicit [[ASHR]](s16) %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $vgpr0 @@ -151,8 +151,8 @@ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY1]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s16) = COPY [[TRUNC1]](s16) - ; CHECK-NEXT: [[ASHR:%[0-9]+]]:vgpr(s16) = G_ASHR [[TRUNC]], [[COPY2]](s16) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s16) = PRED_COPY [[TRUNC1]](s16) + ; CHECK-NEXT: [[ASHR:%[0-9]+]]:vgpr(s16) = G_ASHR [[TRUNC]], [[PRED_COPY]](s16) ; CHECK-NEXT: S_ENDPGM 0, implicit [[ASHR]](s16) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $sgpr0 @@ -231,8 +231,8 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY]](<2 x s16>) - ; CHECK-NEXT: [[ASHR:%[0-9]+]]:vgpr(<2 x s16>) = G_ASHR [[COPY2]], [[COPY1]](<2 x s16>) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(<2 x s16>) = PRED_COPY [[COPY]](<2 x s16>) + ; CHECK-NEXT: [[ASHR:%[0-9]+]]:vgpr(<2 x s16>) = G_ASHR [[PRED_COPY]], [[COPY1]](<2 x s16>) ; CHECK-NEXT: S_ENDPGM 0, implicit [[ASHR]](<2 x s16>) %0:_(<2 x s16>) = COPY $sgpr0 %1:_(<2 x s16>) = COPY $vgpr0 @@ 
-252,8 +252,8 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY1]](<2 x s16>) - ; CHECK-NEXT: [[ASHR:%[0-9]+]]:vgpr(<2 x s16>) = G_ASHR [[COPY]], [[COPY2]](<2 x s16>) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(<2 x s16>) = PRED_COPY [[COPY1]](<2 x s16>) + ; CHECK-NEXT: [[ASHR:%[0-9]+]]:vgpr(<2 x s16>) = G_ASHR [[COPY]], [[PRED_COPY]](<2 x s16>) ; CHECK-NEXT: S_ENDPGM 0, implicit [[ASHR]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s16>) = COPY $sgpr0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomic-cmpxchg.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomic-cmpxchg.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomic-cmpxchg.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomic-cmpxchg.mir @@ -15,10 +15,10 @@ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[ATOMIC_CMPXCHG:%[0-9]+]]:vgpr(s32) = G_ATOMIC_CMPXCHG [[COPY3]](p1), [[COPY4]], [[COPY5]] :: (load store seq_cst (s32), addrspace 1) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p1) = PRED_COPY [[COPY]](p1) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY2]](s32) + ; CHECK-NEXT: [[ATOMIC_CMPXCHG:%[0-9]+]]:vgpr(s32) = G_ATOMIC_CMPXCHG [[PRED_COPY]](p1), [[PRED_COPY1]], [[PRED_COPY2]] :: (load store seq_cst (s32), addrspace 1) %0:_(p1) = COPY $sgpr0_sgpr1 %1:_(s32) = COPY $sgpr2 %2:_(s32) = COPY $sgpr3 @@ -38,10 +38,10 @@ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p0) = COPY $sgpr0_sgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(p0) = COPY [[COPY]](p0) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[ATOMIC_CMPXCHG:%[0-9]+]]:vgpr(s32) = G_ATOMIC_CMPXCHG [[COPY3]](p0), [[COPY4]], [[COPY5]] :: (load store seq_cst (s32)) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p0) = PRED_COPY [[COPY]](p0) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY2]](s32) + ; CHECK-NEXT: [[ATOMIC_CMPXCHG:%[0-9]+]]:vgpr(s32) = G_ATOMIC_CMPXCHG [[PRED_COPY]](p0), [[PRED_COPY1]], [[PRED_COPY2]] :: (load store seq_cst (s32)) %0:_(p0) = COPY $sgpr0_sgpr1 %1:_(s32) = COPY $sgpr2 %2:_(s32) = COPY $sgpr3 @@ -61,10 +61,10 @@ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p3) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(p3) = COPY [[COPY]](p3) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[ATOMIC_CMPXCHG:%[0-9]+]]:vgpr(s32) = G_ATOMIC_CMPXCHG [[COPY3]](p3), [[COPY4]], [[COPY5]] :: (load store seq_cst (s32), addrspace 3) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p3) = PRED_COPY [[COPY]](p3) + ; CHECK-NEXT: 
[[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY2]](s32) + ; CHECK-NEXT: [[ATOMIC_CMPXCHG:%[0-9]+]]:vgpr(s32) = G_ATOMIC_CMPXCHG [[PRED_COPY]](p3), [[PRED_COPY1]], [[PRED_COPY2]] :: (load store seq_cst (s32), addrspace 3) %0:_(p3) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 %2:_(s32) = COPY $sgpr2 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-add.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-add.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-add.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-add.mir @@ -14,9 +14,9 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[ATOMICRMW_ADD:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_ADD [[COPY2]](p1), [[COPY3]] :: (load store seq_cst (s32), addrspace 1) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p1) = PRED_COPY [[COPY]](p1) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[ATOMICRMW_ADD:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_ADD [[PRED_COPY]](p1), [[PRED_COPY1]] :: (load store seq_cst (s32), addrspace 1) %0:_(p1) = COPY $sgpr0_sgpr1 %1:_(s32) = COPY $sgpr2 %2:_(s32) = G_ATOMICRMW_ADD %0, %1 :: (load store seq_cst (s32), addrspace 1) @@ -34,9 +34,9 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p0) = COPY $sgpr0_sgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p0) = COPY [[COPY]](p0) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[ATOMICRMW_ADD:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_ADD [[COPY2]](p0), [[COPY3]] :: (load store seq_cst (s32)) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p0) = PRED_COPY [[COPY]](p0) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[ATOMICRMW_ADD:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_ADD [[PRED_COPY]](p0), [[PRED_COPY1]] :: (load store seq_cst (s32)) %0:_(p0) = COPY $sgpr0_sgpr1 %1:_(s32) = COPY $sgpr2 %2:_(s32) = G_ATOMICRMW_ADD %0, %1 :: (load store seq_cst (s32), addrspace 0) @@ -54,9 +54,9 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p3) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p3) = COPY [[COPY]](p3) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[ATOMICRMW_ADD:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_ADD [[COPY2]](p3), [[COPY3]] :: (load store seq_cst (s32), addrspace 3) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p3) = PRED_COPY [[COPY]](p3) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[ATOMICRMW_ADD:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_ADD [[PRED_COPY]](p3), [[PRED_COPY1]] :: (load store seq_cst (s32), addrspace 3) %0:_(p3) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 %2:_(s32) = G_ATOMICRMW_ADD %0, %1 :: (load store seq_cst (s32), addrspace 3) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-and.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-and.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-and.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-and.mir @@ -14,9 +14,9 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) 
= COPY $sgpr0_sgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[ATOMICRMW_AND:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_AND [[COPY2]](p1), [[COPY3]] :: (load store seq_cst (s32), addrspace 1) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p1) = PRED_COPY [[COPY]](p1) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[ATOMICRMW_AND:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_AND [[PRED_COPY]](p1), [[PRED_COPY1]] :: (load store seq_cst (s32), addrspace 1) %0:_(p1) = COPY $sgpr0_sgpr1 %1:_(s32) = COPY $sgpr2 %2:_(s32) = G_ATOMICRMW_AND %0, %1 :: (load store seq_cst (s32), addrspace 1) @@ -34,9 +34,9 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p0) = COPY $sgpr0_sgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p0) = COPY [[COPY]](p0) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[ATOMICRMW_AND:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_AND [[COPY2]](p0), [[COPY3]] :: (load store seq_cst (s32)) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p0) = PRED_COPY [[COPY]](p0) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[ATOMICRMW_AND:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_AND [[PRED_COPY]](p0), [[PRED_COPY1]] :: (load store seq_cst (s32)) %0:_(p0) = COPY $sgpr0_sgpr1 %1:_(s32) = COPY $sgpr2 %2:_(s32) = G_ATOMICRMW_AND %0, %1 :: (load store seq_cst (s32), addrspace 0) @@ -54,9 +54,9 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p3) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p3) = COPY [[COPY]](p3) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[ATOMICRMW_AND:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_AND [[COPY2]](p3), [[COPY3]] :: (load store seq_cst (s32), addrspace 3) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p3) = PRED_COPY [[COPY]](p3) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[ATOMICRMW_AND:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_AND [[PRED_COPY]](p3), [[PRED_COPY1]] :: (load store seq_cst (s32), addrspace 3) %0:_(p3) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 %2:_(s32) = G_ATOMICRMW_AND %0, %1 :: (load store seq_cst (s32), addrspace 3) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-fadd.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-fadd.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-fadd.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-fadd.mir @@ -14,9 +14,9 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p3) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p3) = COPY [[COPY]](p3) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[ATOMICRMW_FADD:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_FADD [[COPY2]](p3), [[COPY3]] :: (load store seq_cst (s32), addrspace 3) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p3) = PRED_COPY [[COPY]](p3) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[ATOMICRMW_FADD:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_FADD [[PRED_COPY]](p3), [[PRED_COPY1]] :: (load store seq_cst (s32), addrspace 3) %0:_(p3) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 %2:_(s32) = G_ATOMICRMW_FADD %0, %1 :: (load store seq_cst 
(s32), addrspace 3) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-max.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-max.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-max.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-max.mir @@ -14,9 +14,9 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[ATOMICRMW_MAX:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_MAX [[COPY2]](p1), [[COPY3]] :: (load store seq_cst (s32), addrspace 1) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p1) = PRED_COPY [[COPY]](p1) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[ATOMICRMW_MAX:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_MAX [[PRED_COPY]](p1), [[PRED_COPY1]] :: (load store seq_cst (s32), addrspace 1) %0:_(p1) = COPY $sgpr0_sgpr1 %1:_(s32) = COPY $sgpr2 %2:_(s32) = G_ATOMICRMW_MAX %0, %1 :: (load store seq_cst (s32), addrspace 1) @@ -34,9 +34,9 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p0) = COPY $sgpr0_sgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p0) = COPY [[COPY]](p0) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[ATOMICRMW_MAX:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_MAX [[COPY2]](p0), [[COPY3]] :: (load store seq_cst (s32)) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p0) = PRED_COPY [[COPY]](p0) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[ATOMICRMW_MAX:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_MAX [[PRED_COPY]](p0), [[PRED_COPY1]] :: (load store seq_cst (s32)) %0:_(p0) = COPY $sgpr0_sgpr1 %1:_(s32) = COPY $sgpr2 %2:_(s32) = G_ATOMICRMW_MAX %0, %1 :: (load store seq_cst (s32), addrspace 0) @@ -54,9 +54,9 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p3) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p3) = COPY [[COPY]](p3) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[ATOMICRMW_MAX:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_MAX [[COPY2]](p3), [[COPY3]] :: (load store seq_cst (s32), addrspace 3) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p3) = PRED_COPY [[COPY]](p3) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[ATOMICRMW_MAX:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_MAX [[PRED_COPY]](p3), [[PRED_COPY1]] :: (load store seq_cst (s32), addrspace 3) %0:_(p3) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 %2:_(s32) = G_ATOMICRMW_MAX %0, %1 :: (load store seq_cst (s32), addrspace 3) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-min.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-min.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-min.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-min.mir @@ -14,9 +14,9 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[ATOMICRMW_MIN:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_MIN [[COPY2]](p1), [[COPY3]] :: (load store seq_cst (s32), addrspace 1) + ; 
CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p1) = PRED_COPY [[COPY]](p1) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[ATOMICRMW_MIN:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_MIN [[PRED_COPY]](p1), [[PRED_COPY1]] :: (load store seq_cst (s32), addrspace 1) %0:_(p1) = COPY $sgpr0_sgpr1 %1:_(s32) = COPY $sgpr2 %2:_(s32) = G_ATOMICRMW_MIN %0, %1 :: (load store seq_cst (s32), addrspace 1) @@ -34,9 +34,9 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p0) = COPY $sgpr0_sgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p0) = COPY [[COPY]](p0) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[ATOMICRMW_MIN:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_MIN [[COPY2]](p0), [[COPY3]] :: (load store seq_cst (s32)) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p0) = PRED_COPY [[COPY]](p0) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[ATOMICRMW_MIN:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_MIN [[PRED_COPY]](p0), [[PRED_COPY1]] :: (load store seq_cst (s32)) %0:_(p0) = COPY $sgpr0_sgpr1 %1:_(s32) = COPY $sgpr2 %2:_(s32) = G_ATOMICRMW_MIN %0, %1 :: (load store seq_cst (s32), addrspace 0) @@ -54,9 +54,9 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p3) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p3) = COPY [[COPY]](p3) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[ATOMICRMW_MIN:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_MIN [[COPY2]](p3), [[COPY3]] :: (load store seq_cst (s32), addrspace 3) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p3) = PRED_COPY [[COPY]](p3) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[ATOMICRMW_MIN:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_MIN [[PRED_COPY]](p3), [[PRED_COPY1]] :: (load store seq_cst (s32), addrspace 3) %0:_(p3) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 %2:_(s32) = G_ATOMICRMW_MIN %0, %1 :: (load store seq_cst (s32), addrspace 3) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-or.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-or.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-or.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-or.mir @@ -14,9 +14,9 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[ATOMICRMW_OR:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_OR [[COPY2]](p1), [[COPY3]] :: (load store seq_cst (s32), addrspace 1) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p1) = PRED_COPY [[COPY]](p1) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[ATOMICRMW_OR:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_OR [[PRED_COPY]](p1), [[PRED_COPY1]] :: (load store seq_cst (s32), addrspace 1) %0:_(p1) = COPY $sgpr0_sgpr1 %1:_(s32) = COPY $sgpr2 %2:_(s32) = G_ATOMICRMW_OR %0, %1 :: (load store seq_cst (s32), addrspace 1) @@ -34,9 +34,9 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p0) = COPY $sgpr0_sgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p0) = COPY [[COPY]](p0) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[ATOMICRMW_OR:%[0-9]+]]:vgpr(s32) = 
G_ATOMICRMW_OR [[COPY2]](p0), [[COPY3]] :: (load store seq_cst (s32)) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p0) = PRED_COPY [[COPY]](p0) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[ATOMICRMW_OR:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_OR [[PRED_COPY]](p0), [[PRED_COPY1]] :: (load store seq_cst (s32)) %0:_(p0) = COPY $sgpr0_sgpr1 %1:_(s32) = COPY $sgpr2 %2:_(s32) = G_ATOMICRMW_OR %0, %1 :: (load store seq_cst (s32), addrspace 0) @@ -54,9 +54,9 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p3) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p3) = COPY [[COPY]](p3) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[ATOMICRMW_OR:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_OR [[COPY2]](p3), [[COPY3]] :: (load store seq_cst (s32), addrspace 3) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p3) = PRED_COPY [[COPY]](p3) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[ATOMICRMW_OR:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_OR [[PRED_COPY]](p3), [[PRED_COPY1]] :: (load store seq_cst (s32), addrspace 3) %0:_(p3) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 %2:_(s32) = G_ATOMICRMW_OR %0, %1 :: (load store seq_cst (s32), addrspace 3) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-sub.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-sub.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-sub.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-sub.mir @@ -14,9 +14,9 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[ATOMICRMW_SUB:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_SUB [[COPY2]](p1), [[COPY3]] :: (load store seq_cst (s32), addrspace 1) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p1) = PRED_COPY [[COPY]](p1) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[ATOMICRMW_SUB:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_SUB [[PRED_COPY]](p1), [[PRED_COPY1]] :: (load store seq_cst (s32), addrspace 1) %0:_(p1) = COPY $sgpr0_sgpr1 %1:_(s32) = COPY $sgpr2 %2:_(s32) = G_ATOMICRMW_SUB %0, %1 :: (load store seq_cst (s32), addrspace 1) @@ -34,9 +34,9 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p0) = COPY $sgpr0_sgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p0) = COPY [[COPY]](p0) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[ATOMICRMW_SUB:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_SUB [[COPY2]](p0), [[COPY3]] :: (load store seq_cst (s32)) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p0) = PRED_COPY [[COPY]](p0) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[ATOMICRMW_SUB:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_SUB [[PRED_COPY]](p0), [[PRED_COPY1]] :: (load store seq_cst (s32)) %0:_(p0) = COPY $sgpr0_sgpr1 %1:_(s32) = COPY $sgpr2 %2:_(s32) = G_ATOMICRMW_SUB %0, %1 :: (load store seq_cst (s32), addrspace 0) @@ -54,9 +54,9 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p3) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p3) = COPY [[COPY]](p3) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY 
[[COPY1]](s32) - ; CHECK-NEXT: [[ATOMICRMW_SUB:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_SUB [[COPY2]](p3), [[COPY3]] :: (load store seq_cst (s32), addrspace 3) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p3) = PRED_COPY [[COPY]](p3) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[ATOMICRMW_SUB:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_SUB [[PRED_COPY]](p3), [[PRED_COPY1]] :: (load store seq_cst (s32), addrspace 3) %0:_(p3) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 %2:_(s32) = G_ATOMICRMW_SUB %0, %1 :: (load store seq_cst (s32), addrspace 3) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-umax.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-umax.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-umax.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-umax.mir @@ -14,9 +14,9 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[ATOMICRMW_UMAX:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_UMAX [[COPY2]](p1), [[COPY3]] :: (load store seq_cst (s32), addrspace 1) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p1) = PRED_COPY [[COPY]](p1) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[ATOMICRMW_UMAX:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_UMAX [[PRED_COPY]](p1), [[PRED_COPY1]] :: (load store seq_cst (s32), addrspace 1) %0:_(p1) = COPY $sgpr0_sgpr1 %1:_(s32) = COPY $sgpr2 %2:_(s32) = G_ATOMICRMW_UMAX %0, %1 :: (load store seq_cst (s32), addrspace 1) @@ -34,9 +34,9 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p0) = COPY $sgpr0_sgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p0) = COPY [[COPY]](p0) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[ATOMICRMW_UMAX:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_UMAX [[COPY2]](p0), [[COPY3]] :: (load store seq_cst (s32)) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p0) = PRED_COPY [[COPY]](p0) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[ATOMICRMW_UMAX:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_UMAX [[PRED_COPY]](p0), [[PRED_COPY1]] :: (load store seq_cst (s32)) %0:_(p0) = COPY $sgpr0_sgpr1 %1:_(s32) = COPY $sgpr2 %2:_(s32) = G_ATOMICRMW_UMAX %0, %1 :: (load store seq_cst (s32), addrspace 0) @@ -54,9 +54,9 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p3) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p3) = COPY [[COPY]](p3) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[ATOMICRMW_UMAX:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_UMAX [[COPY2]](p3), [[COPY3]] :: (load store seq_cst (s32), addrspace 3) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p3) = PRED_COPY [[COPY]](p3) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[ATOMICRMW_UMAX:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_UMAX [[PRED_COPY]](p3), [[PRED_COPY1]] :: (load store seq_cst (s32), addrspace 3) %0:_(p3) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 %2:_(s32) = G_ATOMICRMW_UMAX %0, %1 :: (load store seq_cst (s32), addrspace 3) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-umin.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-umin.mir --- 
a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-umin.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-umin.mir @@ -14,9 +14,9 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[ATOMICRMW_UMIN:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_UMIN [[COPY2]](p1), [[COPY3]] :: (load store seq_cst (s32), addrspace 1) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p1) = PRED_COPY [[COPY]](p1) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[ATOMICRMW_UMIN:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_UMIN [[PRED_COPY]](p1), [[PRED_COPY1]] :: (load store seq_cst (s32), addrspace 1) %0:_(p1) = COPY $sgpr0_sgpr1 %1:_(s32) = COPY $sgpr2 %2:_(s32) = G_ATOMICRMW_UMIN %0, %1 :: (load store seq_cst (s32), addrspace 1) @@ -34,9 +34,9 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p0) = COPY $sgpr0_sgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p0) = COPY [[COPY]](p0) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[ATOMICRMW_UMIN:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_UMIN [[COPY2]](p0), [[COPY3]] :: (load store seq_cst (s32)) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p0) = PRED_COPY [[COPY]](p0) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[ATOMICRMW_UMIN:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_UMIN [[PRED_COPY]](p0), [[PRED_COPY1]] :: (load store seq_cst (s32)) %0:_(p0) = COPY $sgpr0_sgpr1 %1:_(s32) = COPY $sgpr2 %2:_(s32) = G_ATOMICRMW_UMIN %0, %1 :: (load store seq_cst (s32), addrspace 0) @@ -54,9 +54,9 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p3) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p3) = COPY [[COPY]](p3) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[ATOMICRMW_UMIN:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_UMIN [[COPY2]](p3), [[COPY3]] :: (load store seq_cst (s32), addrspace 3) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p3) = PRED_COPY [[COPY]](p3) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[ATOMICRMW_UMIN:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_UMIN [[PRED_COPY]](p3), [[PRED_COPY1]] :: (load store seq_cst (s32), addrspace 3) %0:_(p3) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 %2:_(s32) = G_ATOMICRMW_UMIN %0, %1 :: (load store seq_cst (s32), addrspace 3) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-xchg.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-xchg.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-xchg.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-xchg.mir @@ -14,9 +14,9 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[ATOMICRMW_XCHG:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_XCHG [[COPY2]](p1), [[COPY3]] :: (load store seq_cst (s32), addrspace 1) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p1) = PRED_COPY [[COPY]](p1) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; 
CHECK-NEXT: [[ATOMICRMW_XCHG:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_XCHG [[PRED_COPY]](p1), [[PRED_COPY1]] :: (load store seq_cst (s32), addrspace 1) %0:_(p1) = COPY $sgpr0_sgpr1 %1:_(s32) = COPY $sgpr2 %2:_(s32) = G_ATOMICRMW_XCHG %0, %1 :: (load store seq_cst (s32), addrspace 1) @@ -34,9 +34,9 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p0) = COPY $sgpr0_sgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p0) = COPY [[COPY]](p0) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[ATOMICRMW_XCHG:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_XCHG [[COPY2]](p0), [[COPY3]] :: (load store seq_cst (s32)) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p0) = PRED_COPY [[COPY]](p0) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[ATOMICRMW_XCHG:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_XCHG [[PRED_COPY]](p0), [[PRED_COPY1]] :: (load store seq_cst (s32)) %0:_(p0) = COPY $sgpr0_sgpr1 %1:_(s32) = COPY $sgpr2 %2:_(s32) = G_ATOMICRMW_XCHG %0, %1 :: (load store seq_cst (s32), addrspace 0) @@ -54,9 +54,9 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p3) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p3) = COPY [[COPY]](p3) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[ATOMICRMW_XCHG:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_XCHG [[COPY2]](p3), [[COPY3]] :: (load store seq_cst (s32), addrspace 3) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p3) = PRED_COPY [[COPY]](p3) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[ATOMICRMW_XCHG:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_XCHG [[PRED_COPY]](p3), [[PRED_COPY1]] :: (load store seq_cst (s32), addrspace 3) %0:_(p3) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 %2:_(s32) = G_ATOMICRMW_XCHG %0, %1 :: (load store seq_cst (s32), addrspace 3) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-xor.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-xor.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-xor.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-xor.mir @@ -14,9 +14,9 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[ATOMICRMW_XOR:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_XOR [[COPY2]](p1), [[COPY3]] :: (load store seq_cst (s32), addrspace 1) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p1) = PRED_COPY [[COPY]](p1) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[ATOMICRMW_XOR:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_XOR [[PRED_COPY]](p1), [[PRED_COPY1]] :: (load store seq_cst (s32), addrspace 1) %0:_(p1) = COPY $sgpr0_sgpr1 %1:_(s32) = COPY $sgpr2 %2:_(s32) = G_ATOMICRMW_XOR %0, %1 :: (load store seq_cst (s32), addrspace 1) @@ -34,9 +34,9 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p0) = COPY $sgpr0_sgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p0) = COPY [[COPY]](p0) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[ATOMICRMW_XOR:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_XOR [[COPY2]](p0), [[COPY3]] :: (load store seq_cst (s32)) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p0) = 
PRED_COPY [[COPY]](p0) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[ATOMICRMW_XOR:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_XOR [[PRED_COPY]](p0), [[PRED_COPY1]] :: (load store seq_cst (s32)) %0:_(p0) = COPY $sgpr0_sgpr1 %1:_(s32) = COPY $sgpr2 %2:_(s32) = G_ATOMICRMW_XOR %0, %1 :: (load store seq_cst (s32), addrspace 0) @@ -54,9 +54,9 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p3) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p3) = COPY [[COPY]](p3) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[ATOMICRMW_XOR:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_XOR [[COPY2]](p3), [[COPY3]] :: (load store seq_cst (s32), addrspace 3) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p3) = PRED_COPY [[COPY]](p3) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[ATOMICRMW_XOR:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_XOR [[PRED_COPY]](p3), [[PRED_COPY1]] :: (load store seq_cst (s32), addrspace 3) %0:_(p3) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 %2:_(s32) = G_ATOMICRMW_XOR %0, %1 :: (load store seq_cst (s32), addrspace 3) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-brcond.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-brcond.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-brcond.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-brcond.mir @@ -92,8 +92,8 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; CHECK-NEXT: G_BRCOND [[COPY1]](s1), %bb.1 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) + ; CHECK-NEXT: G_BRCOND [[PRED_COPY]](s1), %bb.1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: bb.0.entry: @@ -125,8 +125,8 @@ ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; CHECK-NEXT: G_BRCOND [[COPY1]](s1), %bb.1 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) + ; CHECK-NEXT: G_BRCOND [[PRED_COPY]](s1), %bb.1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: bb.0.entry: @@ -158,8 +158,8 @@ ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; CHECK-NEXT: G_BRCOND [[COPY1]](s1), %bb.1 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) + ; CHECK-NEXT: G_BRCOND [[PRED_COPY]](s1), %bb.1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: bb.0.entry: @@ -191,8 +191,8 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32) ; CHECK-NEXT: S_NOP 0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; CHECK-NEXT: G_BRCOND [[COPY1]](s1), %bb.1 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) + ; CHECK-NEXT: G_BRCOND [[PRED_COPY]](s1), %bb.1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: bb.0.entry: diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-bswap.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-bswap.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-bswap.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-bswap.mir @@ -13,8 +13,8 @@ ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: 
[[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[BSWAP:%[0-9]+]]:vgpr(s32) = G_BSWAP [[COPY1]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[BSWAP:%[0-9]+]]:vgpr(s32) = G_BSWAP [[PRED_COPY]] %0:_(s32) = COPY $sgpr0 %1:_(s32) = G_BSWAP %0 ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-build-vector.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-build-vector.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-build-vector.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-build-vector.mir @@ -32,8 +32,8 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[COPY1]](s32) %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $vgpr0 %2:_(<2 x s32>) = G_BUILD_VECTOR %0, %1 @@ -51,8 +51,8 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[PRED_COPY]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $sgpr0 %2:_(<2 x s32>) = G_BUILD_VECTOR %0, %1 @@ -112,8 +112,8 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:agpr(s32) = COPY $agpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[PRED_COPY]](s32) ; CHECK-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<2 x s32>) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $agpr0 @@ -135,8 +135,8 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:agpr(s32) = COPY $agpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[COPY1]](s32) ; CHECK-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<2 x s32>) %0:_(s32) = COPY $agpr0 %1:_(s32) = COPY $vgpr0 @@ -158,9 +158,9 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:agpr(s32) = COPY $agpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: 
[[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32) ; CHECK-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<2 x s32>) %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $agpr0 @@ -182,9 +182,9 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:agpr(s32) = COPY $agpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32) ; CHECK-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<2 x s32>) %0:_(s32) = COPY $agpr0 %1:_(s32) = COPY $sgpr0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-concat-vector.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-concat-vector.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-concat-vector.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-concat-vector.mir @@ -32,8 +32,8 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY]](<2 x s16>) - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<4 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY1]](<2 x s16>) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(<2 x s16>) = PRED_COPY [[COPY]](<2 x s16>) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<4 x s16>) = G_CONCAT_VECTORS [[PRED_COPY]](<2 x s16>), [[COPY1]](<2 x s16>) %0:_(<2 x s16>) = COPY $sgpr0 %1:_(<2 x s16>) = COPY $vgpr0 %2:_(<4 x s16>) = G_CONCAT_VECTORS %0, %1 @@ -51,8 +51,8 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY1]](<2 x s16>) - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<4 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY2]](<2 x s16>) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(<2 x s16>) = PRED_COPY [[COPY1]](<2 x s16>) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<4 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[PRED_COPY]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s16>) = COPY $sgpr0 %2:_(<4 x s16>) = G_CONCAT_VECTORS %0, %1 @@ -108,8 +108,8 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:agpr(<2 x s16>) = COPY $agpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY]](<2 x s16>) - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<4 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY1]](<2 x s16>) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(<2 x s16>) = PRED_COPY [[COPY]](<2 x s16>) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<4 x s16>) = G_CONCAT_VECTORS [[PRED_COPY]](<2 x s16>), [[COPY1]](<2 x s16>) ; CHECK-NEXT: S_ENDPGM 0, implicit [[CONCAT_VECTORS]](<4 x s16>) %0:_(<2 x s16>) = COPY $agpr0 %1:_(<2 x s16>) = COPY $vgpr0 @@ -129,8 +129,8 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:agpr(<2 x s16>) = COPY $agpr0 
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY1]](<2 x s16>) - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<4 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY2]](<2 x s16>) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(<2 x s16>) = PRED_COPY [[COPY1]](<2 x s16>) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<4 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[PRED_COPY]](<2 x s16>) ; CHECK-NEXT: S_ENDPGM 0, implicit [[CONCAT_VECTORS]](<4 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s16>) = COPY $agpr0 @@ -150,9 +150,9 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:agpr(<2 x s16>) = COPY $agpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY]](<2 x s16>) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY1]](<2 x s16>) - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<4 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(<2 x s16>) = PRED_COPY [[COPY]](<2 x s16>) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(<2 x s16>) = PRED_COPY [[COPY1]](<2 x s16>) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<4 x s16>) = G_CONCAT_VECTORS [[PRED_COPY]](<2 x s16>), [[PRED_COPY1]](<2 x s16>) ; CHECK-NEXT: S_ENDPGM 0, implicit [[CONCAT_VECTORS]](<4 x s16>) %0:_(<2 x s16>) = COPY $agpr0 %1:_(<2 x s16>) = COPY $sgpr0 @@ -172,9 +172,9 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:agpr(<2 x s16>) = COPY $agpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY]](<2 x s16>) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY1]](<2 x s16>) - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<4 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(<2 x s16>) = PRED_COPY [[COPY]](<2 x s16>) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(<2 x s16>) = PRED_COPY [[COPY1]](<2 x s16>) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<4 x s16>) = G_CONCAT_VECTORS [[PRED_COPY]](<2 x s16>), [[PRED_COPY1]](<2 x s16>) ; CHECK-NEXT: S_ENDPGM 0, implicit [[CONCAT_VECTORS]](<4 x s16>) %0:_(<2 x s16>) = COPY $sgpr0 %1:_(<2 x s16>) = COPY $agpr0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-constant.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-constant.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-constant.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-constant.mir @@ -13,8 +13,8 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK-NEXT: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store (s32)) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s32)) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(s32) = G_CONSTANT i32 1 G_STORE %1, %0 :: (store (s32)) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-dyn-stackalloc.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-dyn-stackalloc.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-dyn-stackalloc.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-dyn-stackalloc.mir @@ -22,8 +22,8 @@ ; WAVE64-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; WAVE64-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 6 ; WAVE64-NEXT: [[SHL:%[0-9]+]]:sgpr(s32) = 
G_SHL [[COPY]], [[C]](s32) - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:sgpr(p5) = COPY $sp_reg - ; WAVE64-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY1]], [[SHL]](s32) + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(p5) = PRED_COPY $sp_reg + ; WAVE64-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[PRED_COPY]], [[SHL]](s32) ; WAVE64-NEXT: S_ENDPGM 0, implicit [[PTR_ADD]](p5) ; WAVE32-LABEL: name: test_dyn_stackalloc_sgpr_align1 ; WAVE32: liveins: $sgpr0 @@ -31,8 +31,8 @@ ; WAVE32-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; WAVE32-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 5 ; WAVE32-NEXT: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[COPY]], [[C]](s32) - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sgpr(p5) = COPY $sp_reg - ; WAVE32-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY1]], [[SHL]](s32) + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(p5) = PRED_COPY $sp_reg + ; WAVE32-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[PRED_COPY]], [[SHL]](s32) ; WAVE32-NEXT: S_ENDPGM 0, implicit [[PTR_ADD]](p5) %0:_(s32) = COPY $sgpr0 %1:_(p5) = G_DYN_STACKALLOC %0, 1 @@ -56,8 +56,8 @@ ; WAVE64-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; WAVE64-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 6 ; WAVE64-NEXT: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[COPY]], [[C]](s32) - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:sgpr(p5) = COPY $sp_reg - ; WAVE64-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY1]], [[SHL]](s32) + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(p5) = PRED_COPY $sp_reg + ; WAVE64-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[PRED_COPY]], [[SHL]](s32) ; WAVE64-NEXT: S_ENDPGM 0, implicit [[PTR_ADD]](p5) ; WAVE32-LABEL: name: test_dyn_stackalloc_sgpr_align2 ; WAVE32: liveins: $sgpr0 @@ -65,8 +65,8 @@ ; WAVE32-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; WAVE32-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 5 ; WAVE32-NEXT: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[COPY]], [[C]](s32) - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sgpr(p5) = COPY $sp_reg - ; WAVE32-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY1]], [[SHL]](s32) + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(p5) = PRED_COPY $sp_reg + ; WAVE32-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[PRED_COPY]], [[SHL]](s32) ; WAVE32-NEXT: S_ENDPGM 0, implicit [[PTR_ADD]](p5) %0:_(s32) = COPY $sgpr0 %1:_(p5) = G_DYN_STACKALLOC %0, 2 @@ -90,8 +90,8 @@ ; WAVE64-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; WAVE64-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 6 ; WAVE64-NEXT: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[COPY]], [[C]](s32) - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:sgpr(p5) = COPY $sp_reg - ; WAVE64-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY1]], [[SHL]](s32) + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(p5) = PRED_COPY $sp_reg + ; WAVE64-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[PRED_COPY]], [[SHL]](s32) ; WAVE64-NEXT: S_ENDPGM 0, implicit [[PTR_ADD]](p5) ; WAVE32-LABEL: name: test_dyn_stackalloc_sgpr_align4 ; WAVE32: liveins: $sgpr0 @@ -99,8 +99,8 @@ ; WAVE32-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; WAVE32-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 5 ; WAVE32-NEXT: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[COPY]], [[C]](s32) - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sgpr(p5) = COPY $sp_reg - ; WAVE32-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY1]], [[SHL]](s32) + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(p5) = PRED_COPY $sp_reg + ; WAVE32-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[PRED_COPY]], [[SHL]](s32) ; WAVE32-NEXT: S_ENDPGM 0, implicit [[PTR_ADD]](p5) %0:_(s32) = COPY $sgpr0 %1:_(p5) = G_DYN_STACKALLOC %0, 4 @@ -124,8 +124,8 @@ ; 
WAVE64-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; WAVE64-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 6 ; WAVE64-NEXT: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[COPY]], [[C]](s32) - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:sgpr(p5) = COPY $sp_reg - ; WAVE64-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY1]], [[SHL]](s32) + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(p5) = PRED_COPY $sp_reg + ; WAVE64-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[PRED_COPY]], [[SHL]](s32) ; WAVE64-NEXT: S_ENDPGM 0, implicit [[PTR_ADD]](p5) ; WAVE32-LABEL: name: test_dyn_stackalloc_sgpr_align8 ; WAVE32: liveins: $sgpr0 @@ -133,8 +133,8 @@ ; WAVE32-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; WAVE32-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 5 ; WAVE32-NEXT: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[COPY]], [[C]](s32) - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sgpr(p5) = COPY $sp_reg - ; WAVE32-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY1]], [[SHL]](s32) + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(p5) = PRED_COPY $sp_reg + ; WAVE32-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[PRED_COPY]], [[SHL]](s32) ; WAVE32-NEXT: S_ENDPGM 0, implicit [[PTR_ADD]](p5) %0:_(s32) = COPY $sgpr0 %1:_(p5) = G_DYN_STACKALLOC %0, 8 @@ -158,8 +158,8 @@ ; WAVE64-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; WAVE64-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 6 ; WAVE64-NEXT: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[COPY]], [[C]](s32) - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:sgpr(p5) = COPY $sp_reg - ; WAVE64-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY1]], [[SHL]](s32) + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(p5) = PRED_COPY $sp_reg + ; WAVE64-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[PRED_COPY]], [[SHL]](s32) ; WAVE64-NEXT: S_ENDPGM 0, implicit [[PTR_ADD]](p5) ; WAVE32-LABEL: name: test_dyn_stackalloc_sgpr_align16 ; WAVE32: liveins: $sgpr0 @@ -167,8 +167,8 @@ ; WAVE32-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; WAVE32-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 5 ; WAVE32-NEXT: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[COPY]], [[C]](s32) - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sgpr(p5) = COPY $sp_reg - ; WAVE32-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY1]], [[SHL]](s32) + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(p5) = PRED_COPY $sp_reg + ; WAVE32-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[PRED_COPY]], [[SHL]](s32) ; WAVE32-NEXT: S_ENDPGM 0, implicit [[PTR_ADD]](p5) %0:_(s32) = COPY $sgpr0 %1:_(p5) = G_DYN_STACKALLOC %0, 16 @@ -192,8 +192,8 @@ ; WAVE64-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; WAVE64-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 6 ; WAVE64-NEXT: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[COPY]], [[C]](s32) - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:sgpr(p5) = COPY $sp_reg - ; WAVE64-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY1]], [[SHL]](s32) + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(p5) = PRED_COPY $sp_reg + ; WAVE64-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[PRED_COPY]], [[SHL]](s32) ; WAVE64-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -2048 ; WAVE64-NEXT: [[PTRMASK:%[0-9]+]]:sgpr(p5) = G_PTRMASK [[PTR_ADD]], [[C1]](s32) ; WAVE64-NEXT: S_ENDPGM 0, implicit [[PTRMASK]](p5) @@ -203,8 +203,8 @@ ; WAVE32-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; WAVE32-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 5 ; WAVE32-NEXT: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[COPY]], [[C]](s32) - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sgpr(p5) = COPY $sp_reg - ; WAVE32-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY1]], [[SHL]](s32) + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(p5) = 
PRED_COPY $sp_reg + ; WAVE32-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[PRED_COPY]], [[SHL]](s32) ; WAVE32-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -1024 ; WAVE32-NEXT: [[PTRMASK:%[0-9]+]]:sgpr(p5) = G_PTRMASK [[PTR_ADD]], [[C1]](s32) ; WAVE32-NEXT: S_ENDPGM 0, implicit [[PTRMASK]](p5) @@ -230,8 +230,8 @@ ; WAVE64-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; WAVE64-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 6 ; WAVE64-NEXT: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[COPY]], [[C]](s32) - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:sgpr(p5) = COPY $sp_reg - ; WAVE64-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY1]], [[SHL]](s32) + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(p5) = PRED_COPY $sp_reg + ; WAVE64-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[PRED_COPY]], [[SHL]](s32) ; WAVE64-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -4096 ; WAVE64-NEXT: [[PTRMASK:%[0-9]+]]:sgpr(p5) = G_PTRMASK [[PTR_ADD]], [[C1]](s32) ; WAVE64-NEXT: S_ENDPGM 0, implicit [[PTRMASK]](p5) @@ -241,8 +241,8 @@ ; WAVE32-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; WAVE32-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 5 ; WAVE32-NEXT: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[COPY]], [[C]](s32) - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sgpr(p5) = COPY $sp_reg - ; WAVE32-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY1]], [[SHL]](s32) + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(p5) = PRED_COPY $sp_reg + ; WAVE32-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[PRED_COPY]], [[SHL]](s32) ; WAVE32-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -2048 ; WAVE32-NEXT: [[PTRMASK:%[0-9]+]]:sgpr(p5) = G_PTRMASK [[PTR_ADD]], [[C1]](s32) ; WAVE32-NEXT: S_ENDPGM 0, implicit [[PTRMASK]](p5) @@ -268,8 +268,8 @@ ; WAVE64-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; WAVE64-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 6 ; WAVE64-NEXT: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[COPY]], [[C]](s32) - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:sgpr(p5) = COPY $sp_reg - ; WAVE64-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY1]], [[SHL]](s32) + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(p5) = PRED_COPY $sp_reg + ; WAVE64-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[PRED_COPY]], [[SHL]](s32) ; WAVE64-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -8192 ; WAVE64-NEXT: [[PTRMASK:%[0-9]+]]:sgpr(p5) = G_PTRMASK [[PTR_ADD]], [[C1]](s32) ; WAVE64-NEXT: S_ENDPGM 0, implicit [[PTRMASK]](p5) @@ -279,8 +279,8 @@ ; WAVE32-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; WAVE32-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 5 ; WAVE32-NEXT: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[COPY]], [[C]](s32) - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sgpr(p5) = COPY $sp_reg - ; WAVE32-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY1]], [[SHL]](s32) + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(p5) = PRED_COPY $sp_reg + ; WAVE32-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[PRED_COPY]], [[SHL]](s32) ; WAVE32-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -4096 ; WAVE32-NEXT: [[PTRMASK:%[0-9]+]]:sgpr(p5) = G_PTRMASK [[PTR_ADD]], [[C1]](s32) ; WAVE32-NEXT: S_ENDPGM 0, implicit [[PTRMASK]](p5) @@ -303,15 +303,15 @@ ; WAVE64: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 32 ; WAVE64-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 6 ; WAVE64-NEXT: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[C]], [[C1]](s32) - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:sgpr(p5) = COPY $sp_reg - ; WAVE64-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY]], [[SHL]](s32) + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(p5) = PRED_COPY $sp_reg + ; WAVE64-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD 
[[PRED_COPY]], [[SHL]](s32) ; WAVE64-NEXT: S_ENDPGM 0, implicit [[PTR_ADD]](p5) ; WAVE32-LABEL: name: test_dyn_stackalloc_sgpr_constant_align4 ; WAVE32: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 32 ; WAVE32-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 5 ; WAVE32-NEXT: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[C]], [[C1]](s32) - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:sgpr(p5) = COPY $sp_reg - ; WAVE32-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY]], [[SHL]](s32) + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(p5) = PRED_COPY $sp_reg + ; WAVE32-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[PRED_COPY]], [[SHL]](s32) ; WAVE32-NEXT: S_ENDPGM 0, implicit [[PTR_ADD]](p5) %0:_(s32) = G_CONSTANT i32 32 %1:_(p5) = G_DYN_STACKALLOC %0, 4 @@ -335,8 +335,8 @@ ; WAVE64-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 32 ; WAVE64-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 6 ; WAVE64-NEXT: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[C]], [[C1]](s32) - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:sgpr(p5) = COPY $sp_reg - ; WAVE64-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY]], [[SHL]](s32) + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(p5) = PRED_COPY $sp_reg + ; WAVE64-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[PRED_COPY]], [[SHL]](s32) ; WAVE64-NEXT: S_ENDPGM 0, implicit [[PTR_ADD]](p5) ; WAVE32-LABEL: name: test_dyn_stackalloc_sgpr_constant_align8 ; WAVE32: liveins: $sgpr0 @@ -344,8 +344,8 @@ ; WAVE32-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 32 ; WAVE32-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 5 ; WAVE32-NEXT: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[C]], [[C1]](s32) - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:sgpr(p5) = COPY $sp_reg - ; WAVE32-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY]], [[SHL]](s32) + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(p5) = PRED_COPY $sp_reg + ; WAVE32-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[PRED_COPY]], [[SHL]](s32) ; WAVE32-NEXT: S_ENDPGM 0, implicit [[PTR_ADD]](p5) %0:_(s32) = G_CONSTANT i32 32 %1:_(p5) = G_DYN_STACKALLOC %0, 8 @@ -369,8 +369,8 @@ ; WAVE64-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 32 ; WAVE64-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 6 ; WAVE64-NEXT: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[C]], [[C1]](s32) - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:sgpr(p5) = COPY $sp_reg - ; WAVE64-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY]], [[SHL]](s32) + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(p5) = PRED_COPY $sp_reg + ; WAVE64-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[PRED_COPY]], [[SHL]](s32) ; WAVE64-NEXT: S_ENDPGM 0, implicit [[PTR_ADD]](p5) ; WAVE32-LABEL: name: test_dyn_stackalloc_sgpr_constant_align16 ; WAVE32: liveins: $sgpr0 @@ -378,8 +378,8 @@ ; WAVE32-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 32 ; WAVE32-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 5 ; WAVE32-NEXT: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[C]], [[C1]](s32) - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:sgpr(p5) = COPY $sp_reg - ; WAVE32-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY]], [[SHL]](s32) + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(p5) = PRED_COPY $sp_reg + ; WAVE32-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[PRED_COPY]], [[SHL]](s32) ; WAVE32-NEXT: S_ENDPGM 0, implicit [[PTR_ADD]](p5) %0:_(s32) = G_CONSTANT i32 32 %1:_(p5) = G_DYN_STACKALLOC %0, 16 @@ -403,8 +403,8 @@ ; WAVE64-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 32 ; WAVE64-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 6 ; WAVE64-NEXT: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[C]], [[C1]](s32) - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:sgpr(p5) = COPY $sp_reg - ; WAVE64-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) 
= G_PTR_ADD [[COPY]], [[SHL]](s32) + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(p5) = PRED_COPY $sp_reg + ; WAVE64-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[PRED_COPY]], [[SHL]](s32) ; WAVE64-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -2048 ; WAVE64-NEXT: [[PTRMASK:%[0-9]+]]:sgpr(p5) = G_PTRMASK [[PTR_ADD]], [[C2]](s32) ; WAVE64-NEXT: S_ENDPGM 0, implicit [[PTRMASK]](p5) @@ -414,8 +414,8 @@ ; WAVE32-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 32 ; WAVE32-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 5 ; WAVE32-NEXT: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[C]], [[C1]](s32) - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:sgpr(p5) = COPY $sp_reg - ; WAVE32-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY]], [[SHL]](s32) + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(p5) = PRED_COPY $sp_reg + ; WAVE32-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[PRED_COPY]], [[SHL]](s32) ; WAVE32-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -1024 ; WAVE32-NEXT: [[PTRMASK:%[0-9]+]]:sgpr(p5) = G_PTRMASK [[PTR_ADD]], [[C2]](s32) ; WAVE32-NEXT: S_ENDPGM 0, implicit [[PTRMASK]](p5) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-extract-vector-elt.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-extract-vector-elt.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-extract-vector-elt.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-extract-vector-elt.mir @@ -94,8 +94,8 @@ ; WAVE64-NEXT: [[C14:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 15 ; WAVE64-NEXT: [[ICMP14:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C14]] ; WAVE64-NEXT: [[SELECT14:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP14]](s1), [[UV15]], [[SELECT13]] - ; WAVE64-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[SELECT14]](s32) - ; WAVE64-NEXT: $vgpr0 = COPY [[COPY2]](s32) + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[SELECT14]](s32) + ; WAVE64-NEXT: $vgpr0 = COPY [[PRED_COPY]](s32) ; WAVE32-LABEL: name: extract_vector_elt_v16s32_sv ; WAVE32: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15, $vgpr0 ; WAVE32-NEXT: {{ $}} @@ -147,8 +147,8 @@ ; WAVE32-NEXT: [[C14:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 15 ; WAVE32-NEXT: [[ICMP14:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C14]] ; WAVE32-NEXT: [[SELECT14:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP14]](s1), [[UV15]], [[SELECT13]] - ; WAVE32-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[SELECT14]](s32) - ; WAVE32-NEXT: $vgpr0 = COPY [[COPY2]](s32) + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[SELECT14]](s32) + ; WAVE32-NEXT: $vgpr0 = COPY [[PRED_COPY]](s32) %0:_(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 %1:_(s32) = COPY $vgpr0 %2:_(s32) = G_EXTRACT_VECTOR_ELT %0, %1 @@ -244,8 +244,8 @@ ; WAVE64-NEXT: [[C14:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 15 ; WAVE64-NEXT: [[ICMP14:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C14]] ; WAVE64-NEXT: [[SELECT14:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP14]](s1), [[UV15]], [[SELECT13]] - ; WAVE64-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[SELECT14]](s32) - ; WAVE64-NEXT: $vgpr0 = COPY [[COPY2]](s32) + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[SELECT14]](s32) + ; WAVE64-NEXT: $vgpr0 = COPY [[PRED_COPY]](s32) ; WAVE32-LABEL: name: extract_vector_elt_v16s32_vv ; WAVE32: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16 ; WAVE32-NEXT: {{ $}} @@ -297,8 +297,8 @@ ; 
WAVE32-NEXT: [[C14:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 15 ; WAVE32-NEXT: [[ICMP14:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C14]] ; WAVE32-NEXT: [[SELECT14:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP14]](s1), [[UV15]], [[SELECT13]] - ; WAVE32-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[SELECT14]](s32) - ; WAVE32-NEXT: $vgpr0 = COPY [[COPY2]](s32) + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[SELECT14]](s32) + ; WAVE32-NEXT: $vgpr0 = COPY [[PRED_COPY]](s32) %0:_(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 %1:_(s32) = COPY $vgpr16 %2:_(s32) = G_EXTRACT_VECTOR_ELT %0, %1 @@ -418,9 +418,9 @@ ; WAVE64-NEXT: [[ICMP6:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C6]] ; WAVE64-NEXT: [[SELECT12:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP6]](s1), [[UV14]], [[SELECT10]] ; WAVE64-NEXT: [[SELECT13:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP6]](s1), [[UV15]], [[SELECT11]] - ; WAVE64-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[SELECT12]](s32) - ; WAVE64-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[SELECT13]](s32) - ; WAVE64-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[SELECT12]](s32) + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[SELECT13]](s32) + ; WAVE64-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[PRED_COPY]](s32), [[PRED_COPY1]](s32) ; WAVE64-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) ; WAVE32-LABEL: name: extract_vector_elt_v8s64_sv ; WAVE32: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15, $vgpr0 @@ -456,9 +456,9 @@ ; WAVE32-NEXT: [[ICMP6:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C6]] ; WAVE32-NEXT: [[SELECT12:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP6]](s1), [[UV14]], [[SELECT10]] ; WAVE32-NEXT: [[SELECT13:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP6]](s1), [[UV15]], [[SELECT11]] - ; WAVE32-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[SELECT12]](s32) - ; WAVE32-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[SELECT13]](s32) - ; WAVE32-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[SELECT12]](s32) + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[SELECT13]](s32) + ; WAVE32-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[PRED_COPY]](s32), [[PRED_COPY1]](s32) ; WAVE32-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) %0:_(<8 x s64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 %1:_(s32) = COPY $vgpr0 @@ -509,9 +509,9 @@ ; WAVE64-NEXT: [[ICMP6:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C6]] ; WAVE64-NEXT: [[SELECT12:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP6]](s1), [[UV14]], [[SELECT10]] ; WAVE64-NEXT: [[SELECT13:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP6]](s1), [[UV15]], [[SELECT11]] - ; WAVE64-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[SELECT12]](s32) - ; WAVE64-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[SELECT13]](s32) - ; WAVE64-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[SELECT12]](s32) + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[SELECT13]](s32) + ; WAVE64-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[PRED_COPY]](s32), [[PRED_COPY1]](s32) ; WAVE64-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) ; WAVE32-LABEL: name: extract_vector_elt_v8s64_vv ; 
WAVE32: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16 @@ -547,9 +547,9 @@ ; WAVE32-NEXT: [[ICMP6:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C6]] ; WAVE32-NEXT: [[SELECT12:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP6]](s1), [[UV14]], [[SELECT10]] ; WAVE32-NEXT: [[SELECT13:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP6]](s1), [[UV15]], [[SELECT11]] - ; WAVE32-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[SELECT12]](s32) - ; WAVE32-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[SELECT13]](s32) - ; WAVE32-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[SELECT12]](s32) + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[SELECT13]](s32) + ; WAVE32-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[PRED_COPY]](s32), [[PRED_COPY1]](s32) ; WAVE32-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) %0:_(<8 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 %1:_(s32) = COPY $vgpr16 @@ -572,8 +572,8 @@ ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr16 ; WAVE64-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 - ; WAVE64-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; WAVE64-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY1]], [[COPY2]] + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; WAVE64-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY1]], [[PRED_COPY]] ; WAVE64-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](<16 x s32>) ; WAVE64-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 ; WAVE64-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C1]] @@ -620,16 +620,16 @@ ; WAVE64-NEXT: [[C15:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 15 ; WAVE64-NEXT: [[ICMP14:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C15]] ; WAVE64-NEXT: [[SELECT14:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP14]](s1), [[UV15]], [[SELECT13]] - ; WAVE64-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[SELECT14]](s32) - ; WAVE64-NEXT: $vgpr0 = COPY [[COPY3]](s32) + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[SELECT14]](s32) + ; WAVE64-NEXT: $vgpr0 = COPY [[PRED_COPY1]](s32) ; WAVE32-LABEL: name: extract_vector_elt_v16s32_vv_idx_add1 ; WAVE32: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16 ; WAVE32-NEXT: {{ $}} ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr16 ; WAVE32-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 - ; WAVE32-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; WAVE32-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY1]], [[COPY2]] + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; WAVE32-NEXT: 
[[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY1]], [[PRED_COPY]] ; WAVE32-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](<16 x s32>) ; WAVE32-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 ; WAVE32-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C1]] @@ -676,8 +676,8 @@ ; WAVE32-NEXT: [[C15:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 15 ; WAVE32-NEXT: [[ICMP14:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C15]] ; WAVE32-NEXT: [[SELECT14:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP14]](s1), [[UV15]], [[SELECT13]] - ; WAVE32-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[SELECT14]](s32) - ; WAVE32-NEXT: $vgpr0 = COPY [[COPY3]](s32) + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[SELECT14]](s32) + ; WAVE32-NEXT: $vgpr0 = COPY [[PRED_COPY1]](s32) %0:_(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 %1:_(s32) = COPY $vgpr16 %2:_(s32) = G_CONSTANT i32 1 @@ -701,8 +701,8 @@ ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr16 ; WAVE64-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -1 - ; WAVE64-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; WAVE64-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY1]], [[COPY2]] + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; WAVE64-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY1]], [[PRED_COPY]] ; WAVE64-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](<16 x s32>) ; WAVE64-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 ; WAVE64-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C1]] @@ -749,16 +749,16 @@ ; WAVE64-NEXT: [[C15:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 15 ; WAVE64-NEXT: [[ICMP14:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C15]] ; WAVE64-NEXT: [[SELECT14:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP14]](s1), [[UV15]], [[SELECT13]] - ; WAVE64-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[SELECT14]](s32) - ; WAVE64-NEXT: $vgpr0 = COPY [[COPY3]](s32) + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[SELECT14]](s32) + ; WAVE64-NEXT: $vgpr0 = COPY [[PRED_COPY1]](s32) ; WAVE32-LABEL: name: extract_vector_elt_v16s32_vv_idx_addm1 ; WAVE32: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16 ; WAVE32-NEXT: {{ $}} ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr16 ; WAVE32-NEXT: 
[[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -1 - ; WAVE32-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; WAVE32-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY1]], [[COPY2]] + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; WAVE32-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY1]], [[PRED_COPY]] ; WAVE32-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](<16 x s32>) ; WAVE32-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 ; WAVE32-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C1]] @@ -805,8 +805,8 @@ ; WAVE32-NEXT: [[C15:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 15 ; WAVE32-NEXT: [[ICMP14:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C15]] ; WAVE32-NEXT: [[SELECT14:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP14]](s1), [[UV15]], [[SELECT13]] - ; WAVE32-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[SELECT14]](s32) - ; WAVE32-NEXT: $vgpr0 = COPY [[COPY3]](s32) + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[SELECT14]](s32) + ; WAVE32-NEXT: $vgpr0 = COPY [[PRED_COPY1]](s32) %0:_(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 %1:_(s32) = COPY $vgpr16 %2:_(s32) = G_CONSTANT i32 -1 @@ -830,8 +830,8 @@ ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr16 ; WAVE64-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 16 - ; WAVE64-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; WAVE64-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY1]], [[COPY2]] + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; WAVE64-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY1]], [[PRED_COPY]] ; WAVE64-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](<16 x s32>) ; WAVE64-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 ; WAVE64-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C1]] @@ -878,16 +878,16 @@ ; WAVE64-NEXT: [[C15:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 15 ; WAVE64-NEXT: [[ICMP14:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C15]] ; WAVE64-NEXT: [[SELECT14:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP14]](s1), [[UV15]], [[SELECT13]] - ; WAVE64-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[SELECT14]](s32) - ; WAVE64-NEXT: $vgpr0 = COPY [[COPY3]](s32) + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[SELECT14]](s32) + ; WAVE64-NEXT: $vgpr0 = COPY [[PRED_COPY1]](s32) ; WAVE32-LABEL: name: extract_vector_elt_v16s32_vv_idx_add16 ; WAVE32: liveins: 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16 ; WAVE32-NEXT: {{ $}} ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr16 ; WAVE32-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 16 - ; WAVE32-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; WAVE32-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY1]], [[COPY2]] + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; WAVE32-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY1]], [[PRED_COPY]] ; WAVE32-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](<16 x s32>) ; WAVE32-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 ; WAVE32-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C1]] @@ -934,8 +934,8 @@ ; WAVE32-NEXT: [[C15:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 15 ; WAVE32-NEXT: [[ICMP14:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C15]] ; WAVE32-NEXT: [[SELECT14:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP14]](s1), [[UV15]], [[SELECT13]] - ; WAVE32-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[SELECT14]](s32) - ; WAVE32-NEXT: $vgpr0 = COPY [[COPY3]](s32) + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[SELECT14]](s32) + ; WAVE32-NEXT: $vgpr0 = COPY [[PRED_COPY1]](s32) %0:_(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 %1:_(s32) = COPY $vgpr16 %2:_(s32) = G_CONSTANT i32 16 @@ -959,8 +959,8 @@ ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr(<8 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr16 ; WAVE64-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 - ; WAVE64-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; WAVE64-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY1]], [[COPY2]] + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; WAVE64-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY1]], [[PRED_COPY]] ; WAVE64-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](<8 x s64>) ; WAVE64-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 ; WAVE64-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C1]] @@ -990,9 +990,9 @@ ; WAVE64-NEXT: [[ICMP6:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C7]] ; WAVE64-NEXT: [[SELECT12:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP6]](s1), [[UV14]], [[SELECT10]] ; WAVE64-NEXT: [[SELECT13:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP6]](s1), [[UV15]], [[SELECT11]] - ; WAVE64-NEXT: 
[[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[SELECT12]](s32) - ; WAVE64-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[SELECT13]](s32) - ; WAVE64-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY3]](s32), [[COPY4]](s32) + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[SELECT12]](s32) + ; WAVE64-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY [[SELECT13]](s32) + ; WAVE64-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[PRED_COPY1]](s32), [[PRED_COPY2]](s32) ; WAVE64-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) ; WAVE32-LABEL: name: extract_vector_elt_v8s64_vv_idx_add1 ; WAVE32: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16 @@ -1000,8 +1000,8 @@ ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr(<8 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr16 ; WAVE32-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 - ; WAVE32-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; WAVE32-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY1]], [[COPY2]] + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; WAVE32-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY1]], [[PRED_COPY]] ; WAVE32-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](<8 x s64>) ; WAVE32-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 ; WAVE32-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C1]] @@ -1031,9 +1031,9 @@ ; WAVE32-NEXT: [[ICMP6:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C7]] ; WAVE32-NEXT: [[SELECT12:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP6]](s1), [[UV14]], [[SELECT10]] ; WAVE32-NEXT: [[SELECT13:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP6]](s1), [[UV15]], [[SELECT11]] - ; WAVE32-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[SELECT12]](s32) - ; WAVE32-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[SELECT13]](s32) - ; WAVE32-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY3]](s32), [[COPY4]](s32) + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[SELECT12]](s32) + ; WAVE32-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY [[SELECT13]](s32) + ; WAVE32-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[PRED_COPY1]](s32), [[PRED_COPY2]](s32) ; WAVE32-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) %0:_(<8 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 %1:_(s32) = COPY $vgpr16 @@ -1058,8 +1058,8 @@ ; WAVE64-NEXT: [[COPY:%[0-9]+]]:sgpr(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; WAVE64-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 - ; WAVE64-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; WAVE64-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY1]], [[COPY2]] + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; WAVE64-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY1]], [[PRED_COPY]] ; WAVE64-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), 
[[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](<16 x s32>) ; WAVE64-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 ; WAVE64-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C1]] @@ -1106,16 +1106,16 @@ ; WAVE64-NEXT: [[C15:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 15 ; WAVE64-NEXT: [[ICMP14:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C15]] ; WAVE64-NEXT: [[SELECT14:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP14]](s1), [[UV15]], [[SELECT13]] - ; WAVE64-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[SELECT14]](s32) - ; WAVE64-NEXT: $vgpr0 = COPY [[COPY3]](s32) + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[SELECT14]](s32) + ; WAVE64-NEXT: $vgpr0 = COPY [[PRED_COPY1]](s32) ; WAVE32-LABEL: name: extract_vector_elt_v16s32_sv_idx_add1 ; WAVE32: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15, $vgpr0 ; WAVE32-NEXT: {{ $}} ; WAVE32-NEXT: [[COPY:%[0-9]+]]:sgpr(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; WAVE32-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 - ; WAVE32-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; WAVE32-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY1]], [[COPY2]] + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; WAVE32-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY1]], [[PRED_COPY]] ; WAVE32-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](<16 x s32>) ; WAVE32-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 ; WAVE32-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C1]] @@ -1162,8 +1162,8 @@ ; WAVE32-NEXT: [[C15:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 15 ; WAVE32-NEXT: [[ICMP14:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C15]] ; WAVE32-NEXT: [[SELECT14:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP14]](s1), [[UV15]], [[SELECT13]] - ; WAVE32-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[SELECT14]](s32) - ; WAVE32-NEXT: $vgpr0 = COPY [[COPY3]](s32) + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[SELECT14]](s32) + ; WAVE32-NEXT: $vgpr0 = COPY [[PRED_COPY1]](s32) %0:_(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 %1:_(s32) = COPY $vgpr0 %2:_(s32) = G_CONSTANT i32 1 @@ -1187,8 +1187,8 @@ ; WAVE64-NEXT: [[COPY:%[0-9]+]]:sgpr(<8 x s64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; WAVE64-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 - ; WAVE64-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; WAVE64-NEXT: 
[[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY1]], [[COPY2]] + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; WAVE64-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY1]], [[PRED_COPY]] ; WAVE64-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](<8 x s64>) ; WAVE64-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 ; WAVE64-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C1]] @@ -1218,9 +1218,9 @@ ; WAVE64-NEXT: [[ICMP6:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C7]] ; WAVE64-NEXT: [[SELECT12:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP6]](s1), [[UV14]], [[SELECT10]] ; WAVE64-NEXT: [[SELECT13:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP6]](s1), [[UV15]], [[SELECT11]] - ; WAVE64-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[SELECT12]](s32) - ; WAVE64-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[SELECT13]](s32) - ; WAVE64-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY3]](s32), [[COPY4]](s32) + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[SELECT12]](s32) + ; WAVE64-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY [[SELECT13]](s32) + ; WAVE64-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[PRED_COPY1]](s32), [[PRED_COPY2]](s32) ; WAVE64-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) ; WAVE32-LABEL: name: extract_vector_elt_v8s64_sv_add1 ; WAVE32: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15, $vgpr0 @@ -1228,8 +1228,8 @@ ; WAVE32-NEXT: [[COPY:%[0-9]+]]:sgpr(<8 x s64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; WAVE32-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 - ; WAVE32-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; WAVE32-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY1]], [[COPY2]] + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; WAVE32-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY1]], [[PRED_COPY]] ; WAVE32-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](<8 x s64>) ; WAVE32-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 ; WAVE32-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C1]] @@ -1259,9 +1259,9 @@ ; WAVE32-NEXT: [[ICMP6:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C7]] ; WAVE32-NEXT: [[SELECT12:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP6]](s1), [[UV14]], [[SELECT10]] ; WAVE32-NEXT: [[SELECT13:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP6]](s1), [[UV15]], [[SELECT11]] - ; WAVE32-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[SELECT12]](s32) - ; WAVE32-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[SELECT13]](s32) - ; WAVE32-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = 
G_MERGE_VALUES [[COPY3]](s32), [[COPY4]](s32) + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[SELECT12]](s32) + ; WAVE32-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY [[SELECT13]](s32) + ; WAVE32-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[PRED_COPY1]](s32), [[PRED_COPY2]](s32) ; WAVE32-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) %0:_(<8 x s64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 %1:_(s32) = COPY $vgpr0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fadd.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fadd.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fadd.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fadd.mir @@ -14,9 +14,9 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[FADD:%[0-9]+]]:vgpr(s32) = G_FADD [[COPY2]], [[COPY3]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[FADD:%[0-9]+]]:vgpr(s32) = G_FADD [[PRED_COPY]], [[PRED_COPY1]] %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 %2:_(s32) = G_FADD %0, %1 @@ -34,8 +34,8 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[FADD:%[0-9]+]]:vgpr(s32) = G_FADD [[COPY2]], [[COPY1]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[FADD:%[0-9]+]]:vgpr(s32) = G_FADD [[PRED_COPY]], [[COPY1]] %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $vgpr0 %2:_(s32) = G_FADD %0, %1 @@ -53,8 +53,8 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[FADD:%[0-9]+]]:vgpr(s32) = G_FADD [[COPY]], [[COPY2]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[FADD:%[0-9]+]]:vgpr(s32) = G_FADD [[COPY]], [[PRED_COPY]] %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $sgpr0 %2:_(s32) = G_FADD %0, %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fcanonicalize.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fcanonicalize.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fcanonicalize.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fcanonicalize.mir @@ -13,8 +13,8 @@ ; CHECK: liveins: $sgpr0_sgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[FCANONICALIZE:%[0-9]+]]:vgpr(s32) = G_FCANONICALIZE [[COPY1]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[FCANONICALIZE:%[0-9]+]]:vgpr(s32) = G_FCANONICALIZE [[PRED_COPY]] ; CHECK-NEXT: $vgpr0 = COPY [[FCANONICALIZE]](s32) %0:_(s32) = COPY $sgpr0 %1:_(s32) = G_FCANONICALIZE %0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fceil.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fceil.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fceil.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fceil.mir @@ 
-13,8 +13,8 @@ ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[FCEIL:%[0-9]+]]:vgpr(s32) = G_FCEIL [[COPY1]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[FCEIL:%[0-9]+]]:vgpr(s32) = G_FCEIL [[PRED_COPY]] %0:_(s32) = COPY $sgpr0 %1:_(s32) = G_FCEIL %0 ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fcmp.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fcmp.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fcmp.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fcmp.mir @@ -14,9 +14,9 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[FCMP:%[0-9]+]]:vcc(s1) = G_FCMP floatpred(uge), [[COPY2]](s32), [[COPY3]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[FCMP:%[0-9]+]]:vcc(s1) = G_FCMP floatpred(uge), [[PRED_COPY]](s32), [[PRED_COPY1]] %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 %2:_(s1) = G_FCMP floatpred(uge), %0(s32), %1 @@ -34,8 +34,8 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[FCMP:%[0-9]+]]:vcc(s1) = G_FCMP floatpred(uge), [[COPY2]](s32), [[COPY1]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[FCMP:%[0-9]+]]:vcc(s1) = G_FCMP floatpred(uge), [[PRED_COPY]](s32), [[COPY1]] %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $vgpr0 %2:_(s1) = G_FCMP floatpred(uge), %0, %1 @@ -53,8 +53,8 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[FCMP:%[0-9]+]]:vcc(s1) = G_FCMP floatpred(uge), [[COPY1]](s32), [[COPY2]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[FCMP:%[0-9]+]]:vcc(s1) = G_FCMP floatpred(uge), [[COPY1]](s32), [[PRED_COPY]] %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $vgpr0 %2:_(s1) = G_FCMP floatpred(uge), %1, %0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fexp2.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fexp2.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fexp2.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fexp2.mir @@ -13,8 +13,8 @@ ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[FEXP2_:%[0-9]+]]:vgpr(s32) = G_FEXP2 [[COPY1]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[FEXP2_:%[0-9]+]]:vgpr(s32) = G_FEXP2 [[PRED_COPY]] %0:_(s32) = COPY $sgpr0 %1:_(s32) = G_FEXP2 %0 ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-flog2.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-flog2.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-flog2.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-flog2.mir @@ -13,8 +13,8 @@ ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[FLOG2_:%[0-9]+]]:vgpr(s32) = G_FLOG2 [[COPY1]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[FLOG2_:%[0-9]+]]:vgpr(s32) = G_FLOG2 [[PRED_COPY]] %0:_(s32) = COPY $sgpr0 %1:_(s32) = G_FLOG2 %0 ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fma.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fma.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fma.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fma.mir @@ -15,10 +15,10 @@ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[FMA:%[0-9]+]]:vgpr(s32) = G_FMA [[COPY3]], [[COPY4]], [[COPY5]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY2]](s32) + ; CHECK-NEXT: [[FMA:%[0-9]+]]:vgpr(s32) = G_FMA [[PRED_COPY]], [[PRED_COPY1]], [[PRED_COPY2]] %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 %2:_(s32) = COPY $sgpr2 @@ -37,9 +37,9 @@ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[FMA:%[0-9]+]]:vgpr(s32) = G_FMA [[COPY]], [[COPY3]], [[COPY4]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY2]](s32) + ; CHECK-NEXT: [[FMA:%[0-9]+]]:vgpr(s32) = G_FMA [[COPY]], [[PRED_COPY]], [[PRED_COPY1]] %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $sgpr0 %2:_(s32) = COPY $sgpr1 @@ -58,9 +58,9 @@ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[FMA:%[0-9]+]]:vgpr(s32) = G_FMA [[COPY3]], [[COPY1]], [[COPY4]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY2]](s32) + ; CHECK-NEXT: [[FMA:%[0-9]+]]:vgpr(s32) = G_FMA [[PRED_COPY]], [[COPY1]], [[PRED_COPY1]] %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $vgpr0 %2:_(s32) = COPY $sgpr1 @@ -79,9 +79,9 @@ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY 
[[COPY1]](s32) - ; CHECK-NEXT: [[FMA:%[0-9]+]]:vgpr(s32) = G_FMA [[COPY3]], [[COPY4]], [[COPY2]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[FMA:%[0-9]+]]:vgpr(s32) = G_FMA [[PRED_COPY]], [[PRED_COPY1]], [[COPY2]] %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 %2:_(s32) = COPY $vgpr0 @@ -100,8 +100,8 @@ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[FMA:%[0-9]+]]:vgpr(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY3]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY2]](s32) + ; CHECK-NEXT: [[FMA:%[0-9]+]]:vgpr(s32) = G_FMA [[COPY]], [[COPY1]], [[PRED_COPY]] %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32) = COPY $sgpr0 @@ -120,8 +120,8 @@ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[FMA:%[0-9]+]]:vgpr(s32) = G_FMA [[COPY]], [[COPY3]], [[COPY2]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[FMA:%[0-9]+]]:vgpr(s32) = G_FMA [[COPY]], [[PRED_COPY]], [[COPY2]] %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $sgpr1 %2:_(s32) = COPY $vgpr1 @@ -140,8 +140,8 @@ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[FMA:%[0-9]+]]:vgpr(s32) = G_FMA [[COPY3]], [[COPY1]], [[COPY2]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[FMA:%[0-9]+]]:vgpr(s32) = G_FMA [[PRED_COPY]], [[COPY1]], [[COPY2]] %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $vgpr0 %2:_(s32) = COPY $vgpr1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fmul.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fmul.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fmul.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fmul.mir @@ -14,9 +14,9 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[FMUL:%[0-9]+]]:vgpr(s32) = G_FMUL [[COPY2]], [[COPY3]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[FMUL:%[0-9]+]]:vgpr(s32) = G_FMUL [[PRED_COPY]], [[PRED_COPY1]] %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 %2:_(s32) = G_FMUL %0, %1 @@ -34,8 +34,8 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[FMUL:%[0-9]+]]:vgpr(s32) = G_FMUL [[COPY2]], [[COPY1]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[FMUL:%[0-9]+]]:vgpr(s32) = G_FMUL [[PRED_COPY]], [[COPY1]] %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $vgpr0 %2:_(s32) = G_FMUL %0, %1 @@ -53,8 
+53,8 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[FMUL:%[0-9]+]]:vgpr(s32) = G_FMUL [[COPY]], [[COPY2]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[FMUL:%[0-9]+]]:vgpr(s32) = G_FMUL [[COPY]], [[PRED_COPY]] %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $sgpr0 %2:_(s32) = G_FMUL %0, %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fpext.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fpext.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fpext.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fpext.mir @@ -13,8 +13,8 @@ ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[FPEXT:%[0-9]+]]:vgpr(s64) = G_FPEXT [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[FPEXT:%[0-9]+]]:vgpr(s64) = G_FPEXT [[PRED_COPY]](s32) %0:_(s32) = COPY $sgpr0 %1:_(s64) = G_FPEXT %0 ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fptosi.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fptosi.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fptosi.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fptosi.mir @@ -13,8 +13,8 @@ ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[FPTOSI:%[0-9]+]]:vgpr(s32) = G_FPTOSI [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[FPTOSI:%[0-9]+]]:vgpr(s32) = G_FPTOSI [[PRED_COPY]](s32) %0:_(s32) = COPY $sgpr0 %1:_(s32) = G_FPTOSI %0 ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fptoui.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fptoui.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fptoui.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fptoui.mir @@ -13,8 +13,8 @@ ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[FPTOUI:%[0-9]+]]:vgpr(s32) = G_FPTOUI [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[FPTOUI:%[0-9]+]]:vgpr(s32) = G_FPTOUI [[PRED_COPY]](s32) %0:_(s32) = COPY $sgpr0 %1:_(s32) = G_FPTOUI %0 ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fptrunc.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fptrunc.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fptrunc.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fptrunc.mir @@ -13,8 +13,8 @@ ; CHECK: liveins: $sgpr0_sgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY [[COPY]](s64) - ; CHECK-NEXT: [[FPTRUNC:%[0-9]+]]:vgpr(s32) = G_FPTRUNC [[COPY1]](s64) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s64) = PRED_COPY [[COPY]](s64) + ; CHECK-NEXT: [[FPTRUNC:%[0-9]+]]:vgpr(s32) = G_FPTRUNC [[PRED_COPY]](s64) %0:_(s64) = COPY $sgpr0_sgpr1 %1:_(s32) = G_FPTRUNC %0 ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-frint.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-frint.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-frint.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-frint.mir @@ -13,8 +13,8 @@ ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[FRINT:%[0-9]+]]:vgpr(s32) = G_FRINT [[COPY1]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[FRINT:%[0-9]+]]:vgpr(s32) = G_FRINT [[PRED_COPY]] %0:_(s32) = COPY $sgpr0 %1:_(s32) = G_FRINT %0 ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fshr.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fshr.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fshr.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fshr.mir @@ -15,10 +15,10 @@ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[FSHR:%[0-9]+]]:vgpr(s32) = G_FSHR [[COPY3]], [[COPY4]], [[COPY5]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY2]](s32) + ; CHECK-NEXT: [[FSHR:%[0-9]+]]:vgpr(s32) = G_FSHR [[PRED_COPY]], [[PRED_COPY1]], [[PRED_COPY2]](s32) %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 %2:_(s32) = COPY $sgpr2 @@ -37,9 +37,9 @@ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[FSHR:%[0-9]+]]:vgpr(s32) = G_FSHR [[COPY]], [[COPY3]], [[COPY4]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY2]](s32) + ; CHECK-NEXT: [[FSHR:%[0-9]+]]:vgpr(s32) = G_FSHR [[COPY]], [[PRED_COPY]], [[PRED_COPY1]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $sgpr0 %2:_(s32) = COPY $sgpr1 @@ -58,9 +58,9 @@ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[FSHR:%[0-9]+]]:vgpr(s32) = G_FSHR [[COPY3]], [[COPY1]], [[COPY4]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY2]](s32) + ; CHECK-NEXT: [[FSHR:%[0-9]+]]:vgpr(s32) = G_FSHR [[PRED_COPY]], [[COPY1]], [[PRED_COPY1]](s32) %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $vgpr0 %2:_(s32) = COPY $sgpr1 @@ -79,9 +79,9 @@ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; 
CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[FSHR:%[0-9]+]]:vgpr(s32) = G_FSHR [[COPY3]], [[COPY4]], [[COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[FSHR:%[0-9]+]]:vgpr(s32) = G_FSHR [[PRED_COPY]], [[PRED_COPY1]], [[COPY2]](s32) %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 %2:_(s32) = COPY $vgpr0 @@ -100,8 +100,8 @@ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[FSHR:%[0-9]+]]:vgpr(s32) = G_FSHR [[COPY]], [[COPY1]], [[COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY2]](s32) + ; CHECK-NEXT: [[FSHR:%[0-9]+]]:vgpr(s32) = G_FSHR [[COPY]], [[COPY1]], [[PRED_COPY]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32) = COPY $sgpr0 @@ -120,8 +120,8 @@ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[FSHR:%[0-9]+]]:vgpr(s32) = G_FSHR [[COPY]], [[COPY3]], [[COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[FSHR:%[0-9]+]]:vgpr(s32) = G_FSHR [[COPY]], [[PRED_COPY]], [[COPY2]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $sgpr1 %2:_(s32) = COPY $vgpr1 @@ -140,8 +140,8 @@ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[FSHR:%[0-9]+]]:vgpr(s32) = G_FSHR [[COPY3]], [[COPY1]], [[COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[FSHR:%[0-9]+]]:vgpr(s32) = G_FSHR [[PRED_COPY]], [[COPY1]], [[COPY2]](s32) %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $vgpr0 %2:_(s32) = COPY $vgpr1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fsqrt.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fsqrt.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fsqrt.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fsqrt.mir @@ -13,8 +13,8 @@ ; CHECK: liveins: $sgpr0_sgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[FSQRT:%[0-9]+]]:vgpr(s32) = G_FSQRT [[COPY1]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[FSQRT:%[0-9]+]]:vgpr(s32) = G_FSQRT [[PRED_COPY]] ; CHECK-NEXT: $vgpr0 = COPY [[FSQRT]](s32) %0:_(s32) = COPY $sgpr0 %1:_(s32) = G_FSQRT %0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fsub.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fsub.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fsub.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fsub.mir @@ -14,9 +14,9 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[FSUB:%[0-9]+]]:vgpr(s32) = 
G_FSUB [[COPY2]], [[COPY3]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[FSUB:%[0-9]+]]:vgpr(s32) = G_FSUB [[PRED_COPY]], [[PRED_COPY1]] %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 %2:_(s32) = G_FSUB %0, %1 @@ -34,8 +34,8 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[FSUB:%[0-9]+]]:vgpr(s32) = G_FSUB [[COPY2]], [[COPY1]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[FSUB:%[0-9]+]]:vgpr(s32) = G_FSUB [[PRED_COPY]], [[COPY1]] %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $vgpr0 %2:_(s32) = G_FSUB %0, %1 @@ -53,8 +53,8 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[FSUB:%[0-9]+]]:vgpr(s32) = G_FSUB [[COPY]], [[COPY2]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[FSUB:%[0-9]+]]:vgpr(s32) = G_FSUB [[COPY]], [[PRED_COPY]] %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $sgpr0 %2:_(s32) = G_FSUB %0, %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-icmp.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-icmp.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-icmp.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-icmp.mir @@ -42,15 +42,15 @@ ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GFX7-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[COPY1]] + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; GFX7-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[PRED_COPY]](s32), [[COPY1]] ; GFX8-LABEL: name: icmp_eq_s32_sv ; GFX8: liveins: $sgpr0, $vgpr0 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[COPY1]] + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[PRED_COPY]](s32), [[COPY1]] %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $vgpr0 %2:_(s1) = G_ICMP intpred(eq), %0, %1 @@ -68,15 +68,15 @@ ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GFX7-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[COPY2]] + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; GFX7-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[PRED_COPY]] ; GFX8-LABEL: name: icmp_eq_s32_vs ; GFX8: liveins: $sgpr0, $vgpr0 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[COPY2]] + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = 
PRED_COPY [[COPY]](s32) + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[PRED_COPY]] %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $vgpr0 %2:_(s1) = G_ICMP intpred(eq), %1, %0 @@ -118,9 +118,9 @@ ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sgpr(s64) = COPY $sgpr2_sgpr3 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr(s64) = COPY [[COPY]](s64) - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr(s64) = COPY [[COPY1]](s64) - ; GFX7-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s64), [[COPY3]] + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s64) = PRED_COPY [[COPY]](s64) + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s64) = PRED_COPY [[COPY1]](s64) + ; GFX7-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[PRED_COPY]](s64), [[PRED_COPY1]] ; GFX8-LABEL: name: icmp_eq_s64_ss ; GFX8: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 ; GFX8-NEXT: {{ $}} @@ -145,15 +145,15 @@ ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr(s64) = COPY [[COPY]](s64) - ; GFX7-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s64), [[COPY1]] + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s64) = PRED_COPY [[COPY]](s64) + ; GFX7-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[PRED_COPY]](s64), [[COPY1]] ; GFX8-LABEL: name: icmp_eq_s64_sv ; GFX8: liveins: $sgpr0_sgpr1, $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr(s64) = COPY [[COPY]](s64) - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s64), [[COPY1]] + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s64) = PRED_COPY [[COPY]](s64) + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[PRED_COPY]](s64), [[COPY1]] %0:_(s64) = COPY $sgpr0_sgpr1 %1:_(s64) = COPY $vgpr0_vgpr1 %2:_(s1) = G_ICMP intpred(eq), %0, %1 @@ -171,15 +171,15 @@ ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr(s64) = COPY [[COPY]](s64) - ; GFX7-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s64), [[COPY2]] + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s64) = PRED_COPY [[COPY]](s64) + ; GFX7-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s64), [[PRED_COPY]] ; GFX8-LABEL: name: icmp_eq_s64_vs ; GFX8: liveins: $sgpr0_sgpr1, $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr(s64) = COPY [[COPY]](s64) - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s64), [[COPY2]] + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s64) = PRED_COPY [[COPY]](s64) + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s64), [[PRED_COPY]] %0:_(s64) = COPY $sgpr0_sgpr1 %1:_(s64) = COPY $vgpr0_vgpr1 %2:_(s1) = G_ICMP intpred(eq), %1, %0 @@ -221,9 +221,9 @@ ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sgpr(s64) = COPY $sgpr2_sgpr3 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr(s64) = COPY [[COPY]](s64) - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr(s64) = COPY [[COPY1]](s64) - ; GFX7-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[COPY2]](s64), [[COPY3]] + ; GFX7-NEXT: 
[[PRED_COPY:%[0-9]+]]:vgpr(s64) = PRED_COPY [[COPY]](s64) + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s64) = PRED_COPY [[COPY1]](s64) + ; GFX7-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[PRED_COPY]](s64), [[PRED_COPY1]] ; GFX8-LABEL: name: icmp_ne_s64_ss ; GFX8: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 ; GFX8-NEXT: {{ $}} @@ -248,15 +248,15 @@ ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr(s64) = COPY [[COPY]](s64) - ; GFX7-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[COPY2]](s64), [[COPY1]] + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s64) = PRED_COPY [[COPY]](s64) + ; GFX7-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[PRED_COPY]](s64), [[COPY1]] ; GFX8-LABEL: name: icmp_ne_s64_sv ; GFX8: liveins: $sgpr0_sgpr1, $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr(s64) = COPY [[COPY]](s64) - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[COPY2]](s64), [[COPY1]] + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s64) = PRED_COPY [[COPY]](s64) + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[PRED_COPY]](s64), [[COPY1]] %0:_(s64) = COPY $sgpr0_sgpr1 %1:_(s64) = COPY $vgpr0_vgpr1 %2:_(s1) = G_ICMP intpred(ne), %0, %1 @@ -274,15 +274,15 @@ ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr(s64) = COPY [[COPY]](s64) - ; GFX7-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[COPY1]](s64), [[COPY2]] + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s64) = PRED_COPY [[COPY]](s64) + ; GFX7-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[COPY1]](s64), [[PRED_COPY]] ; GFX8-LABEL: name: icmp_ne_s64_vs ; GFX8: liveins: $sgpr0_sgpr1, $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr(s64) = COPY [[COPY]](s64) - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[COPY1]](s64), [[COPY2]] + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s64) = PRED_COPY [[COPY]](s64) + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[COPY1]](s64), [[PRED_COPY]] %0:_(s64) = COPY $sgpr0_sgpr1 %1:_(s64) = COPY $vgpr0_vgpr1 %2:_(s1) = G_ICMP intpred(ne), %1, %0 @@ -324,17 +324,17 @@ ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sgpr(s64) = COPY $sgpr2_sgpr3 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr(s64) = COPY [[COPY]](s64) - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr(s64) = COPY [[COPY1]](s64) - ; GFX7-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(slt), [[COPY2]](s64), [[COPY3]] + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s64) = PRED_COPY [[COPY]](s64) + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s64) = PRED_COPY [[COPY1]](s64) + ; GFX7-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(slt), [[PRED_COPY]](s64), [[PRED_COPY1]] ; GFX8-LABEL: name: icmp_slt_s64_ss ; GFX8: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sgpr(s64) = COPY $sgpr2_sgpr3 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr(s64) = COPY [[COPY]](s64) - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr(s64) = COPY [[COPY1]](s64) - ; GFX8-NEXT: 
[[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(slt), [[COPY2]](s64), [[COPY3]] + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s64) = PRED_COPY [[COPY]](s64) + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s64) = PRED_COPY [[COPY1]](s64) + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(slt), [[PRED_COPY]](s64), [[PRED_COPY1]] %0:_(s64) = COPY $sgpr0_sgpr1 %1:_(s64) = COPY $sgpr2_sgpr3 %2:_(s1) = G_ICMP intpred(slt), %0, %1 @@ -352,15 +352,15 @@ ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr(s64) = COPY [[COPY]](s64) - ; GFX7-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(slt), [[COPY2]](s64), [[COPY1]] + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s64) = PRED_COPY [[COPY]](s64) + ; GFX7-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(slt), [[PRED_COPY]](s64), [[COPY1]] ; GFX8-LABEL: name: icmp_slt_s64_sv ; GFX8: liveins: $sgpr0_sgpr1, $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr(s64) = COPY [[COPY]](s64) - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(slt), [[COPY2]](s64), [[COPY1]] + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s64) = PRED_COPY [[COPY]](s64) + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(slt), [[PRED_COPY]](s64), [[COPY1]] %0:_(s64) = COPY $sgpr0_sgpr1 %1:_(s64) = COPY $vgpr0_vgpr1 %2:_(s1) = G_ICMP intpred(slt), %0, %1 @@ -378,15 +378,15 @@ ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr(s64) = COPY [[COPY]](s64) - ; GFX7-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(slt), [[COPY1]](s64), [[COPY2]] + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s64) = PRED_COPY [[COPY]](s64) + ; GFX7-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(slt), [[COPY1]](s64), [[PRED_COPY]] ; GFX8-LABEL: name: icmp_slt_s64_vs ; GFX8: liveins: $sgpr0_sgpr1, $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr(s64) = COPY [[COPY]](s64) - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(slt), [[COPY1]](s64), [[COPY2]] + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s64) = PRED_COPY [[COPY]](s64) + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(slt), [[COPY1]](s64), [[PRED_COPY]] %0:_(s64) = COPY $sgpr0_sgpr1 %1:_(s64) = COPY $vgpr0_vgpr1 %2:_(s1) = G_ICMP intpred(slt), %1, %0 @@ -430,18 +430,18 @@ ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; GFX7-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[COPY3]] + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; GFX7-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[PRED_COPY]](s32), [[PRED_COPY1]] ; GFX7-NEXT: S_ENDPGM 0, implicit [[ICMP]](s1) ; GFX8-LABEL: name: map_icmp_already_vcc_bank_sgpr_inputs ; GFX8: liveins: $sgpr0, $sgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GFX8-NEXT: 
[[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[COPY3]] + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[PRED_COPY]](s32), [[PRED_COPY1]] ; GFX8-NEXT: S_ENDPGM 0, implicit [[ICMP]](s1) %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 @@ -464,18 +464,18 @@ ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; GFX7-NEXT: [[ICMP:%[0-9]+]]:sreg_64_xexec(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[COPY3]] + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; GFX7-NEXT: [[ICMP:%[0-9]+]]:sreg_64_xexec(s1) = G_ICMP intpred(eq), [[PRED_COPY]](s32), [[PRED_COPY1]] ; GFX7-NEXT: S_ENDPGM 0, implicit [[ICMP]](s1) ; GFX8-LABEL: name: map_icmp_already_vcc_regclass_sgpr_inputs ; GFX8: liveins: $sgpr0, $sgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:sreg_64_xexec(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[COPY3]] + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:sreg_64_xexec(s1) = G_ICMP intpred(eq), [[PRED_COPY]](s32), [[PRED_COPY1]] ; GFX8-NEXT: S_ENDPGM 0, implicit [[ICMP]](s1) %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-icmp.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-icmp.s16.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-icmp.s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-icmp.s16.mir @@ -16,9 +16,9 @@ ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY1]](s32) - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr(s16) = COPY [[TRUNC]](s16) - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr(s16) = COPY [[TRUNC1]](s16) - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s16), [[COPY3]] + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s16) = PRED_COPY [[TRUNC]](s16) + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s16) = PRED_COPY [[TRUNC1]](s16) + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[PRED_COPY]](s16), [[PRED_COPY1]] %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 %2:_(s16) = G_TRUNC %0 @@ -40,8 +40,8 @@ ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY1]](s32) - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr(s16) = COPY [[TRUNC]](s16) - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s16), [[TRUNC1]] + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s16) = PRED_COPY [[TRUNC]](s16) + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), 
[[PRED_COPY]](s16), [[TRUNC1]] %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $vgpr0 %2:_(s16) = G_TRUNC %0 @@ -63,8 +63,8 @@ ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY1]](s32) - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr(s16) = COPY [[TRUNC]](s16) - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s16), [[TRUNC1]] + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s16) = PRED_COPY [[TRUNC]](s16) + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[PRED_COPY]](s16), [[TRUNC1]] %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $vgpr0 %2:_(s16) = G_TRUNC %0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-implicit-def.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-implicit-def.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-implicit-def.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-implicit-def.mir @@ -14,8 +14,8 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; CHECK-NEXT: [[DEF:%[0-9]+]]:sgpr(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[DEF]](s32) - ; CHECK-NEXT: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store (s32), addrspace 1) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[DEF]](s32) + ; CHECK-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s32), addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(s32) = G_IMPLICIT_DEF G_STORE %1, %0 :: (store (s32), addrspace 1) @@ -47,8 +47,8 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; CHECK-NEXT: [[DEF:%[0-9]+]]:sgpr(s64) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY [[DEF]](s64) - ; CHECK-NEXT: G_STORE [[COPY1]](s64), [[COPY]](p1) :: (store (s64), addrspace 1) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s64) = PRED_COPY [[DEF]](s64) + ; CHECK-NEXT: G_STORE [[PRED_COPY]](s64), [[COPY]](p1) :: (store (s64), addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(s64) = G_IMPLICIT_DEF G_STORE %1, %0 :: (store (s64), addrspace 1) @@ -67,8 +67,8 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; CHECK-NEXT: [[DEF:%[0-9]+]]:sgpr(<3 x s32>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<3 x s32>) = COPY [[DEF]](<3 x s32>) - ; CHECK-NEXT: G_STORE [[COPY1]](<3 x s32>), [[COPY]](p1) :: (store (<3 x s32>), align 4, addrspace 1) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(<3 x s32>) = PRED_COPY [[DEF]](<3 x s32>) + ; CHECK-NEXT: G_STORE [[PRED_COPY]](<3 x s32>), [[COPY]](p1) :: (store (<3 x s32>), align 4, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<3 x s32>) = G_IMPLICIT_DEF G_STORE %1, %0 :: (store (<3 x s32>), addrspace 1, align 4) @@ -87,8 +87,8 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; CHECK-NEXT: [[DEF:%[0-9]+]]:sgpr(<4 x s32>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<4 x s32>) = COPY [[DEF]](<4 x s32>) - ; CHECK-NEXT: G_STORE [[COPY1]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(<4 x s32>) = PRED_COPY [[DEF]](<4 x s32>) + ; CHECK-NEXT: G_STORE [[PRED_COPY]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<4 x s32>) = G_IMPLICIT_DEF G_STORE %1, %0 :: (store (<4 x s32>), addrspace 1, align 4) @@ -158,8 +158,8 @@ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 ; CHECK-NEXT: [[DEF:%[0-9]+]]:sgpr(s32) = 
G_IMPLICIT_DEF ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[DEF]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY2]](s1), [[COPY]], [[COPY1]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PRED_COPY]](s1), [[COPY]], [[COPY1]] ; CHECK-NEXT: S_ENDPGM 0, implicit [[SELECT]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 @@ -182,8 +182,8 @@ ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 ; FAST-NEXT: [[DEF:%[0-9]+]]:sgpr(s32) = G_IMPLICIT_DEF ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[DEF]](s32) - ; FAST-NEXT: [[COPY2:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY2]](s1), [[COPY]], [[COPY1]] + ; FAST-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) + ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PRED_COPY]](s1), [[COPY]], [[COPY1]] ; FAST-NEXT: S_ENDPGM 0, implicit [[SELECT]](s32) ; GREEDY-LABEL: name: test_implicit_def_s1_explicit_vcc_use_0 ; GREEDY: liveins: $vgpr0, $vgpr1 @@ -209,8 +209,8 @@ ; FAST-LABEL: name: test_implicit_def_s1_explicit_vcc_use_1 ; FAST: [[DEF:%[0-9]+]]:sgpr(s32) = G_IMPLICIT_DEF ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[DEF]](s32) - ; FAST-NEXT: [[COPY:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; FAST-NEXT: S_ENDPGM 0, implicit [[COPY]](s1) + ; FAST-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) + ; FAST-NEXT: S_ENDPGM 0, implicit [[PRED_COPY]](s1) ; GREEDY-LABEL: name: test_implicit_def_s1_explicit_vcc_use_1 ; GREEDY: [[DEF:%[0-9]+]]:vcc(s1) = G_IMPLICIT_DEF ; GREEDY-NEXT: S_ENDPGM 0, implicit [[DEF]](s1) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-insert-vector-elt.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-insert-vector-elt.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-insert-vector-elt.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-insert-vector-elt.mir @@ -52,24 +52,24 @@ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY2]](s32) ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY3]](s32), [[C]] - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[COPY4]], [[UV]] + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[PRED_COPY]](s32), [[C]] + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[PRED_COPY1]], [[UV]] ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY3]](s32), [[C1]] - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP1]](s1), [[COPY5]], [[UV1]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[PRED_COPY]](s32), [[C1]] + ; CHECK-NEXT: 
[[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP1]](s1), [[PRED_COPY2]], [[UV1]] ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 2 - ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY3]](s32), [[C2]] - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP2]](s1), [[COPY6]], [[UV2]] + ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[PRED_COPY]](s32), [[C2]] + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP2]](s1), [[PRED_COPY3]], [[UV2]] ; CHECK-NEXT: [[C3:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 3 - ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY3]](s32), [[C3]] - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[SELECT3:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP3]](s1), [[COPY7]], [[UV3]] + ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[PRED_COPY]](s32), [[C3]] + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[SELECT3:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP3]](s1), [[PRED_COPY4]], [[UV3]] ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[SELECT]](s32), [[SELECT1]](s32), [[SELECT2]](s32), [[SELECT3]](s32) ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 @@ -93,20 +93,20 @@ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(<4 x s32>) = COPY [[COPY]](<4 x s32>) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY3]](<4 x s32>) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(<4 x s32>) = PRED_COPY [[COPY]](<4 x s32>) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY2]](s32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[PRED_COPY]](<4 x s32>) ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY4]](s32), [[C]] + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[PRED_COPY1]](s32), [[C]] ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[COPY1]], [[UV]] ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY4]](s32), [[C1]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[PRED_COPY1]](s32), [[C1]] ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP1]](s1), [[COPY1]], [[UV1]] ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 2 - ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY4]](s32), [[C2]] + ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[PRED_COPY1]](s32), [[C2]] ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP2]](s1), [[COPY1]], [[UV2]] ; CHECK-NEXT: [[C3:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 3 - ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY4]](s32), [[C3]] + ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:vcc(s1) = 
G_ICMP intpred(eq), [[PRED_COPY1]](s32), [[C3]] ; CHECK-NEXT: [[SELECT3:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP3]](s1), [[COPY1]], [[UV3]] ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[SELECT]](s32), [[SELECT1]](s32), [[SELECT2]](s32), [[SELECT3]](s32) ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) @@ -132,24 +132,24 @@ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(<4 x s32>) = COPY [[COPY]](<4 x s32>) - ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY3]](<4 x s32>) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(<4 x s32>) = PRED_COPY [[COPY]](<4 x s32>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[PRED_COPY]](<4 x s32>) ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[COPY4]], [[UV]] + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[PRED_COPY1]], [[UV]] ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C1]] - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP1]](s1), [[COPY5]], [[UV1]] + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP1]](s1), [[PRED_COPY2]], [[UV1]] ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 2 ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C2]] - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP2]](s1), [[COPY6]], [[UV2]] + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP2]](s1), [[PRED_COPY3]], [[UV2]] ; CHECK-NEXT: [[C3:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 3 ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C3]] - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[SELECT3:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP3]](s1), [[COPY7]], [[UV3]] + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[SELECT3:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP3]](s1), [[PRED_COPY4]], [[UV3]] ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[SELECT]](s32), [[SELECT1]](s32), [[SELECT2]](s32), [[SELECT3]](s32) ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) %0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 @@ -174,8 +174,8 @@ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(<4 x s32>) = COPY [[COPY]](<4 x s32>) - ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), 
[[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY3]](<4 x s32>) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(<4 x s32>) = PRED_COPY [[COPY]](<4 x s32>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[PRED_COPY]](<4 x s32>) ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[COPY1]], [[UV]] @@ -215,20 +215,20 @@ ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[COPY3]], [[UV]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[PRED_COPY]], [[UV]] ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C1]] - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP1]](s1), [[COPY4]], [[UV1]] + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP1]](s1), [[PRED_COPY1]], [[UV1]] ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 2 ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C2]] - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP2]](s1), [[COPY5]], [[UV2]] + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP2]](s1), [[PRED_COPY2]], [[UV2]] ; CHECK-NEXT: [[C3:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 3 ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C3]] - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[SELECT3:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP3]](s1), [[COPY6]], [[UV3]] + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[SELECT3:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP3]](s1), [[PRED_COPY3]], [[UV3]] ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[SELECT]](s32), [[SELECT1]](s32), [[SELECT2]](s32), [[SELECT3]](s32) ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 @@ -253,19 +253,19 @@ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY2]](s32) ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), 
[[COPY3]](s32), [[C]] + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[PRED_COPY]](s32), [[C]] ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[COPY1]], [[UV]] ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY3]](s32), [[C1]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[PRED_COPY]](s32), [[C1]] ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP1]](s1), [[COPY1]], [[UV1]] ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 2 - ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY3]](s32), [[C2]] + ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[PRED_COPY]](s32), [[C2]] ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP2]](s1), [[COPY1]], [[UV2]] ; CHECK-NEXT: [[C3:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 3 - ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY3]](s32), [[C3]] + ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[PRED_COPY]](s32), [[C3]] ; CHECK-NEXT: [[SELECT3:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP3]](s1), [[COPY1]], [[UV3]] ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[SELECT]](s32), [[SELECT1]](s32), [[SELECT2]](s32), [[SELECT3]](s32) ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) @@ -383,9 +383,9 @@ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<8 x s64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr16 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(<8 x s64>) = COPY [[COPY]](<8 x s64>) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(<8 x s64>) = PRED_COPY [[COPY]](<8 x s64>) ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:vgpr(<16 x s32>) = G_BITCAST [[COPY3]](<8 x s64>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:vgpr(<16 x s32>) = G_BITCAST [[PRED_COPY]](<8 x s64>) ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[COPY2]], [[C]](s32) ; CHECK-NEXT: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[SHL]], [[C]] @@ -415,57 +415,57 @@ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<8 x s64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s64) = COPY $sgpr16_sgpr17 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(<8 x s64>) = COPY [[COPY]](<8 x s64>) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(<8 x s64>) = PRED_COPY [[COPY]](<8 x s64>) ; CHECK-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32), [[UV16:%[0-9]+]]:vgpr(s32), [[UV17:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY3]](<8 x s64>) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), 
[[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32), [[UV16:%[0-9]+]]:vgpr(s32), [[UV17:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[PRED_COPY]](<8 x s64>) ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32) - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[COPY4]], [[UV2]] - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[UV1]](s32) - ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[COPY5]], [[UV3]] + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[PRED_COPY1]], [[UV2]] + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[PRED_COPY2]], [[UV3]] ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C1]] - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32) - ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP1]](s1), [[COPY6]], [[UV4]] - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[UV1]](s32) - ; CHECK-NEXT: [[SELECT3:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP1]](s1), [[COPY7]], [[UV5]] + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP1]](s1), [[PRED_COPY3]], [[UV4]] + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: [[SELECT3:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP1]](s1), [[PRED_COPY4]], [[UV5]] ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 2 ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C2]] - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32) - ; CHECK-NEXT: [[SELECT4:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP2]](s1), [[COPY8]], [[UV6]] - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr(s32) = COPY [[UV1]](s32) - ; CHECK-NEXT: [[SELECT5:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP2]](s1), [[COPY9]], [[UV7]] + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: [[SELECT4:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP2]](s1), [[PRED_COPY5]], [[UV6]] + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: [[SELECT5:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP2]](s1), [[PRED_COPY6]], [[UV7]] ; CHECK-NEXT: [[C3:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 3 ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C3]] - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32) - ; CHECK-NEXT: [[SELECT6:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP3]](s1), [[COPY10]], [[UV8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vgpr(s32) = COPY [[UV1]](s32) - ; CHECK-NEXT: [[SELECT7:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP3]](s1), [[COPY11]], [[UV9]] + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: [[SELECT6:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP3]](s1), [[PRED_COPY7]], [[UV8]] + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: [[SELECT7:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP3]](s1), [[PRED_COPY8]], [[UV9]] ; CHECK-NEXT: [[C4:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4 ; CHECK-NEXT: 
[[ICMP4:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C4]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32) - ; CHECK-NEXT: [[SELECT8:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP4]](s1), [[COPY12]], [[UV10]] - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vgpr(s32) = COPY [[UV1]](s32) - ; CHECK-NEXT: [[SELECT9:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP4]](s1), [[COPY13]], [[UV11]] + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: [[SELECT8:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP4]](s1), [[PRED_COPY9]], [[UV10]] + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: [[SELECT9:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP4]](s1), [[PRED_COPY10]], [[UV11]] ; CHECK-NEXT: [[C5:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 5 ; CHECK-NEXT: [[ICMP5:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C5]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32) - ; CHECK-NEXT: [[SELECT10:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP5]](s1), [[COPY14]], [[UV12]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:vgpr(s32) = COPY [[UV1]](s32) - ; CHECK-NEXT: [[SELECT11:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP5]](s1), [[COPY15]], [[UV13]] + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: [[SELECT10:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP5]](s1), [[PRED_COPY11]], [[UV12]] + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: [[SELECT11:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP5]](s1), [[PRED_COPY12]], [[UV13]] ; CHECK-NEXT: [[C6:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 6 ; CHECK-NEXT: [[ICMP6:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C6]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32) - ; CHECK-NEXT: [[SELECT12:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP6]](s1), [[COPY16]], [[UV14]] - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:vgpr(s32) = COPY [[UV1]](s32) - ; CHECK-NEXT: [[SELECT13:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP6]](s1), [[COPY17]], [[UV15]] + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: [[SELECT12:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP6]](s1), [[PRED_COPY13]], [[UV14]] + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: [[SELECT13:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP6]](s1), [[PRED_COPY14]], [[UV15]] ; CHECK-NEXT: [[C7:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 7 ; CHECK-NEXT: [[ICMP7:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C7]] - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32) - ; CHECK-NEXT: [[SELECT14:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP7]](s1), [[COPY18]], [[UV16]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:vgpr(s32) = COPY [[UV1]](s32) - ; CHECK-NEXT: [[SELECT15:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP7]](s1), [[COPY19]], [[UV17]] + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: [[SELECT14:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP7]](s1), [[PRED_COPY15]], [[UV16]] + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: [[SELECT15:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP7]](s1), [[PRED_COPY16]], [[UV17]] ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<16 x s32>) = G_BUILD_VECTOR [[SELECT]](s32), [[SELECT1]](s32), [[SELECT2]](s32), [[SELECT3]](s32), [[SELECT4]](s32), [[SELECT5]](s32), [[SELECT6]](s32), [[SELECT7]](s32), [[SELECT8]](s32), [[SELECT9]](s32), [[SELECT10]](s32), [[SELECT11]](s32), [[SELECT12]](s32), [[SELECT13]](s32), [[SELECT14]](s32), [[SELECT15]](s32) ; CHECK-NEXT: 
[[BITCAST:%[0-9]+]]:vgpr(<8 x s64>) = G_BITCAST [[BUILD_VECTOR]](<16 x s32>) ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[BITCAST]](<8 x s64>) @@ -491,9 +491,9 @@ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<8 x s64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(<8 x s64>) = COPY [[COPY]](<8 x s64>) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(<8 x s64>) = PRED_COPY [[COPY]](<8 x s64>) ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32), [[UV16:%[0-9]+]]:vgpr(s32), [[UV17:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY3]](<8 x s64>) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32), [[UV16:%[0-9]+]]:vgpr(s32), [[UV17:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[PRED_COPY]](<8 x s64>) ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[UV]], [[UV2]] @@ -586,52 +586,52 @@ ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32), [[UV16:%[0-9]+]]:vgpr(s32), [[UV17:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](<8 x s64>) ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32) - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[COPY3]], [[UV2]] - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[UV1]](s32) - ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[COPY4]], [[UV3]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[PRED_COPY]], [[UV2]] + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[PRED_COPY1]], [[UV3]] ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C1]] - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32) - ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:vgpr(s32) = 
G_SELECT [[ICMP1]](s1), [[COPY5]], [[UV4]] - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[UV1]](s32) - ; CHECK-NEXT: [[SELECT3:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP1]](s1), [[COPY6]], [[UV5]] + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP1]](s1), [[PRED_COPY2]], [[UV4]] + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: [[SELECT3:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP1]](s1), [[PRED_COPY3]], [[UV5]] ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 2 ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C2]] - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32) - ; CHECK-NEXT: [[SELECT4:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP2]](s1), [[COPY7]], [[UV6]] - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY [[UV1]](s32) - ; CHECK-NEXT: [[SELECT5:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP2]](s1), [[COPY8]], [[UV7]] + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: [[SELECT4:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP2]](s1), [[PRED_COPY4]], [[UV6]] + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: [[SELECT5:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP2]](s1), [[PRED_COPY5]], [[UV7]] ; CHECK-NEXT: [[C3:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 3 ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C3]] - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32) - ; CHECK-NEXT: [[SELECT6:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP3]](s1), [[COPY9]], [[UV8]] - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr(s32) = COPY [[UV1]](s32) - ; CHECK-NEXT: [[SELECT7:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP3]](s1), [[COPY10]], [[UV9]] + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: [[SELECT6:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP3]](s1), [[PRED_COPY6]], [[UV8]] + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: [[SELECT7:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP3]](s1), [[PRED_COPY7]], [[UV9]] ; CHECK-NEXT: [[C4:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4 ; CHECK-NEXT: [[ICMP4:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C4]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32) - ; CHECK-NEXT: [[SELECT8:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP4]](s1), [[COPY11]], [[UV10]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vgpr(s32) = COPY [[UV1]](s32) - ; CHECK-NEXT: [[SELECT9:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP4]](s1), [[COPY12]], [[UV11]] + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: [[SELECT8:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP4]](s1), [[PRED_COPY8]], [[UV10]] + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: [[SELECT9:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP4]](s1), [[PRED_COPY9]], [[UV11]] ; CHECK-NEXT: [[C5:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 5 ; CHECK-NEXT: [[ICMP5:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C5]] - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32) - ; CHECK-NEXT: [[SELECT10:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP5]](s1), [[COPY13]], [[UV12]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:vgpr(s32) = COPY [[UV1]](s32) - ; CHECK-NEXT: [[SELECT11:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP5]](s1), [[COPY14]], [[UV13]] + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: [[SELECT10:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP5]](s1), [[PRED_COPY10]], [[UV12]] + 
; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: [[SELECT11:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP5]](s1), [[PRED_COPY11]], [[UV13]] ; CHECK-NEXT: [[C6:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 6 ; CHECK-NEXT: [[ICMP6:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C6]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32) - ; CHECK-NEXT: [[SELECT12:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP6]](s1), [[COPY15]], [[UV14]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:vgpr(s32) = COPY [[UV1]](s32) - ; CHECK-NEXT: [[SELECT13:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP6]](s1), [[COPY16]], [[UV15]] + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: [[SELECT12:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP6]](s1), [[PRED_COPY12]], [[UV14]] + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: [[SELECT13:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP6]](s1), [[PRED_COPY13]], [[UV15]] ; CHECK-NEXT: [[C7:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 7 ; CHECK-NEXT: [[ICMP7:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C7]] - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32) - ; CHECK-NEXT: [[SELECT14:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP7]](s1), [[COPY17]], [[UV16]] - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:vgpr(s32) = COPY [[UV1]](s32) - ; CHECK-NEXT: [[SELECT15:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP7]](s1), [[COPY18]], [[UV17]] + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: [[SELECT14:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP7]](s1), [[PRED_COPY14]], [[UV16]] + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: [[SELECT15:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP7]](s1), [[PRED_COPY15]], [[UV17]] ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<16 x s32>) = G_BUILD_VECTOR [[SELECT]](s32), [[SELECT1]](s32), [[SELECT2]](s32), [[SELECT3]](s32), [[SELECT4]](s32), [[SELECT5]](s32), [[SELECT6]](s32), [[SELECT7]](s32), [[SELECT8]](s32), [[SELECT9]](s32), [[SELECT10]](s32), [[SELECT11]](s32), [[SELECT12]](s32), [[SELECT13]](s32), [[SELECT14]](s32), [[SELECT15]](s32) ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:vgpr(<8 x s64>) = G_BITCAST [[BUILD_VECTOR]](<16 x s32>) ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[BITCAST]](<8 x s64>) @@ -787,16 +787,16 @@ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(s32) = G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x s32>), [[C]](s32), 0 :: (dereferenceable invariant load (s32)) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(<2 x s32>) = COPY [[COPY1]](<2 x s32>) - ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY3]](<2 x s32>) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(<2 x s32>) = PRED_COPY [[COPY1]](<2 x s32>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[PRED_COPY]](<2 x s32>) ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C1]] - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[AMDGPU_S_BUFFER_LOAD]](s32) - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[COPY4]], [[UV]] + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[AMDGPU_S_BUFFER_LOAD]](s32) + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), 
[[PRED_COPY1]], [[UV]] ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C2]] - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[AMDGPU_S_BUFFER_LOAD]](s32) - ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP1]](s1), [[COPY5]], [[UV1]] + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY [[AMDGPU_S_BUFFER_LOAD]](s32) + ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP1]](s1), [[PRED_COPY2]], [[UV1]] ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<2 x s32>) = G_BUILD_VECTOR [[SELECT]](s32), [[SELECT1]](s32) ; CHECK-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<2 x s32>) %0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-insert.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-insert.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-insert.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-insert.mir @@ -32,8 +32,8 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s64) = COPY [[COPY]](s64) - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:vgpr(s64) = G_INSERT [[COPY2]], [[COPY1]](s32), 0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s64) = PRED_COPY [[COPY]](s64) + ; CHECK-NEXT: [[INSERT:%[0-9]+]]:vgpr(s64) = G_INSERT [[PRED_COPY]], [[COPY1]](s32), 0 %0:_(s64) = COPY $sgpr0_sgpr1 %1:_(s32) = COPY $vgpr2 %2:_(s64) = G_INSERT %0, %1, 0 @@ -50,8 +50,8 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:vgpr(s64) = G_INSERT [[COPY]], [[COPY2]](s32), 0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[INSERT:%[0-9]+]]:vgpr(s64) = G_INSERT [[COPY]], [[PRED_COPY]](s32), 0 %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s32) = COPY $sgpr2 %2:_(s64) = G_INSERT %0, %1, 0 @@ -122,8 +122,8 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:agpr(s64) = COPY $agpr0_agpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s64) = COPY [[COPY]](s64) - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:vgpr(s64) = G_INSERT [[COPY2]], [[COPY1]](s32), 0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s64) = PRED_COPY [[COPY]](s64) + ; CHECK-NEXT: [[INSERT:%[0-9]+]]:vgpr(s64) = G_INSERT [[PRED_COPY]], [[COPY1]](s32), 0 %0:_(s64) = COPY $agpr0_agpr1 %1:_(s32) = COPY $vgpr2 %2:_(s64) = G_INSERT %0, %1, 0 @@ -140,8 +140,8 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:agpr(s32) = COPY $agpr2 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:vgpr(s64) = G_INSERT [[COPY]], [[COPY2]](s32), 0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[INSERT:%[0-9]+]]:vgpr(s64) = G_INSERT [[COPY]], [[PRED_COPY]](s32), 0 %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s32) = COPY $agpr2 %2:_(s64) = G_INSERT %0, %1, 0 @@ -159,9 +159,9 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:agpr(s64) = COPY $agpr0_agpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s64) = COPY [[COPY]](s64) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: 
[[INSERT:%[0-9]+]]:vgpr(s64) = G_INSERT [[COPY2]], [[COPY3]](s32), 0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s64) = PRED_COPY [[COPY]](s64) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[INSERT:%[0-9]+]]:vgpr(s64) = G_INSERT [[PRED_COPY]], [[PRED_COPY1]](s32), 0 %0:_(s64) = COPY $agpr0_agpr1 %1:_(s32) = COPY $sgpr2 %2:_(s64) = G_INSERT %0, %1, 0 @@ -178,9 +178,9 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:agpr(s32) = COPY $agpr2 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s64) = COPY [[COPY]](s64) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:vgpr(s64) = G_INSERT [[COPY2]], [[COPY3]](s32), 0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s64) = PRED_COPY [[COPY]](s64) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[INSERT:%[0-9]+]]:vgpr(s64) = G_INSERT [[PRED_COPY]], [[PRED_COPY1]](s32), 0 %0:_(s64) = COPY $sgpr0_sgpr1 %1:_(s32) = COPY $agpr2 %2:_(s64) = G_INSERT %0, %1, 0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-intrinsic-trunc.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-intrinsic-trunc.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-intrinsic-trunc.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-intrinsic-trunc.mir @@ -13,8 +13,8 @@ ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_TRUNC [[COPY1]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_TRUNC [[PRED_COPY]] %0:_(s32) = COPY $sgpr0 %1:_(s32) = G_INTRINSIC_TRUNC %0 ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-load.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-load.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-load.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-load.mir @@ -117,7 +117,7 @@ ; CHECK: liveins: $sgpr0_sgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p1) = PRED_COPY [[COPY]](p1) ; CHECK-NEXT: [[LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>) from %ir.global.not.uniform.v8i32, align 32, addrspace 1) ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 16 ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[COPY]], [[C]](s64) @@ -139,7 +139,7 @@ ; CHECK: liveins: $sgpr0_sgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p1) = PRED_COPY [[COPY]](p1) ; CHECK-NEXT: [[LOAD:%[0-9]+]]:vgpr(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>) from %ir.global.not.uniform.v4i64, align 32, addrspace 1) ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 16 ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[COPY]], [[C]](s64) @@ -160,7 +160,7 @@ ; CHECK: liveins: $sgpr0_sgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p1) = PRED_COPY [[COPY]](p1) ; CHECK-NEXT: [[LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>) from %ir.global.not.uniform.v16i32, align 64, addrspace 1) ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 16 ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[COPY]], [[C]](s64) @@ -187,7 +187,7 @@ ; CHECK: liveins: $sgpr0_sgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p1) = PRED_COPY [[COPY]](p1) ; CHECK-NEXT: [[LOAD:%[0-9]+]]:vgpr(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>) from %ir.global.not.uniform.v8i64, align 64, addrspace 1) ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 16 ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[COPY]], [[C]](s64) @@ -278,7 +278,7 @@ ; CHECK: liveins: $sgpr0_sgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(p4) = COPY [[COPY]](p4) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p4) = PRED_COPY [[COPY]](p4) ; CHECK-NEXT: [[LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>) from %ir.constant.not.uniform.v8i32, align 32, addrspace 4) ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 16 ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:vgpr(p4) = G_PTR_ADD [[COPY]], [[C]](s64) @@ -299,7 +299,7 @@ ; CHECK: liveins: $sgpr0_sgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(p4) = COPY [[COPY]](p4) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p4) = PRED_COPY [[COPY]](p4) ; CHECK-NEXT: [[LOAD:%[0-9]+]]:vgpr(s128) = G_LOAD [[COPY]](p4) :: (load (s128) from %ir.constant.not.uniform, align 32, addrspace 4) ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 16 ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:vgpr(p4) = G_PTR_ADD [[COPY]], [[C]](s64) @@ 
-321,7 +321,7 @@ ; CHECK: liveins: $sgpr0_sgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(p4) = COPY [[COPY]](p4) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p4) = PRED_COPY [[COPY]](p4) ; CHECK-NEXT: [[LOAD:%[0-9]+]]:vgpr(<8 x s16>) = G_LOAD [[COPY]](p4) :: (load (<8 x s16>) from %ir.constant.not.uniform, align 32, addrspace 4) ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 16 ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:vgpr(p4) = G_PTR_ADD [[COPY]], [[C]](s64) @@ -342,7 +342,7 @@ ; CHECK: liveins: $sgpr0_sgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(p4) = COPY [[COPY]](p4) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p4) = PRED_COPY [[COPY]](p4) ; CHECK-NEXT: [[LOAD:%[0-9]+]]:vgpr(<2 x s64>) = G_LOAD [[COPY]](p4) :: (load (<2 x s64>) from %ir.constant.not.uniform.v4i64, align 32, addrspace 4) ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 16 ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:vgpr(p4) = G_PTR_ADD [[COPY]], [[C]](s64) @@ -363,7 +363,7 @@ ; CHECK: liveins: $sgpr0_sgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(p4) = COPY [[COPY]](p4) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p4) = PRED_COPY [[COPY]](p4) ; CHECK-NEXT: [[LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>) from %ir.constant.not.uniform.v16i32, align 64, addrspace 4) ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 16 ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:vgpr(p4) = G_PTR_ADD [[COPY]], [[C]](s64) @@ -390,7 +390,7 @@ ; CHECK: liveins: $sgpr0_sgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(p4) = COPY [[COPY]](p4) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p4) = PRED_COPY [[COPY]](p4) ; CHECK-NEXT: [[LOAD:%[0-9]+]]:vgpr(<2 x s64>) = G_LOAD [[COPY]](p4) :: (load (<2 x s64>) from %ir.constant.not.uniform.v8i64, align 64, addrspace 4) ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 16 ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:vgpr(p4) = G_PTR_ADD [[COPY]], [[C]](s64) @@ -497,8 +497,8 @@ ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p3) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY [[COPY]](p3) - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p3) :: (load (s32), addrspace 3) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p3) = PRED_COPY [[COPY]](p3) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[PRED_COPY]](p3) :: (load (s32), addrspace 3) %0:_(p3) = COPY $sgpr0 %1:_(s32) = G_LOAD %0 :: (load (s32), addrspace 3) @@ -514,8 +514,8 @@ ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p3) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY [[COPY]](p3) - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p3) :: (load (s32), addrspace 5) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p3) = PRED_COPY [[COPY]](p3) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[PRED_COPY]](p3) :: (load (s32), addrspace 5) %0:_(p3) = COPY $sgpr0 %1:_(s32) = G_LOAD %0 :: (load (s32), addrspace 5) @@ -532,8 +532,8 @@ ; CHECK: liveins: $sgpr0_sgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(p4) = COPY [[COPY]](p4) - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p4) :: (load (s8), addrspace 4) + ; CHECK-NEXT: 
[[PRED_COPY:%[0-9]+]]:vgpr(p4) = PRED_COPY [[COPY]](p4) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[PRED_COPY]](p4) :: (load (s8), addrspace 4) %0:_(p4) = COPY $sgpr0_sgpr1 %1:_(s32) = G_LOAD %0 :: (load (s8), addrspace 4, align 1) ... @@ -550,8 +550,8 @@ ; CHECK: liveins: $sgpr0_sgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(p4) = COPY [[COPY]](p4) - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p4) :: (load (s8), addrspace 1) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p4) = PRED_COPY [[COPY]](p4) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[PRED_COPY]](p4) :: (load (s8), addrspace 1) %0:_(p4) = COPY $sgpr0_sgpr1 %1:_(s32) = G_LOAD %0 :: (load (s8), addrspace 1, align 1) ... @@ -568,8 +568,8 @@ ; CHECK: liveins: $sgpr0_sgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(p4) = COPY [[COPY]](p4) - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p4) :: (load (s16), addrspace 4) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p4) = PRED_COPY [[COPY]](p4) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[PRED_COPY]](p4) :: (load (s16), addrspace 4) %0:_(p4) = COPY $sgpr0_sgpr1 %1:_(s32) = G_LOAD %0 :: (load (s16), addrspace 4, align 2) ... @@ -586,8 +586,8 @@ ; CHECK: liveins: $sgpr0_sgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(p4) = COPY [[COPY]](p4) - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p4) :: (load (s16), addrspace 1) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p4) = PRED_COPY [[COPY]](p4) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[PRED_COPY]](p4) :: (load (s16), addrspace 1) %0:_(p4) = COPY $sgpr0_sgpr1 %1:_(s32) = G_LOAD %0 :: (load (s16), addrspace 1, align 2) ... @@ -620,8 +620,8 @@ ; CHECK: liveins: $sgpr0_sgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(p4) = COPY [[COPY]](p4) - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p4) :: (load (s32), align 2, addrspace 4) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p4) = PRED_COPY [[COPY]](p4) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[PRED_COPY]](p4) :: (load (s32), align 2, addrspace 4) %0:_(p4) = COPY $sgpr0_sgpr1 %1:_(s32) = G_LOAD %0 :: (load (s32), addrspace 4, align 2) ... @@ -638,8 +638,8 @@ ; CHECK: liveins: $sgpr0_sgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(p4) = COPY [[COPY]](p4) - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p4) :: (load (s32), align 1, addrspace 4) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p4) = PRED_COPY [[COPY]](p4) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[PRED_COPY]](p4) :: (load (s32), align 1, addrspace 4) %0:_(p4) = COPY $sgpr0_sgpr1 %1:_(s32) = G_LOAD %0 :: (load (s32), addrspace 4, align 1) ... 
@@ -656,8 +656,8 @@ ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p5) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(p5) = COPY [[COPY]](p5) - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p5) :: (load (s32), addrspace 5) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p5) = PRED_COPY [[COPY]](p5) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[PRED_COPY]](p5) :: (load (s32), addrspace 5) %0:_(p5) = COPY $sgpr0 %1:_(s32) = G_LOAD %0 :: (load (s32), addrspace 5, align 4) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-lshr.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-lshr.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-lshr.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-lshr.mir @@ -34,8 +34,8 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:vgpr(s32) = G_LSHR [[COPY2]], [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:vgpr(s32) = G_LSHR [[PRED_COPY]], [[COPY1]](s32) ; CHECK-NEXT: S_ENDPGM 0, implicit [[LSHR]](s32) %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $vgpr0 @@ -55,8 +55,8 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:vgpr(s32) = G_LSHR [[COPY]], [[COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:vgpr(s32) = G_LSHR [[COPY]], [[PRED_COPY]](s32) ; CHECK-NEXT: S_ENDPGM 0, implicit [[LSHR]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $sgpr0 @@ -126,8 +126,8 @@ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY1]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s16) = COPY [[TRUNC]](s16) - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:vgpr(s16) = G_LSHR [[COPY2]], [[TRUNC1]](s16) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s16) = PRED_COPY [[TRUNC]](s16) + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:vgpr(s16) = G_LSHR [[PRED_COPY]], [[TRUNC1]](s16) ; CHECK-NEXT: S_ENDPGM 0, implicit [[LSHR]](s16) %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $vgpr0 @@ -151,8 +151,8 @@ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY1]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s16) = COPY [[TRUNC1]](s16) - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:vgpr(s16) = G_LSHR [[TRUNC]], [[COPY2]](s16) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s16) = PRED_COPY [[TRUNC1]](s16) + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:vgpr(s16) = G_LSHR [[TRUNC]], [[PRED_COPY]](s16) ; CHECK-NEXT: S_ENDPGM 0, implicit [[LSHR]](s16) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $sgpr0 @@ -233,8 +233,8 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY]](<2 x s16>) - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:vgpr(<2 x s16>) = G_LSHR [[COPY2]], [[COPY1]](<2 x s16>) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(<2 x s16>) = PRED_COPY [[COPY]](<2 x s16>) + ; CHECK-NEXT: 
[[LSHR:%[0-9]+]]:vgpr(<2 x s16>) = G_LSHR [[PRED_COPY]], [[COPY1]](<2 x s16>) ; CHECK-NEXT: S_ENDPGM 0, implicit [[LSHR]](<2 x s16>) %0:_(<2 x s16>) = COPY $sgpr0 %1:_(<2 x s16>) = COPY $vgpr0 @@ -254,8 +254,8 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY1]](<2 x s16>) - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:vgpr(<2 x s16>) = G_LSHR [[COPY]], [[COPY2]](<2 x s16>) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(<2 x s16>) = PRED_COPY [[COPY1]](<2 x s16>) + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:vgpr(<2 x s16>) = G_LSHR [[COPY]], [[PRED_COPY]](<2 x s16>) ; CHECK-NEXT: S_ENDPGM 0, implicit [[LSHR]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s16>) = COPY $sgpr0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mad_64_32.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mad_64_32.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mad_64_32.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mad_64_32.mir @@ -21,9 +21,9 @@ ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 ; GFX8-NEXT: [[MV:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) ; GFX8-NEXT: [[MUL:%[0-9]+]]:sgpr(s32) = G_MUL [[COPY]], [[COPY1]] - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; GFX8-NEXT: [[UMULH:%[0-9]+]]:vgpr_32(s32) = G_UMULH [[COPY4]], [[COPY5]] + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; GFX8-NEXT: [[UMULH:%[0-9]+]]:vgpr_32(s32) = G_UMULH [[PRED_COPY]], [[PRED_COPY1]] ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UMULH]](s32), implicit $exec ; GFX8-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[MV]](s64) ; GFX8-NEXT: [[UADDO:%[0-9]+]]:sgpr(s32), [[UADDO1:%[0-9]+]]:sgpr(s32) = G_UADDO [[MUL]], [[UV]] @@ -86,16 +86,16 @@ ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 ; GFX8-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) ; GFX8-NEXT: [[MUL:%[0-9]+]]:sgpr(s32) = G_MUL [[COPY]], [[COPY1]] - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; GFX8-NEXT: [[UMULH:%[0-9]+]]:vgpr(s32) = G_UMULH [[COPY4]], [[COPY5]] - ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[MUL]](s32) - ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[UMULH]](s32) + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; GFX8-NEXT: [[UMULH:%[0-9]+]]:vgpr(s32) = G_UMULH [[PRED_COPY]], [[PRED_COPY1]] + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY [[MUL]](s32) + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UMULH]](s32) ; GFX8-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[MV]](s64) - ; GFX8-NEXT: [[UADDO:%[0-9]+]]:vgpr(s32), [[UADDO1:%[0-9]+]]:vcc(s1) = G_UADDO [[COPY6]], [[UV]] - ; GFX8-NEXT: [[UADDE:%[0-9]+]]:vgpr(s32), [[UADDE1:%[0-9]+]]:vcc(s1) = G_UADDE [[COPY7]], [[UV1]], [[UADDO1]] + ; GFX8-NEXT: [[UADDO:%[0-9]+]]:vgpr(s32), [[UADDO1:%[0-9]+]]:vcc(s1) = G_UADDO [[PRED_COPY2]], [[UV]] + ; GFX8-NEXT: [[UADDE:%[0-9]+]]:vgpr(s32), [[UADDE1:%[0-9]+]]:vcc(s1) = G_UADDE [[PRED_COPY3]], [[UV1]], [[UADDO1]] ; GFX8-NEXT: 
[[MV1:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) - ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vcc(s1) = COPY [[UADDE1]](s1) + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:vcc(s1) = PRED_COPY [[UADDE1]](s1) ; GFX9MI-LABEL: name: mad_u64_u32_ssv ; GFX9MI: liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1 ; GFX9MI-NEXT: {{ $}} @@ -104,9 +104,9 @@ ; GFX9MI-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; GFX9MI-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 ; GFX9MI-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX9MI-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GFX9MI-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; GFX9MI-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:vgpr(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:vcc(s1) = G_AMDGPU_MAD_U64_U32 [[COPY4]](s32), [[COPY5]], [[MV]] + ; GFX9MI-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; GFX9MI-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; GFX9MI-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:vgpr(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:vcc(s1) = G_AMDGPU_MAD_U64_U32 [[PRED_COPY]](s32), [[PRED_COPY1]], [[MV]] ; GFX10-LABEL: name: mad_u64_u32_ssv ; GFX10: liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1 ; GFX10-NEXT: {{ $}} @@ -117,13 +117,13 @@ ; GFX10-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) ; GFX10-NEXT: [[MUL:%[0-9]+]]:sgpr(s32) = G_MUL [[COPY]], [[COPY1]] ; GFX10-NEXT: [[UMULH:%[0-9]+]]:sgpr(s32) = G_UMULH [[COPY]], [[COPY1]] - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[MUL]](s32) - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[UMULH]](s32) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[MUL]](s32) + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UMULH]](s32) ; GFX10-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[MV]](s64) - ; GFX10-NEXT: [[UADDO:%[0-9]+]]:vgpr(s32), [[UADDO1:%[0-9]+]]:vcc(s1) = G_UADDO [[COPY4]], [[UV]] - ; GFX10-NEXT: [[UADDE:%[0-9]+]]:vgpr(s32), [[UADDE1:%[0-9]+]]:vcc(s1) = G_UADDE [[COPY5]], [[UV1]], [[UADDO1]] + ; GFX10-NEXT: [[UADDO:%[0-9]+]]:vgpr(s32), [[UADDO1:%[0-9]+]]:vcc(s1) = G_UADDO [[PRED_COPY]], [[UV]] + ; GFX10-NEXT: [[UADDE:%[0-9]+]]:vgpr(s32), [[UADDE1:%[0-9]+]]:vcc(s1) = G_UADDE [[PRED_COPY1]], [[UV1]], [[UADDO1]] ; GFX10-NEXT: [[MV1:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:vcc(s1) = COPY [[UADDE1]](s1) + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vcc(s1) = PRED_COPY [[UADDE1]](s1) %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 %2:_(s32) = COPY $vgpr0 @@ -149,9 +149,9 @@ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 ; CHECK-NEXT: [[MV:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s64) = COPY [[MV]](s64) - ; CHECK-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:vgpr(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:vcc(s1) = G_AMDGPU_MAD_U64_U32 [[COPY4]](s32), [[COPY1]], [[COPY5]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s64) = PRED_COPY [[MV]](s64) + ; CHECK-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:vgpr(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:vcc(s1) = G_AMDGPU_MAD_U64_U32 [[PRED_COPY]](s32), [[COPY1]], [[PRED_COPY1]] %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $vgpr0 %2:_(s32) = COPY $sgpr1 @@ -177,8 +177,8 @@ ; CHECK-NEXT: 
[[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:vgpr(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:vcc(s1) = G_AMDGPU_MAD_U64_U32 [[COPY4]](s32), [[COPY1]], [[MV]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:vgpr(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:vcc(s1) = G_AMDGPU_MAD_U64_U32 [[PRED_COPY]](s32), [[COPY1]], [[MV]] %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $vgpr0 %2:_(s32) = COPY $vgpr1 @@ -204,9 +204,9 @@ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 ; CHECK-NEXT: [[MV:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s64) = COPY [[MV]](s64) - ; CHECK-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:vgpr(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:vcc(s1) = G_AMDGPU_MAD_U64_U32 [[COPY]](s32), [[COPY4]], [[COPY5]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s64) = PRED_COPY [[MV]](s64) + ; CHECK-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:vgpr(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:vcc(s1) = G_AMDGPU_MAD_U64_U32 [[COPY]](s32), [[PRED_COPY]], [[PRED_COPY1]] %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $sgpr0 %2:_(s32) = COPY $sgpr1 @@ -232,8 +232,8 @@ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:vgpr(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:vcc(s1) = G_AMDGPU_MAD_U64_U32 [[COPY]](s32), [[COPY4]], [[MV]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:vgpr(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:vcc(s1) = G_AMDGPU_MAD_U64_U32 [[COPY]](s32), [[PRED_COPY]], [[MV]] %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $sgpr1 %2:_(s32) = COPY $vgpr1 @@ -259,8 +259,8 @@ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 ; CHECK-NEXT: [[MV:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s64) = COPY [[MV]](s64) - ; CHECK-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:vgpr(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:vcc(s1) = G_AMDGPU_MAD_U64_U32 [[COPY]](s32), [[COPY1]], [[COPY4]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s64) = PRED_COPY [[MV]](s64) + ; CHECK-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:vgpr(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:vcc(s1) = G_AMDGPU_MAD_U64_U32 [[COPY]](s32), [[COPY1]], [[PRED_COPY]] %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32) = COPY $sgpr1 @@ -313,9 +313,9 @@ ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 ; GFX8-NEXT: [[MV:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) ; GFX8-NEXT: [[MUL:%[0-9]+]]:sgpr(s32) = G_MUL [[COPY]], [[COPY1]] - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; GFX8-NEXT: [[SMULH:%[0-9]+]]:vgpr_32(s32) = G_SMULH [[COPY4]], [[COPY5]] + ; GFX8-NEXT: 
[[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; GFX8-NEXT: [[SMULH:%[0-9]+]]:vgpr_32(s32) = G_SMULH [[PRED_COPY]], [[PRED_COPY1]] ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[SMULH]](s32), implicit $exec ; GFX8-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; GFX8-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(slt), [[V_READFIRSTLANE_B32_]](s32), [[C]] @@ -393,21 +393,21 @@ ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 ; GFX8-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) ; GFX8-NEXT: [[MUL:%[0-9]+]]:sgpr(s32) = G_MUL [[COPY]], [[COPY1]] - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; GFX8-NEXT: [[SMULH:%[0-9]+]]:vgpr(s32) = G_SMULH [[COPY4]], [[COPY5]] + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; GFX8-NEXT: [[SMULH:%[0-9]+]]:vgpr(s32) = G_SMULH [[PRED_COPY]], [[PRED_COPY1]] ; GFX8-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; GFX8-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(slt), [[SMULH]](s32), [[C]] - ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[MUL]](s32) - ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[SMULH]](s32) + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY [[MUL]](s32) + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr(s32) = PRED_COPY [[SMULH]](s32) ; GFX8-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[MV]](s64) ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(slt), [[UV1]](s32), [[C]] ; GFX8-NEXT: [[XOR:%[0-9]+]]:vcc(s1) = G_XOR [[ICMP]], [[ICMP1]] - ; GFX8-NEXT: [[UADDO:%[0-9]+]]:vgpr(s32), [[UADDO1:%[0-9]+]]:vcc(s1) = G_UADDO [[COPY6]], [[UV]] - ; GFX8-NEXT: [[UADDE:%[0-9]+]]:vgpr(s32), [[UADDE1:%[0-9]+]]:vcc(s1) = G_UADDE [[COPY7]], [[UV1]], [[UADDO1]] + ; GFX8-NEXT: [[UADDO:%[0-9]+]]:vgpr(s32), [[UADDO1:%[0-9]+]]:vcc(s1) = G_UADDO [[PRED_COPY2]], [[UV]] + ; GFX8-NEXT: [[UADDE:%[0-9]+]]:vgpr(s32), [[UADDE1:%[0-9]+]]:vcc(s1) = G_UADDE [[PRED_COPY3]], [[UV1]], [[UADDO1]] ; GFX8-NEXT: [[XOR1:%[0-9]+]]:vcc(s1) = G_XOR [[XOR]], [[UADDE1]] ; GFX8-NEXT: [[MV1:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) - ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vcc(s1) = COPY [[XOR1]](s1) + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:vcc(s1) = PRED_COPY [[XOR1]](s1) ; GFX9MI-LABEL: name: mad_i64_i32_ssv ; GFX9MI: liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1 ; GFX9MI-NEXT: {{ $}} @@ -416,9 +416,9 @@ ; GFX9MI-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; GFX9MI-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 ; GFX9MI-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX9MI-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GFX9MI-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; GFX9MI-NEXT: [[AMDGPU_MAD_I64_I32_:%[0-9]+]]:vgpr(s64), [[AMDGPU_MAD_I64_I32_1:%[0-9]+]]:vcc(s1) = G_AMDGPU_MAD_I64_I32 [[COPY4]](s32), [[COPY5]], [[MV]] + ; GFX9MI-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; GFX9MI-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; GFX9MI-NEXT: [[AMDGPU_MAD_I64_I32_:%[0-9]+]]:vgpr(s64), [[AMDGPU_MAD_I64_I32_1:%[0-9]+]]:vcc(s1) = G_AMDGPU_MAD_I64_I32 [[PRED_COPY]](s32), [[PRED_COPY1]], [[MV]] ; GFX10-LABEL: name: mad_i64_i32_ssv ; GFX10: liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1 
; GFX10-NEXT: {{ $}} @@ -432,16 +432,16 @@ ; GFX10-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; GFX10-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(slt), [[SMULH]](s32), [[C]] ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:vcc(s1) = G_TRUNC [[ICMP]](s32) - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[MUL]](s32) - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[SMULH]](s32) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[MUL]](s32) + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[SMULH]](s32) ; GFX10-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[MV]](s64) ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(slt), [[UV1]](s32), [[C]] ; GFX10-NEXT: [[XOR:%[0-9]+]]:vcc(s1) = G_XOR [[TRUNC]], [[ICMP1]] - ; GFX10-NEXT: [[UADDO:%[0-9]+]]:vgpr(s32), [[UADDO1:%[0-9]+]]:vcc(s1) = G_UADDO [[COPY4]], [[UV]] - ; GFX10-NEXT: [[UADDE:%[0-9]+]]:vgpr(s32), [[UADDE1:%[0-9]+]]:vcc(s1) = G_UADDE [[COPY5]], [[UV1]], [[UADDO1]] + ; GFX10-NEXT: [[UADDO:%[0-9]+]]:vgpr(s32), [[UADDO1:%[0-9]+]]:vcc(s1) = G_UADDO [[PRED_COPY]], [[UV]] + ; GFX10-NEXT: [[UADDE:%[0-9]+]]:vgpr(s32), [[UADDE1:%[0-9]+]]:vcc(s1) = G_UADDE [[PRED_COPY1]], [[UV1]], [[UADDO1]] ; GFX10-NEXT: [[XOR1:%[0-9]+]]:vcc(s1) = G_XOR [[XOR]], [[UADDE1]] ; GFX10-NEXT: [[MV1:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:vcc(s1) = COPY [[XOR1]](s1) + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vcc(s1) = PRED_COPY [[XOR1]](s1) %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 %2:_(s32) = COPY $vgpr0 @@ -466,9 +466,9 @@ ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 ; GFX8-NEXT: [[C:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 0 ; GFX8-NEXT: [[MUL:%[0-9]+]]:sgpr(s32) = G_MUL [[COPY]], [[COPY1]] - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; GFX8-NEXT: [[UMULH:%[0-9]+]]:vgpr_32(s32) = G_UMULH [[COPY2]], [[COPY3]] + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; GFX8-NEXT: [[UMULH:%[0-9]+]]:vgpr_32(s32) = G_UMULH [[PRED_COPY]], [[PRED_COPY1]] ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UMULH]](s32), implicit $exec ; GFX8-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; GFX8-NEXT: [[MV:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[MUL]](s32), [[V_READFIRSTLANE_B32_]](s32) @@ -516,8 +516,8 @@ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s64) = COPY [[C]](s64) - ; CHECK-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:vgpr(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:vcc(s1) = G_AMDGPU_MAD_U64_U32 [[COPY]](s32), [[COPY1]], [[COPY2]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s64) = PRED_COPY [[C]](s64) + ; CHECK-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:vgpr(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:vcc(s1) = G_AMDGPU_MAD_U64_U32 [[COPY]](s32), [[COPY1]], [[PRED_COPY]] %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s64) = G_CONSTANT i64 0 @@ -540,9 +540,9 @@ ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 ; GFX8-NEXT: [[C:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 0 ; GFX8-NEXT: [[MUL:%[0-9]+]]:sgpr(s32) = G_MUL [[COPY]], [[COPY1]] - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; GFX8-NEXT: 
[[SMULH:%[0-9]+]]:vgpr_32(s32) = G_SMULH [[COPY2]], [[COPY3]] + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; GFX8-NEXT: [[SMULH:%[0-9]+]]:vgpr_32(s32) = G_SMULH [[PRED_COPY]], [[PRED_COPY1]] ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[SMULH]](s32), implicit $exec ; GFX8-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; GFX8-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(slt), [[V_READFIRSTLANE_B32_]](s32), [[C1]] @@ -593,8 +593,8 @@ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s64) = COPY [[C]](s64) - ; CHECK-NEXT: [[AMDGPU_MAD_I64_I32_:%[0-9]+]]:vgpr(s64), [[AMDGPU_MAD_I64_I32_1:%[0-9]+]]:vcc(s1) = G_AMDGPU_MAD_I64_I32 [[COPY]](s32), [[COPY1]], [[COPY2]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s64) = PRED_COPY [[C]](s64) + ; CHECK-NEXT: [[AMDGPU_MAD_I64_I32_:%[0-9]+]]:vgpr(s64), [[AMDGPU_MAD_I64_I32_1:%[0-9]+]]:vcc(s1) = G_AMDGPU_MAD_I64_I32 [[COPY]](s32), [[COPY1]], [[PRED_COPY]] %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s64) = G_CONSTANT i64 0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-merge-values.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-merge-values.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-merge-values.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-merge-values.mir @@ -78,9 +78,9 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:agpr(s32) = COPY $agpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[PRED_COPY]](s32), [[PRED_COPY1]](s32) ; CHECK-NEXT: S_ENDPGM 0, implicit [[MV]](s64) %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $agpr0 @@ -100,9 +100,9 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:agpr(s32) = COPY $agpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[PRED_COPY]](s32), [[PRED_COPY1]](s32) ; CHECK-NEXT: S_ENDPGM 0, implicit [[MV]](s64) %0:_(s32) = COPY $agpr0 %1:_(s32) = COPY $sgpr0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mul.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mul.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mul.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mul.mir @@ -31,8 +31,8 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[MUL:%[0-9]+]]:vgpr(s32) = G_MUL [[COPY2]], [[COPY1]] 
+ ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[MUL:%[0-9]+]]:vgpr(s32) = G_MUL [[PRED_COPY]], [[COPY1]] %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $vgpr0 %2:_(s32) = G_MUL %0, %1 @@ -50,8 +50,8 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[MUL:%[0-9]+]]:vgpr(s32) = G_MUL [[COPY]], [[COPY2]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[MUL:%[0-9]+]]:vgpr(s32) = G_MUL [[COPY]], [[PRED_COPY]] %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $sgpr0 %2:_(s32) = G_MUL %0, %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-or.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-or.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-or.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-or.mir @@ -32,8 +32,8 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:vgpr(s32) = G_OR [[COPY2]], [[COPY1]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:vgpr(s32) = G_OR [[PRED_COPY]], [[COPY1]] %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $vgpr0 %2:_(s32) = G_OR %0, %1 @@ -51,8 +51,8 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:vgpr(s32) = G_OR [[COPY]], [[COPY2]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:vgpr(s32) = G_OR [[COPY]], [[PRED_COPY]] %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $sgpr0 %2:_(s32) = G_OR %0, %1 @@ -120,10 +120,10 @@ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY2]] - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[COPY1]](s32), [[COPY3]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[PRED_COPY]] + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[COPY1]](s32), [[PRED_COPY1]] ; CHECK-NEXT: [[OR:%[0-9]+]]:vcc(s1) = G_OR [[ICMP]], [[ICMP1]] ; CHECK-NEXT: S_NOP 0, implicit [[OR]](s1) %0:_(s32) = COPY $vgpr0 @@ -150,10 +150,10 @@ ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ne), [[COPY]](s32), [[C]] ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[COPY1]](s32), [[COPY2]] - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; CHECK-NEXT: [[OR:%[0-9]+]]:vcc(s1) = G_OR [[COPY3]], [[ICMP1]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: 
[[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[COPY1]](s32), [[PRED_COPY]] + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) + ; CHECK-NEXT: [[OR:%[0-9]+]]:vcc(s1) = G_OR [[PRED_COPY1]], [[ICMP1]] ; CHECK-NEXT: S_NOP 0, implicit [[OR]](s1) %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $vgpr0 @@ -230,10 +230,10 @@ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[COPY2]](s32), [[COPY1]] - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; CHECK-NEXT: [[OR:%[0-9]+]]:vcc(s1) = G_OR [[COPY3]], [[ICMP]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[PRED_COPY]](s32), [[COPY1]] + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) + ; CHECK-NEXT: [[OR:%[0-9]+]]:vcc(s1) = G_OR [[PRED_COPY1]], [[ICMP]] ; CHECK-NEXT: S_NOP 0, implicit [[OR]](s1) %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $vgpr0 @@ -423,8 +423,8 @@ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY3]](s32), [[COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[PRED_COPY]](s32), [[COPY2]](s32) ; CHECK-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[COPY]](s64) ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[MV]](s64) ; CHECK-NEXT: [[OR:%[0-9]+]]:vgpr(s32) = G_OR [[UV]], [[UV2]] @@ -452,8 +452,8 @@ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[PRED_COPY]](s32) ; CHECK-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[COPY]](s64) ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[MV]](s64) ; CHECK-NEXT: [[OR:%[0-9]+]]:vgpr(s32) = G_OR [[UV]], [[UV2]] @@ -482,10 +482,10 @@ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY2]](s32) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[MV1:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY5]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[PRED_COPY]](s32), [[COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[MV1:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES 
[[PRED_COPY1]](s32), [[COPY3]](s32) ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[MV]](s64) ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[MV1]](s64) ; CHECK-NEXT: [[OR:%[0-9]+]]:vgpr(s32) = G_OR [[UV]], [[UV2]] @@ -516,10 +516,10 @@ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY2]](s32) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[MV1:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY3]](s32), [[COPY5]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[PRED_COPY]](s32), [[COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[MV1:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY3]](s32), [[PRED_COPY1]](s32) ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[MV]](s64) ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[MV1]](s64) ; CHECK-NEXT: [[OR:%[0-9]+]]:vgpr(s32) = G_OR [[UV]], [[UV2]] @@ -775,8 +775,8 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY]](<2 x s16>) - ; CHECK-NEXT: [[OR:%[0-9]+]]:vgpr(<2 x s16>) = G_OR [[COPY2]], [[COPY1]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(<2 x s16>) = PRED_COPY [[COPY]](<2 x s16>) + ; CHECK-NEXT: [[OR:%[0-9]+]]:vgpr(<2 x s16>) = G_OR [[PRED_COPY]], [[COPY1]] %0:_(<2 x s16>) = COPY $sgpr0 %1:_(<2 x s16>) = COPY $vgpr0 %2:_(<2 x s16>) = G_OR %0, %1 @@ -794,8 +794,8 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY1]](<2 x s16>) - ; CHECK-NEXT: [[OR:%[0-9]+]]:vgpr(<2 x s16>) = G_OR [[COPY]], [[COPY2]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(<2 x s16>) = PRED_COPY [[COPY1]](<2 x s16>) + ; CHECK-NEXT: [[OR:%[0-9]+]]:vgpr(<2 x s16>) = G_OR [[COPY]], [[PRED_COPY]] %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s16>) = COPY $sgpr0 %2:_(<2 x s16>) = G_OR %0, %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-phi-s1.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-phi-s1.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-phi-s1.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-phi-s1.mir @@ -258,22 +258,22 @@ ; FAST-NEXT: [[ICMP1:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] ; FAST-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP1]](s32) ; FAST-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC1]](s1) - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) + ; FAST-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) ; FAST-NEXT: G_BRCOND [[ZEXT]](s32), %bb.1 ; FAST-NEXT: G_BR %bb.2 ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.1: ; FAST-NEXT: successors: %bb.2(0x80000000) ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; FAST-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[COPY4]] + ; FAST-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY 
[[C]](s32) + ; FAST-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[PRED_COPY1]] ; FAST-NEXT: G_BR %bb.2 ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.2: - ; FAST-NEXT: [[PHI:%[0-9]+]]:vcc(s1) = G_PHI [[COPY3]](s1), %bb.0, [[ICMP2]](s1), %bb.1 - ; FAST-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; FAST-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PHI]](s1), [[COPY5]], [[COPY6]] + ; FAST-NEXT: [[PHI:%[0-9]+]]:vcc(s1) = G_PHI [[PRED_COPY]](s1), %bb.0, [[ICMP2]](s1), %bb.1 + ; FAST-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; FAST-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PHI]](s1), [[PRED_COPY2]], [[PRED_COPY3]] ; FAST-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32) ; GREEDY-LABEL: name: phi_s1_scc_vcc_sbranch ; GREEDY: bb.0: @@ -289,22 +289,22 @@ ; GREEDY-NEXT: [[ICMP1:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] ; GREEDY-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP1]](s32) ; GREEDY-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC1]](s1) - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) ; GREEDY-NEXT: G_BRCOND [[ZEXT]](s32), %bb.1 ; GREEDY-NEXT: G_BR %bb.2 ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.1: ; GREEDY-NEXT: successors: %bb.2(0x80000000) ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[COPY4]] + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; GREEDY-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[PRED_COPY1]] ; GREEDY-NEXT: G_BR %bb.2 ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.2: - ; GREEDY-NEXT: [[PHI:%[0-9]+]]:vcc(s1) = G_PHI [[COPY3]](s1), %bb.0, [[ICMP2]](s1), %bb.1 - ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PHI]](s1), [[COPY5]], [[COPY6]] + ; GREEDY-NEXT: [[PHI:%[0-9]+]]:vcc(s1) = G_PHI [[PRED_COPY]](s1), %bb.0, [[ICMP2]](s1), %bb.1 + ; GREEDY-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; GREEDY-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PHI]](s1), [[PRED_COPY2]], [[PRED_COPY3]] ; GREEDY-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32) bb.0: successors: %bb.1, %bb.2 @@ -347,8 +347,8 @@ ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 ; FAST-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; FAST-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY3]] + ; FAST-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; FAST-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[PRED_COPY]] ; FAST-NEXT: [[ICMP1:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP1]](s32) ; FAST-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC]](s1) @@ -360,14 +360,14 @@ ; FAST-NEXT: {{ $}} ; FAST-NEXT: [[ICMP2:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] ; FAST-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC 
[[ICMP2]](s32) - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vcc(s1) = COPY [[TRUNC1]](s1) + ; FAST-NEXT: [[PRED_COPY1:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC1]](s1) ; FAST-NEXT: G_BR %bb.2 ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.2: - ; FAST-NEXT: [[PHI:%[0-9]+]]:vcc(s1) = G_PHI [[ICMP]](s1), %bb.0, [[COPY4]](s1), %bb.1 - ; FAST-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; FAST-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PHI]](s1), [[COPY5]], [[COPY6]] + ; FAST-NEXT: [[PHI:%[0-9]+]]:vcc(s1) = G_PHI [[ICMP]](s1), %bb.0, [[PRED_COPY1]](s1), %bb.1 + ; FAST-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; FAST-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PHI]](s1), [[PRED_COPY2]], [[PRED_COPY3]] ; FAST-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32) ; GREEDY-LABEL: name: phi_s1_vcc_scc_sbranch ; GREEDY: bb.0: @@ -378,8 +378,8 @@ ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY3]] + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[PRED_COPY]] ; GREEDY-NEXT: [[ICMP1:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP1]](s32) ; GREEDY-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC]](s1) @@ -391,14 +391,14 @@ ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: [[ICMP2:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] ; GREEDY-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP2]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vcc(s1) = COPY [[TRUNC1]](s1) + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC1]](s1) ; GREEDY-NEXT: G_BR %bb.2 ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.2: - ; GREEDY-NEXT: [[PHI:%[0-9]+]]:vcc(s1) = G_PHI [[ICMP]](s1), %bb.0, [[COPY4]](s1), %bb.1 - ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PHI]](s1), [[COPY5]], [[COPY6]] + ; GREEDY-NEXT: [[PHI:%[0-9]+]]:vcc(s1) = G_PHI [[ICMP]](s1), %bb.0, [[PRED_COPY1]](s1), %bb.1 + ; GREEDY-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; GREEDY-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PHI]](s1), [[PRED_COPY2]], [[PRED_COPY3]] ; GREEDY-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32) bb.0: successors: %bb.1, %bb.2 @@ -441,8 +441,8 @@ ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; FAST-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; FAST-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY3]] + ; FAST-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; FAST-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[PRED_COPY]] ; FAST-NEXT: [[ICMP1:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP1]](s32) ; FAST-NEXT: 
[[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC]](s1) @@ -452,14 +452,14 @@ ; FAST-NEXT: bb.1: ; FAST-NEXT: successors: %bb.2(0x80000000) ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; FAST-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[COPY4]] + ; FAST-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; FAST-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[PRED_COPY1]] ; FAST-NEXT: G_BR %bb.2 ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.2: ; FAST-NEXT: [[PHI:%[0-9]+]]:vcc(s1) = G_PHI [[ICMP]](s1), %bb.0, [[ICMP2]](s1), %bb.1 - ; FAST-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PHI]](s1), [[COPY5]], [[COPY]] + ; FAST-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PHI]](s1), [[PRED_COPY2]], [[COPY]] ; FAST-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32) ; GREEDY-LABEL: name: phi_s1_vcc_vcc_sbranch ; GREEDY: bb.0: @@ -470,8 +470,8 @@ ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY3]] + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[PRED_COPY]] ; GREEDY-NEXT: [[ICMP1:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP1]](s32) ; GREEDY-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC]](s1) @@ -481,14 +481,14 @@ ; GREEDY-NEXT: bb.1: ; GREEDY-NEXT: successors: %bb.2(0x80000000) ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[COPY4]] + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; GREEDY-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[PRED_COPY1]] ; GREEDY-NEXT: G_BR %bb.2 ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.2: ; GREEDY-NEXT: [[PHI:%[0-9]+]]:vcc(s1) = G_PHI [[ICMP]](s1), %bb.0, [[ICMP2]](s1), %bb.1 - ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PHI]](s1), [[COPY5]], [[COPY]] + ; GREEDY-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PHI]](s1), [[PRED_COPY2]], [[COPY]] ; GREEDY-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32) bb.0: successors: %bb.1, %bb.2 @@ -738,10 +738,10 @@ ; FAST-NEXT: bb.2: ; FAST-NEXT: [[PHI:%[0-9]+]]:vgpr(s32) = G_PHI [[ANYEXT]](s32), %bb.0, [[ANYEXT1]](s32), %bb.1 ; FAST-NEXT: [[TRUNC3:%[0-9]+]]:vgpr(s1) = G_TRUNC [[PHI]](s32) - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC3]](s1) - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; FAST-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY3]](s1), [[COPY4]], [[COPY5]] + ; FAST-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC3]](s1) + ; FAST-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; FAST-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = 
G_SELECT [[PRED_COPY]](s1), [[PRED_COPY1]], [[PRED_COPY2]] ; FAST-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32) ; GREEDY-LABEL: name: phi_s1_scc_v_sbranch ; GREEDY: bb.0: @@ -771,10 +771,10 @@ ; GREEDY-NEXT: bb.2: ; GREEDY-NEXT: [[PHI:%[0-9]+]]:vgpr(s32) = G_PHI [[ANYEXT]](s32), %bb.0, [[ANYEXT1]](s32), %bb.1 ; GREEDY-NEXT: [[TRUNC3:%[0-9]+]]:vgpr(s1) = G_TRUNC [[PHI]](s32) - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC3]](s1) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY3]](s1), [[COPY4]], [[COPY5]] + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC3]](s1) + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; GREEDY-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PRED_COPY]](s1), [[PRED_COPY1]], [[PRED_COPY2]] ; GREEDY-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32) bb.0: successors: %bb.1, %bb.2 @@ -836,9 +836,9 @@ ; FAST-NEXT: bb.2: ; FAST-NEXT: [[PHI:%[0-9]+]]:vgpr(s32) = G_PHI [[ANYEXT]](s32), %bb.0, [[ANYEXT1]](s32), %bb.1 ; FAST-NEXT: [[TRUNC3:%[0-9]+]]:vgpr(s1) = G_TRUNC [[PHI]](s32) - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC3]](s1) - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY3]](s1), [[COPY4]], [[COPY]] + ; FAST-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC3]](s1) + ; FAST-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PRED_COPY]](s1), [[PRED_COPY1]], [[COPY]] ; FAST-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32) ; GREEDY-LABEL: name: phi_s1_v_scc_sbranch ; GREEDY: bb.0: @@ -868,9 +868,9 @@ ; GREEDY-NEXT: bb.2: ; GREEDY-NEXT: [[PHI:%[0-9]+]]:vgpr(s32) = G_PHI [[ANYEXT]](s32), %bb.0, [[ANYEXT1]](s32), %bb.1 ; GREEDY-NEXT: [[TRUNC3:%[0-9]+]]:vgpr(s1) = G_TRUNC [[PHI]](s32) - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC3]](s1) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY3]](s1), [[COPY4]], [[COPY]] + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC3]](s1) + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PRED_COPY]](s1), [[PRED_COPY1]], [[COPY]] ; GREEDY-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32) bb.0: successors: %bb.1, %bb.2 @@ -913,8 +913,8 @@ ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 ; FAST-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; FAST-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY3]] + ; FAST-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; FAST-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[PRED_COPY]] ; FAST-NEXT: [[ICMP1:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP1]](s32) ; FAST-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC]](s1) @@ -925,13 +925,13 @@ ; FAST-NEXT: successors: %bb.2(0x80000000) ; FAST-NEXT: {{ $}} ; FAST-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY1]](s32) - ; FAST-NEXT: 
[[COPY4:%[0-9]+]]:vcc(s1) = COPY [[TRUNC1]](s1) + ; FAST-NEXT: [[PRED_COPY1:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC1]](s1) ; FAST-NEXT: G_BR %bb.2 ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.2: - ; FAST-NEXT: [[PHI:%[0-9]+]]:vcc(s1) = G_PHI [[ICMP]](s1), %bb.0, [[COPY4]](s1), %bb.1 - ; FAST-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PHI]](s1), [[COPY5]], [[COPY]] + ; FAST-NEXT: [[PHI:%[0-9]+]]:vcc(s1) = G_PHI [[ICMP]](s1), %bb.0, [[PRED_COPY1]](s1), %bb.1 + ; FAST-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PHI]](s1), [[PRED_COPY2]], [[COPY]] ; FAST-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32) ; GREEDY-LABEL: name: phi_s1_vcc_s_sbranch ; GREEDY: bb.0: @@ -942,8 +942,8 @@ ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY3]] + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[PRED_COPY]] ; GREEDY-NEXT: [[ICMP1:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP1]](s32) ; GREEDY-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC]](s1) @@ -954,13 +954,13 @@ ; GREEDY-NEXT: successors: %bb.2(0x80000000) ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY1]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vcc(s1) = COPY [[TRUNC1]](s1) + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC1]](s1) ; GREEDY-NEXT: G_BR %bb.2 ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.2: - ; GREEDY-NEXT: [[PHI:%[0-9]+]]:vcc(s1) = G_PHI [[ICMP]](s1), %bb.0, [[COPY4]](s1), %bb.1 - ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PHI]](s1), [[COPY5]], [[COPY]] + ; GREEDY-NEXT: [[PHI:%[0-9]+]]:vcc(s1) = G_PHI [[ICMP]](s1), %bb.0, [[PRED_COPY1]](s1), %bb.1 + ; GREEDY-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PHI]](s1), [[PRED_COPY2]], [[COPY]] ; GREEDY-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32) bb.0: successors: %bb.1, %bb.2 @@ -1007,22 +1007,22 @@ ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] ; FAST-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) ; FAST-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC1]](s1) - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) + ; FAST-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) ; FAST-NEXT: G_BRCOND [[ZEXT]](s32), %bb.1 ; FAST-NEXT: G_BR %bb.2 ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.1: ; FAST-NEXT: successors: %bb.2(0x80000000) ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; FAST-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[COPY4]] + ; FAST-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; FAST-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[PRED_COPY1]] ; FAST-NEXT: G_BR %bb.2 ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.2: - ; FAST-NEXT: [[PHI:%[0-9]+]]:vcc(s1) = G_PHI [[COPY3]](s1), %bb.0, [[ICMP1]](s1), %bb.1 - ; FAST-NEXT: 
[[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; FAST-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PHI]](s1), [[COPY5]], [[COPY6]] + ; FAST-NEXT: [[PHI:%[0-9]+]]:vcc(s1) = G_PHI [[PRED_COPY]](s1), %bb.0, [[ICMP1]](s1), %bb.1 + ; FAST-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; FAST-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PHI]](s1), [[PRED_COPY2]], [[PRED_COPY3]] ; FAST-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32) ; GREEDY-LABEL: name: phi_s1_s_vcc_sbranch ; GREEDY: bb.0: @@ -1037,22 +1037,22 @@ ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] ; GREEDY-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) ; GREEDY-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC1]](s1) - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) ; GREEDY-NEXT: G_BRCOND [[ZEXT]](s32), %bb.1 ; GREEDY-NEXT: G_BR %bb.2 ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.1: ; GREEDY-NEXT: successors: %bb.2(0x80000000) ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[COPY4]] + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; GREEDY-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[PRED_COPY1]] ; GREEDY-NEXT: G_BR %bb.2 ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.2: - ; GREEDY-NEXT: [[PHI:%[0-9]+]]:vcc(s1) = G_PHI [[COPY3]](s1), %bb.0, [[ICMP1]](s1), %bb.1 - ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PHI]](s1), [[COPY5]], [[COPY6]] + ; GREEDY-NEXT: [[PHI:%[0-9]+]]:vcc(s1) = G_PHI [[PRED_COPY]](s1), %bb.0, [[ICMP1]](s1), %bb.1 + ; GREEDY-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; GREEDY-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PHI]](s1), [[PRED_COPY2]], [[PRED_COPY3]] ; GREEDY-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32) bb.0: successors: %bb.1, %bb.2 @@ -1095,8 +1095,8 @@ ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; FAST-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; FAST-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY3]] + ; FAST-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; FAST-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[PRED_COPY]] ; FAST-NEXT: [[ICMP1:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP1]](s32) ; FAST-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC]](s1) @@ -1116,9 +1116,9 @@ ; FAST-NEXT: bb.2: ; FAST-NEXT: [[PHI:%[0-9]+]]:vgpr(s32) = G_PHI [[SELECT]](s32), %bb.0, [[ANYEXT]](s32), %bb.1 ; FAST-NEXT: [[TRUNC2:%[0-9]+]]:vgpr(s1) = G_TRUNC [[PHI]](s32) - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vcc(s1) = COPY [[TRUNC2]](s1) - ; FAST-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; FAST-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY4]](s1), [[COPY5]], [[COPY]] + ; FAST-NEXT: [[PRED_COPY1:%[0-9]+]]:vcc(s1) 
= PRED_COPY [[TRUNC2]](s1) + ; FAST-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; FAST-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[PRED_COPY1]](s1), [[PRED_COPY2]], [[COPY]] ; FAST-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT1]](s32) ; GREEDY-LABEL: name: phi_s1_vcc_v_sbranch ; GREEDY: bb.0: @@ -1129,8 +1129,8 @@ ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY3]] + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[PRED_COPY]] ; GREEDY-NEXT: [[ICMP1:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP1]](s32) ; GREEDY-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC]](s1) @@ -1150,9 +1150,9 @@ ; GREEDY-NEXT: bb.2: ; GREEDY-NEXT: [[PHI:%[0-9]+]]:vgpr(s32) = G_PHI [[SELECT]](s32), %bb.0, [[ANYEXT]](s32), %bb.1 ; GREEDY-NEXT: [[TRUNC2:%[0-9]+]]:vgpr(s1) = G_TRUNC [[PHI]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vcc(s1) = COPY [[TRUNC2]](s1) - ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY4]](s1), [[COPY5]], [[COPY]] + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC2]](s1) + ; GREEDY-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; GREEDY-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[PRED_COPY1]](s1), [[PRED_COPY2]], [[COPY]] ; GREEDY-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT1]](s32) bb.0: successors: %bb.1, %bb.2 @@ -1206,8 +1206,8 @@ ; FAST-NEXT: bb.1: ; FAST-NEXT: successors: %bb.2(0x80000000) ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; FAST-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[COPY3]] + ; FAST-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; FAST-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[PRED_COPY]] ; FAST-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1 ; FAST-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP1]](s1), [[C1]], [[C2]] @@ -1216,9 +1216,9 @@ ; FAST-NEXT: bb.2: ; FAST-NEXT: [[PHI:%[0-9]+]]:vgpr(s32) = G_PHI [[ANYEXT]](s32), %bb.0, [[SELECT]](s32), %bb.1 ; FAST-NEXT: [[TRUNC2:%[0-9]+]]:vgpr(s1) = G_TRUNC [[PHI]](s32) - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vcc(s1) = COPY [[TRUNC2]](s1) - ; FAST-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; FAST-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY4]](s1), [[COPY5]], [[COPY]] + ; FAST-NEXT: [[PRED_COPY1:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC2]](s1) + ; FAST-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; FAST-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[PRED_COPY1]](s1), [[PRED_COPY2]], [[COPY]] ; FAST-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT1]](s32) ; GREEDY-LABEL: name: phi_s1_v_vcc_sbranch ; GREEDY: bb.0: @@ -1240,8 +1240,8 @@ ; GREEDY-NEXT: bb.1: ; GREEDY-NEXT: successors: %bb.2(0x80000000) ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[COPY3]] + ; GREEDY-NEXT: 
[[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; GREEDY-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[PRED_COPY]] ; GREEDY-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1 ; GREEDY-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP1]](s1), [[C1]], [[C2]] @@ -1250,9 +1250,9 @@ ; GREEDY-NEXT: bb.2: ; GREEDY-NEXT: [[PHI:%[0-9]+]]:vgpr(s32) = G_PHI [[ANYEXT]](s32), %bb.0, [[SELECT]](s32), %bb.1 ; GREEDY-NEXT: [[TRUNC2:%[0-9]+]]:vgpr(s1) = G_TRUNC [[PHI]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vcc(s1) = COPY [[TRUNC2]](s1) - ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY4]](s1), [[COPY5]], [[COPY]] + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC2]](s1) + ; GREEDY-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; GREEDY-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[PRED_COPY1]](s1), [[PRED_COPY2]], [[COPY]] ; GREEDY-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT1]](s32) bb.0: successors: %bb.1, %bb.2 @@ -1313,9 +1313,9 @@ ; FAST-NEXT: bb.2: ; FAST-NEXT: [[PHI:%[0-9]+]]:vgpr(s32) = G_PHI [[ANYEXT]](s32), %bb.0, [[ANYEXT1]](s32), %bb.1 ; FAST-NEXT: [[TRUNC3:%[0-9]+]]:vgpr(s1) = G_TRUNC [[PHI]](s32) - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC3]](s1) - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY3]](s1), [[COPY4]], [[COPY]] + ; FAST-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC3]](s1) + ; FAST-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PRED_COPY]](s1), [[PRED_COPY1]], [[COPY]] ; FAST-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32) ; GREEDY-LABEL: name: phi_s1_v_s_sbranch ; GREEDY: bb.0: @@ -1344,9 +1344,9 @@ ; GREEDY-NEXT: bb.2: ; GREEDY-NEXT: [[PHI:%[0-9]+]]:vgpr(s32) = G_PHI [[ANYEXT]](s32), %bb.0, [[ANYEXT1]](s32), %bb.1 ; GREEDY-NEXT: [[TRUNC3:%[0-9]+]]:vgpr(s1) = G_TRUNC [[PHI]](s32) - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC3]](s1) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY3]](s1), [[COPY4]], [[COPY]] + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC3]](s1) + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PRED_COPY]](s1), [[PRED_COPY1]], [[COPY]] ; GREEDY-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32) bb.0: successors: %bb.1, %bb.2 @@ -1407,10 +1407,10 @@ ; FAST-NEXT: bb.2: ; FAST-NEXT: [[PHI:%[0-9]+]]:vgpr(s32) = G_PHI [[ANYEXT]](s32), %bb.0, [[ANYEXT1]](s32), %bb.1 ; FAST-NEXT: [[TRUNC3:%[0-9]+]]:vgpr(s1) = G_TRUNC [[PHI]](s32) - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC3]](s1) - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; FAST-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY3]](s1), [[COPY4]], [[COPY5]] + ; FAST-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC3]](s1) + ; FAST-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; FAST-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PRED_COPY]](s1), [[PRED_COPY1]], [[PRED_COPY2]] ; FAST-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, 
implicit [[SELECT]](s32) ; GREEDY-LABEL: name: phi_s1_s_v_sbranch ; GREEDY: bb.0: @@ -1439,10 +1439,10 @@ ; GREEDY-NEXT: bb.2: ; GREEDY-NEXT: [[PHI:%[0-9]+]]:vgpr(s32) = G_PHI [[ANYEXT]](s32), %bb.0, [[ANYEXT1]](s32), %bb.1 ; GREEDY-NEXT: [[TRUNC3:%[0-9]+]]:vgpr(s1) = G_TRUNC [[PHI]](s32) - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC3]](s1) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY3]](s1), [[COPY4]], [[COPY5]] + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC3]](s1) + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; GREEDY-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PRED_COPY]](s1), [[PRED_COPY1]], [[PRED_COPY2]] ; GREEDY-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32) bb.0: successors: %bb.1, %bb.2 @@ -1503,9 +1503,9 @@ ; FAST-NEXT: bb.2: ; FAST-NEXT: [[PHI:%[0-9]+]]:vgpr(s32) = G_PHI [[ANYEXT]](s32), %bb.0, [[ANYEXT1]](s32), %bb.1 ; FAST-NEXT: [[TRUNC3:%[0-9]+]]:vgpr(s1) = G_TRUNC [[PHI]](s32) - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC3]](s1) - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY3]](s1), [[COPY4]], [[COPY]] + ; FAST-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC3]](s1) + ; FAST-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PRED_COPY]](s1), [[PRED_COPY1]], [[COPY]] ; FAST-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32) ; GREEDY-LABEL: name: phi_s1_v_v_sbranch ; GREEDY: bb.0: @@ -1534,9 +1534,9 @@ ; GREEDY-NEXT: bb.2: ; GREEDY-NEXT: [[PHI:%[0-9]+]]:vgpr(s32) = G_PHI [[ANYEXT]](s32), %bb.0, [[ANYEXT1]](s32), %bb.1 ; GREEDY-NEXT: [[TRUNC3:%[0-9]+]]:vgpr(s1) = G_TRUNC [[PHI]](s32) - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC3]](s1) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY3]](s1), [[COPY4]], [[COPY]] + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC3]](s1) + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PRED_COPY]](s1), [[PRED_COPY1]], [[COPY]] ; GREEDY-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32) bb.0: successors: %bb.1, %bb.2 @@ -1676,7 +1676,7 @@ ; FAST-NEXT: [[ICMP1:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] ; FAST-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP1]](s32) ; FAST-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC1]](s1) - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) + ; FAST-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) ; FAST-NEXT: G_BRCOND [[ZEXT]](s32), %bb.1 ; FAST-NEXT: G_BR %bb.2 ; FAST-NEXT: {{ $}} @@ -1685,16 +1685,16 @@ ; FAST-NEXT: {{ $}} ; FAST-NEXT: [[ICMP2:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] ; FAST-NEXT: [[TRUNC2:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP2]](s32) - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vcc(s1) = COPY [[TRUNC2]](s1) + ; FAST-NEXT: [[PRED_COPY1:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC2]](s1) ; FAST-NEXT: G_BR %bb.2 ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.2: - ; FAST-NEXT: [[PHI:%[0-9]+]]:vcc(s1) = G_PHI [[COPY3]](s1), %bb.0, [[COPY4]](s1), %bb.1 + ; FAST-NEXT: 
[[PHI:%[0-9]+]]:vcc(s1) = G_PHI [[PRED_COPY]](s1), %bb.0, [[PRED_COPY1]](s1), %bb.1 ; FAST-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 123 - ; FAST-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32) + ; FAST-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C1]](s32) ; FAST-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 456 - ; FAST-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[C2]](s32) - ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PHI]](s1), [[COPY5]], [[COPY6]] + ; FAST-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C2]](s32) + ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PHI]](s1), [[PRED_COPY2]], [[PRED_COPY3]] ; FAST-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32) ; GREEDY-LABEL: name: phi_s1_vcc_result_scc_scc_sbranch ; GREEDY: bb.0: @@ -1710,7 +1710,7 @@ ; GREEDY-NEXT: [[ICMP1:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] ; GREEDY-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP1]](s32) ; GREEDY-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC1]](s1) - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) ; GREEDY-NEXT: G_BRCOND [[ZEXT]](s32), %bb.1 ; GREEDY-NEXT: G_BR %bb.2 ; GREEDY-NEXT: {{ $}} @@ -1719,11 +1719,11 @@ ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: [[ICMP2:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] ; GREEDY-NEXT: [[TRUNC2:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP2]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vcc(s1) = COPY [[TRUNC2]](s1) + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC2]](s1) ; GREEDY-NEXT: G_BR %bb.2 ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.2: - ; GREEDY-NEXT: [[PHI:%[0-9]+]]:vcc(s1) = G_PHI [[COPY3]](s1), %bb.0, [[COPY4]](s1), %bb.1 + ; GREEDY-NEXT: [[PHI:%[0-9]+]]:vcc(s1) = G_PHI [[PRED_COPY]](s1), %bb.0, [[PRED_COPY1]](s1), %bb.1 ; GREEDY-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 123 ; GREEDY-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 456 ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PHI]](s1), [[C1]], [[C2]] diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-phi.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-phi.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-phi.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-phi.mir @@ -340,19 +340,19 @@ ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 ; FAST-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; FAST-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; FAST-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[COPY3]] + ; FAST-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; FAST-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[PRED_COPY]] ; FAST-NEXT: G_BRCOND [[ICMP]](s1), %bb.1 ; FAST-NEXT: G_BR %bb.2 ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.1: ; FAST-NEXT: successors: %bb.2(0x80000000) ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY [[COPY1]](s32) + ; FAST-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY [[COPY1]](s32) ; FAST-NEXT: G_BR %bb.2 ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.2: - ; FAST-NEXT: [[PHI:%[0-9]+]]:sgpr(s32) = G_PHI [[COPY]](s32), %bb.0, [[COPY4]](s32), %bb.1 + ; FAST-NEXT: [[PHI:%[0-9]+]]:sgpr(s32) = G_PHI [[COPY]](s32), %bb.0, [[COPY3]](s32), %bb.1 ; FAST-NEXT: $sgpr0 = COPY [[PHI]](s32) ; FAST-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31 ; GREEDY-LABEL: name: phi_s32_ss_vcc_sbranch @@ -364,19 +364,19 @@ ; 
GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[COPY3]] + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[PRED_COPY]] ; GREEDY-NEXT: G_BRCOND [[ICMP]](s1), %bb.1 ; GREEDY-NEXT: G_BR %bb.2 ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.1: ; GREEDY-NEXT: successors: %bb.2(0x80000000) ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY [[COPY1]](s32) + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY [[COPY1]](s32) ; GREEDY-NEXT: G_BR %bb.2 ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.2: - ; GREEDY-NEXT: [[PHI:%[0-9]+]]:sgpr(s32) = G_PHI [[COPY]](s32), %bb.0, [[COPY4]](s32), %bb.1 + ; GREEDY-NEXT: [[PHI:%[0-9]+]]:sgpr(s32) = G_PHI [[COPY]](s32), %bb.0, [[COPY3]](s32), %bb.1 ; GREEDY-NEXT: $sgpr0 = COPY [[PHI]](s32) ; GREEDY-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31 bb.0: @@ -419,19 +419,19 @@ ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; FAST-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 ; FAST-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; FAST-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[COPY3]] + ; FAST-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; FAST-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[PRED_COPY]] ; FAST-NEXT: G_BRCOND [[ICMP]](s1), %bb.1 ; FAST-NEXT: G_BR %bb.2 ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.1: ; FAST-NEXT: successors: %bb.2(0x80000000) ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) + ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) ; FAST-NEXT: G_BR %bb.2 ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.2: - ; FAST-NEXT: [[PHI:%[0-9]+]]:vgpr(s32) = G_PHI [[COPY]](s32), %bb.0, [[COPY4]](s32), %bb.1 + ; FAST-NEXT: [[PHI:%[0-9]+]]:vgpr(s32) = G_PHI [[COPY]](s32), %bb.0, [[COPY3]](s32), %bb.1 ; FAST-NEXT: $vgpr0 = COPY [[PHI]](s32) ; FAST-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31 ; GREEDY-LABEL: name: phi_s32_sv_vcc_sbranch @@ -443,19 +443,19 @@ ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[COPY3]] + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[PRED_COPY]] ; GREEDY-NEXT: G_BRCOND [[ICMP]](s1), %bb.1 ; GREEDY-NEXT: G_BR %bb.2 ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.1: ; GREEDY-NEXT: successors: %bb.2(0x80000000) ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) ; GREEDY-NEXT: G_BR %bb.2 ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.2: - ; GREEDY-NEXT: [[PHI:%[0-9]+]]:vgpr(s32) = G_PHI [[COPY]](s32), %bb.0, [[COPY4]](s32), %bb.1 + ; GREEDY-NEXT: [[PHI:%[0-9]+]]:vgpr(s32) = G_PHI [[COPY]](s32), %bb.0, [[COPY3]](s32), %bb.1 ; GREEDY-NEXT: $vgpr0 = COPY [[PHI]](s32) ; GREEDY-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31 bb.0: @@ -498,19 +498,19 @@ ; FAST-NEXT: 
[[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; FAST-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 ; FAST-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; FAST-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[COPY3]] + ; FAST-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; FAST-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[PRED_COPY]] ; FAST-NEXT: G_BRCOND [[ICMP]](s1), %bb.1 ; FAST-NEXT: G_BR %bb.2 ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.1: ; FAST-NEXT: successors: %bb.2(0x80000000) ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY [[COPY1]](s32) + ; FAST-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY [[COPY1]](s32) ; FAST-NEXT: G_BR %bb.2 ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.2: - ; FAST-NEXT: [[PHI:%[0-9]+]]:vgpr(s32) = G_PHI [[COPY]](s32), %bb.0, [[COPY4]](s32), %bb.1 + ; FAST-NEXT: [[PHI:%[0-9]+]]:vgpr(s32) = G_PHI [[COPY]](s32), %bb.0, [[COPY3]](s32), %bb.1 ; FAST-NEXT: $vgpr0 = COPY [[PHI]](s32) ; FAST-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31 ; GREEDY-LABEL: name: phi_s32_vs_vcc_sbranch @@ -522,19 +522,19 @@ ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[COPY3]] + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[PRED_COPY]] ; GREEDY-NEXT: G_BRCOND [[ICMP]](s1), %bb.1 ; GREEDY-NEXT: G_BR %bb.2 ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.1: ; GREEDY-NEXT: successors: %bb.2(0x80000000) ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY [[COPY1]](s32) + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY [[COPY1]](s32) ; GREEDY-NEXT: G_BR %bb.2 ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.2: - ; GREEDY-NEXT: [[PHI:%[0-9]+]]:vgpr(s32) = G_PHI [[COPY]](s32), %bb.0, [[COPY4]](s32), %bb.1 + ; GREEDY-NEXT: [[PHI:%[0-9]+]]:vgpr(s32) = G_PHI [[COPY]](s32), %bb.0, [[COPY3]](s32), %bb.1 ; GREEDY-NEXT: $vgpr0 = COPY [[PHI]](s32) ; GREEDY-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31 bb.0: @@ -577,19 +577,19 @@ ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 ; FAST-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 ; FAST-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; FAST-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[COPY3]] + ; FAST-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; FAST-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[PRED_COPY]] ; FAST-NEXT: G_BRCOND [[ICMP]](s1), %bb.1 ; FAST-NEXT: G_BR %bb.2 ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.1: ; FAST-NEXT: successors: %bb.2(0x80000000) ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) + ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) ; FAST-NEXT: G_BR %bb.2 ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.2: - ; FAST-NEXT: [[PHI:%[0-9]+]]:vgpr(s32) = G_PHI [[COPY]](s32), %bb.0, [[COPY4]](s32), %bb.1 + ; FAST-NEXT: [[PHI:%[0-9]+]]:vgpr(s32) = G_PHI [[COPY]](s32), %bb.0, [[COPY3]](s32), %bb.1 ; FAST-NEXT: $vgpr0 = COPY [[PHI]](s32) ; FAST-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31 ; GREEDY-LABEL: name: phi_s32_vv_vcc_sbranch @@ -601,19 +601,19 @@ ; GREEDY-NEXT: 
[[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[COPY3]] + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[PRED_COPY]] ; GREEDY-NEXT: G_BRCOND [[ICMP]](s1), %bb.1 ; GREEDY-NEXT: G_BR %bb.2 ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.1: ; GREEDY-NEXT: successors: %bb.2(0x80000000) ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) ; GREEDY-NEXT: G_BR %bb.2 ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.2: - ; GREEDY-NEXT: [[PHI:%[0-9]+]]:vgpr(s32) = G_PHI [[COPY]](s32), %bb.0, [[COPY4]](s32), %bb.1 + ; GREEDY-NEXT: [[PHI:%[0-9]+]]:vgpr(s32) = G_PHI [[COPY]](s32), %bb.0, [[COPY3]](s32), %bb.1 ; GREEDY-NEXT: $vgpr0 = COPY [[PHI]](s32) ; GREEDY-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31 bb.0: @@ -897,22 +897,22 @@ ; FAST-NEXT: [[ICMP1:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] ; FAST-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP1]](s32) ; FAST-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC1]](s1) - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) + ; FAST-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) ; FAST-NEXT: G_BRCOND [[ZEXT]](s32), %bb.1 ; FAST-NEXT: G_BR %bb.2 ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.1: ; FAST-NEXT: successors: %bb.2(0x80000000) ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; FAST-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[COPY4]] + ; FAST-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; FAST-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[PRED_COPY1]] ; FAST-NEXT: G_BR %bb.2 ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.2: - ; FAST-NEXT: [[PHI:%[0-9]+]]:vcc(s1) = G_PHI [[COPY3]](s1), %bb.0, [[ICMP2]](s1), %bb.1 - ; FAST-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; FAST-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PHI]](s1), [[COPY5]], [[COPY6]] + ; FAST-NEXT: [[PHI:%[0-9]+]]:vcc(s1) = G_PHI [[PRED_COPY]](s1), %bb.0, [[ICMP2]](s1), %bb.1 + ; FAST-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; FAST-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PHI]](s1), [[PRED_COPY2]], [[PRED_COPY3]] ; FAST-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32) ; GREEDY-LABEL: name: phi_s1_scc_vcc_sbranch ; GREEDY: bb.0: @@ -928,22 +928,22 @@ ; GREEDY-NEXT: [[ICMP1:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] ; GREEDY-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP1]](s32) ; GREEDY-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC1]](s1) - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) ; GREEDY-NEXT: G_BRCOND [[ZEXT]](s32), %bb.1 ; GREEDY-NEXT: G_BR %bb.2 ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.1: ; GREEDY-NEXT: successors: %bb.2(0x80000000) ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[COPY4]] + 
; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; GREEDY-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[PRED_COPY1]] ; GREEDY-NEXT: G_BR %bb.2 ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.2: - ; GREEDY-NEXT: [[PHI:%[0-9]+]]:vcc(s1) = G_PHI [[COPY3]](s1), %bb.0, [[ICMP2]](s1), %bb.1 - ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PHI]](s1), [[COPY5]], [[COPY6]] + ; GREEDY-NEXT: [[PHI:%[0-9]+]]:vcc(s1) = G_PHI [[PRED_COPY]](s1), %bb.0, [[ICMP2]](s1), %bb.1 + ; GREEDY-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; GREEDY-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PHI]](s1), [[PRED_COPY2]], [[PRED_COPY3]] ; GREEDY-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32) bb.0: successors: %bb.1, %bb.2 @@ -986,8 +986,8 @@ ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 ; FAST-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; FAST-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY3]] + ; FAST-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; FAST-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[PRED_COPY]] ; FAST-NEXT: [[ICMP1:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP1]](s32) ; FAST-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC]](s1) @@ -999,14 +999,14 @@ ; FAST-NEXT: {{ $}} ; FAST-NEXT: [[ICMP2:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] ; FAST-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP2]](s32) - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vcc(s1) = COPY [[TRUNC1]](s1) + ; FAST-NEXT: [[PRED_COPY1:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC1]](s1) ; FAST-NEXT: G_BR %bb.2 ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.2: - ; FAST-NEXT: [[PHI:%[0-9]+]]:vcc(s1) = G_PHI [[ICMP]](s1), %bb.0, [[COPY4]](s1), %bb.1 - ; FAST-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; FAST-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PHI]](s1), [[COPY5]], [[COPY6]] + ; FAST-NEXT: [[PHI:%[0-9]+]]:vcc(s1) = G_PHI [[ICMP]](s1), %bb.0, [[PRED_COPY1]](s1), %bb.1 + ; FAST-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; FAST-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PHI]](s1), [[PRED_COPY2]], [[PRED_COPY3]] ; FAST-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32) ; GREEDY-LABEL: name: phi_s1_vcc_scc_sbranch ; GREEDY: bb.0: @@ -1017,8 +1017,8 @@ ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY3]] + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[PRED_COPY]] ; GREEDY-NEXT: [[ICMP1:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP1]](s32) ; GREEDY-NEXT: 
[[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC]](s1) @@ -1030,14 +1030,14 @@ ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: [[ICMP2:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] ; GREEDY-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP2]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vcc(s1) = COPY [[TRUNC1]](s1) + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC1]](s1) ; GREEDY-NEXT: G_BR %bb.2 ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.2: - ; GREEDY-NEXT: [[PHI:%[0-9]+]]:vcc(s1) = G_PHI [[ICMP]](s1), %bb.0, [[COPY4]](s1), %bb.1 - ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PHI]](s1), [[COPY5]], [[COPY6]] + ; GREEDY-NEXT: [[PHI:%[0-9]+]]:vcc(s1) = G_PHI [[ICMP]](s1), %bb.0, [[PRED_COPY1]](s1), %bb.1 + ; GREEDY-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; GREEDY-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PHI]](s1), [[PRED_COPY2]], [[PRED_COPY3]] ; GREEDY-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32) bb.0: successors: %bb.1, %bb.2 @@ -1080,8 +1080,8 @@ ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; FAST-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; FAST-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY3]] + ; FAST-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; FAST-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[PRED_COPY]] ; FAST-NEXT: [[ICMP1:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP1]](s32) ; FAST-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC]](s1) @@ -1091,14 +1091,14 @@ ; FAST-NEXT: bb.1: ; FAST-NEXT: successors: %bb.2(0x80000000) ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; FAST-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[COPY4]] + ; FAST-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; FAST-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[PRED_COPY1]] ; FAST-NEXT: G_BR %bb.2 ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.2: ; FAST-NEXT: [[PHI:%[0-9]+]]:vcc(s1) = G_PHI [[ICMP]](s1), %bb.0, [[ICMP2]](s1), %bb.1 - ; FAST-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PHI]](s1), [[COPY5]], [[COPY]] + ; FAST-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PHI]](s1), [[PRED_COPY2]], [[COPY]] ; FAST-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32) ; GREEDY-LABEL: name: phi_s1_vcc_vcc_sbranch ; GREEDY: bb.0: @@ -1109,8 +1109,8 @@ ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY3]] + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[PRED_COPY]] ; GREEDY-NEXT: [[ICMP1:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] ; 
GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP1]](s32) ; GREEDY-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC]](s1) @@ -1120,14 +1120,14 @@ ; GREEDY-NEXT: bb.1: ; GREEDY-NEXT: successors: %bb.2(0x80000000) ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[COPY4]] + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; GREEDY-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[PRED_COPY1]] ; GREEDY-NEXT: G_BR %bb.2 ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.2: ; GREEDY-NEXT: [[PHI:%[0-9]+]]:vcc(s1) = G_PHI [[ICMP]](s1), %bb.0, [[ICMP2]](s1), %bb.1 - ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PHI]](s1), [[COPY5]], [[COPY]] + ; GREEDY-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PHI]](s1), [[PRED_COPY2]], [[COPY]] ; GREEDY-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32) bb.0: successors: %bb.1, %bb.2 @@ -1377,10 +1377,10 @@ ; FAST-NEXT: bb.2: ; FAST-NEXT: [[PHI:%[0-9]+]]:vgpr(s32) = G_PHI [[ANYEXT]](s32), %bb.0, [[ANYEXT1]](s32), %bb.1 ; FAST-NEXT: [[TRUNC3:%[0-9]+]]:vgpr(s1) = G_TRUNC [[PHI]](s32) - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC3]](s1) - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; FAST-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY3]](s1), [[COPY4]], [[COPY5]] + ; FAST-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC3]](s1) + ; FAST-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; FAST-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PRED_COPY]](s1), [[PRED_COPY1]], [[PRED_COPY2]] ; FAST-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32) ; GREEDY-LABEL: name: phi_s1_scc_v_sbranch ; GREEDY: bb.0: @@ -1410,10 +1410,10 @@ ; GREEDY-NEXT: bb.2: ; GREEDY-NEXT: [[PHI:%[0-9]+]]:vgpr(s32) = G_PHI [[ANYEXT]](s32), %bb.0, [[ANYEXT1]](s32), %bb.1 ; GREEDY-NEXT: [[TRUNC3:%[0-9]+]]:vgpr(s1) = G_TRUNC [[PHI]](s32) - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC3]](s1) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY3]](s1), [[COPY4]], [[COPY5]] + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC3]](s1) + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; GREEDY-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PRED_COPY]](s1), [[PRED_COPY1]], [[PRED_COPY2]] ; GREEDY-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32) bb.0: successors: %bb.1, %bb.2 @@ -1475,9 +1475,9 @@ ; FAST-NEXT: bb.2: ; FAST-NEXT: [[PHI:%[0-9]+]]:vgpr(s32) = G_PHI [[ANYEXT]](s32), %bb.0, [[ANYEXT1]](s32), %bb.1 ; FAST-NEXT: [[TRUNC3:%[0-9]+]]:vgpr(s1) = G_TRUNC [[PHI]](s32) - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC3]](s1) - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY3]](s1), [[COPY4]], [[COPY]] + ; FAST-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC3]](s1) + ; FAST-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = 
PRED_COPY [[C]](s32) + ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PRED_COPY]](s1), [[PRED_COPY1]], [[COPY]] ; FAST-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32) ; GREEDY-LABEL: name: phi_s1_v_scc_sbranch ; GREEDY: bb.0: @@ -1507,9 +1507,9 @@ ; GREEDY-NEXT: bb.2: ; GREEDY-NEXT: [[PHI:%[0-9]+]]:vgpr(s32) = G_PHI [[ANYEXT]](s32), %bb.0, [[ANYEXT1]](s32), %bb.1 ; GREEDY-NEXT: [[TRUNC3:%[0-9]+]]:vgpr(s1) = G_TRUNC [[PHI]](s32) - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC3]](s1) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY3]](s1), [[COPY4]], [[COPY]] + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC3]](s1) + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PRED_COPY]](s1), [[PRED_COPY1]], [[COPY]] ; GREEDY-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32) bb.0: successors: %bb.1, %bb.2 @@ -1552,8 +1552,8 @@ ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 ; FAST-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; FAST-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY3]] + ; FAST-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; FAST-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[PRED_COPY]] ; FAST-NEXT: [[ICMP1:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP1]](s32) ; FAST-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC]](s1) @@ -1564,13 +1564,13 @@ ; FAST-NEXT: successors: %bb.2(0x80000000) ; FAST-NEXT: {{ $}} ; FAST-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY1]](s32) - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vcc(s1) = COPY [[TRUNC1]](s1) + ; FAST-NEXT: [[PRED_COPY1:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC1]](s1) ; FAST-NEXT: G_BR %bb.2 ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.2: - ; FAST-NEXT: [[PHI:%[0-9]+]]:vcc(s1) = G_PHI [[ICMP]](s1), %bb.0, [[COPY4]](s1), %bb.1 - ; FAST-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PHI]](s1), [[COPY5]], [[COPY]] + ; FAST-NEXT: [[PHI:%[0-9]+]]:vcc(s1) = G_PHI [[ICMP]](s1), %bb.0, [[PRED_COPY1]](s1), %bb.1 + ; FAST-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PHI]](s1), [[PRED_COPY2]], [[COPY]] ; FAST-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32) ; GREEDY-LABEL: name: phi_s1_vcc_s_sbranch ; GREEDY: bb.0: @@ -1581,8 +1581,8 @@ ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY3]] + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[PRED_COPY]] ; GREEDY-NEXT: [[ICMP1:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP1]](s32) ; GREEDY-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC]](s1) @@ -1593,13 +1593,13 @@ ; GREEDY-NEXT: successors: %bb.2(0x80000000) ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: 
[[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY1]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vcc(s1) = COPY [[TRUNC1]](s1) + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC1]](s1) ; GREEDY-NEXT: G_BR %bb.2 ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.2: - ; GREEDY-NEXT: [[PHI:%[0-9]+]]:vcc(s1) = G_PHI [[ICMP]](s1), %bb.0, [[COPY4]](s1), %bb.1 - ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PHI]](s1), [[COPY5]], [[COPY]] + ; GREEDY-NEXT: [[PHI:%[0-9]+]]:vcc(s1) = G_PHI [[ICMP]](s1), %bb.0, [[PRED_COPY1]](s1), %bb.1 + ; GREEDY-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PHI]](s1), [[PRED_COPY2]], [[COPY]] ; GREEDY-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32) bb.0: successors: %bb.1, %bb.2 @@ -1646,22 +1646,22 @@ ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] ; FAST-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) ; FAST-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC1]](s1) - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) + ; FAST-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) ; FAST-NEXT: G_BRCOND [[ZEXT]](s32), %bb.1 ; FAST-NEXT: G_BR %bb.2 ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.1: ; FAST-NEXT: successors: %bb.2(0x80000000) ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; FAST-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[COPY4]] + ; FAST-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; FAST-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[PRED_COPY1]] ; FAST-NEXT: G_BR %bb.2 ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.2: - ; FAST-NEXT: [[PHI:%[0-9]+]]:vcc(s1) = G_PHI [[COPY3]](s1), %bb.0, [[ICMP1]](s1), %bb.1 - ; FAST-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; FAST-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PHI]](s1), [[COPY5]], [[COPY6]] + ; FAST-NEXT: [[PHI:%[0-9]+]]:vcc(s1) = G_PHI [[PRED_COPY]](s1), %bb.0, [[ICMP1]](s1), %bb.1 + ; FAST-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; FAST-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PHI]](s1), [[PRED_COPY2]], [[PRED_COPY3]] ; FAST-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32) ; GREEDY-LABEL: name: phi_s1_s_vcc_sbranch ; GREEDY: bb.0: @@ -1676,22 +1676,22 @@ ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] ; GREEDY-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) ; GREEDY-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC1]](s1) - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) ; GREEDY-NEXT: G_BRCOND [[ZEXT]](s32), %bb.1 ; GREEDY-NEXT: G_BR %bb.2 ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.1: ; GREEDY-NEXT: successors: %bb.2(0x80000000) ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[COPY4]] + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; GREEDY-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[PRED_COPY1]] ; GREEDY-NEXT: G_BR %bb.2 ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.2: - ; GREEDY-NEXT: 
[[PHI:%[0-9]+]]:vcc(s1) = G_PHI [[COPY3]](s1), %bb.0, [[ICMP1]](s1), %bb.1 - ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PHI]](s1), [[COPY5]], [[COPY6]] + ; GREEDY-NEXT: [[PHI:%[0-9]+]]:vcc(s1) = G_PHI [[PRED_COPY]](s1), %bb.0, [[ICMP1]](s1), %bb.1 + ; GREEDY-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; GREEDY-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PHI]](s1), [[PRED_COPY2]], [[PRED_COPY3]] ; GREEDY-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32) bb.0: successors: %bb.1, %bb.2 @@ -1734,8 +1734,8 @@ ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; FAST-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; FAST-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY3]] + ; FAST-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; FAST-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[PRED_COPY]] ; FAST-NEXT: [[ICMP1:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP1]](s32) ; FAST-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC]](s1) @@ -1755,9 +1755,9 @@ ; FAST-NEXT: bb.2: ; FAST-NEXT: [[PHI:%[0-9]+]]:vgpr(s32) = G_PHI [[SELECT]](s32), %bb.0, [[ANYEXT]](s32), %bb.1 ; FAST-NEXT: [[TRUNC2:%[0-9]+]]:vgpr(s1) = G_TRUNC [[PHI]](s32) - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vcc(s1) = COPY [[TRUNC2]](s1) - ; FAST-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; FAST-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY4]](s1), [[COPY5]], [[COPY]] + ; FAST-NEXT: [[PRED_COPY1:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC2]](s1) + ; FAST-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; FAST-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[PRED_COPY1]](s1), [[PRED_COPY2]], [[COPY]] ; FAST-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT1]](s32) ; GREEDY-LABEL: name: phi_s1_vcc_v_sbranch ; GREEDY: bb.0: @@ -1768,8 +1768,8 @@ ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY3]] + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[PRED_COPY]] ; GREEDY-NEXT: [[ICMP1:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP1]](s32) ; GREEDY-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC]](s1) @@ -1789,9 +1789,9 @@ ; GREEDY-NEXT: bb.2: ; GREEDY-NEXT: [[PHI:%[0-9]+]]:vgpr(s32) = G_PHI [[SELECT]](s32), %bb.0, [[ANYEXT]](s32), %bb.1 ; GREEDY-NEXT: [[TRUNC2:%[0-9]+]]:vgpr(s1) = G_TRUNC [[PHI]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vcc(s1) = COPY [[TRUNC2]](s1) - ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY4]](s1), [[COPY5]], [[COPY]] + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC2]](s1) + ; GREEDY-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; 
GREEDY-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[PRED_COPY1]](s1), [[PRED_COPY2]], [[COPY]] ; GREEDY-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT1]](s32) bb.0: successors: %bb.1, %bb.2 @@ -1845,8 +1845,8 @@ ; FAST-NEXT: bb.1: ; FAST-NEXT: successors: %bb.2(0x80000000) ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; FAST-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[COPY3]] + ; FAST-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; FAST-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[PRED_COPY]] ; FAST-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1 ; FAST-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP1]](s1), [[C1]], [[C2]] @@ -1855,9 +1855,9 @@ ; FAST-NEXT: bb.2: ; FAST-NEXT: [[PHI:%[0-9]+]]:vgpr(s32) = G_PHI [[ANYEXT]](s32), %bb.0, [[SELECT]](s32), %bb.1 ; FAST-NEXT: [[TRUNC2:%[0-9]+]]:vgpr(s1) = G_TRUNC [[PHI]](s32) - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vcc(s1) = COPY [[TRUNC2]](s1) - ; FAST-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; FAST-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY4]](s1), [[COPY5]], [[COPY]] + ; FAST-NEXT: [[PRED_COPY1:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC2]](s1) + ; FAST-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; FAST-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[PRED_COPY1]](s1), [[PRED_COPY2]], [[COPY]] ; FAST-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT1]](s32) ; GREEDY-LABEL: name: phi_s1_v_vcc_sbranch ; GREEDY: bb.0: @@ -1879,8 +1879,8 @@ ; GREEDY-NEXT: bb.1: ; GREEDY-NEXT: successors: %bb.2(0x80000000) ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[COPY3]] + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; GREEDY-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[PRED_COPY]] ; GREEDY-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1 ; GREEDY-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP1]](s1), [[C1]], [[C2]] @@ -1889,9 +1889,9 @@ ; GREEDY-NEXT: bb.2: ; GREEDY-NEXT: [[PHI:%[0-9]+]]:vgpr(s32) = G_PHI [[ANYEXT]](s32), %bb.0, [[SELECT]](s32), %bb.1 ; GREEDY-NEXT: [[TRUNC2:%[0-9]+]]:vgpr(s1) = G_TRUNC [[PHI]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vcc(s1) = COPY [[TRUNC2]](s1) - ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY4]](s1), [[COPY5]], [[COPY]] + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC2]](s1) + ; GREEDY-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; GREEDY-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[PRED_COPY1]](s1), [[PRED_COPY2]], [[COPY]] ; GREEDY-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT1]](s32) bb.0: successors: %bb.1, %bb.2 @@ -1952,9 +1952,9 @@ ; FAST-NEXT: bb.2: ; FAST-NEXT: [[PHI:%[0-9]+]]:vgpr(s32) = G_PHI [[ANYEXT]](s32), %bb.0, [[ANYEXT1]](s32), %bb.1 ; FAST-NEXT: [[TRUNC3:%[0-9]+]]:vgpr(s1) = G_TRUNC [[PHI]](s32) - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC3]](s1) - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY3]](s1), [[COPY4]], [[COPY]] + ; FAST-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC3]](s1) + ; FAST-NEXT: 
[[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PRED_COPY]](s1), [[PRED_COPY1]], [[COPY]] ; FAST-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32) ; GREEDY-LABEL: name: phi_s1_v_s_sbranch ; GREEDY: bb.0: @@ -1983,9 +1983,9 @@ ; GREEDY-NEXT: bb.2: ; GREEDY-NEXT: [[PHI:%[0-9]+]]:vgpr(s32) = G_PHI [[ANYEXT]](s32), %bb.0, [[ANYEXT1]](s32), %bb.1 ; GREEDY-NEXT: [[TRUNC3:%[0-9]+]]:vgpr(s1) = G_TRUNC [[PHI]](s32) - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC3]](s1) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY3]](s1), [[COPY4]], [[COPY]] + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC3]](s1) + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PRED_COPY]](s1), [[PRED_COPY1]], [[COPY]] ; GREEDY-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32) bb.0: successors: %bb.1, %bb.2 @@ -2046,10 +2046,10 @@ ; FAST-NEXT: bb.2: ; FAST-NEXT: [[PHI:%[0-9]+]]:vgpr(s32) = G_PHI [[ANYEXT]](s32), %bb.0, [[ANYEXT1]](s32), %bb.1 ; FAST-NEXT: [[TRUNC3:%[0-9]+]]:vgpr(s1) = G_TRUNC [[PHI]](s32) - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC3]](s1) - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; FAST-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY3]](s1), [[COPY4]], [[COPY5]] + ; FAST-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC3]](s1) + ; FAST-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; FAST-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PRED_COPY]](s1), [[PRED_COPY1]], [[PRED_COPY2]] ; FAST-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32) ; GREEDY-LABEL: name: phi_s1_s_v_sbranch ; GREEDY: bb.0: @@ -2078,10 +2078,10 @@ ; GREEDY-NEXT: bb.2: ; GREEDY-NEXT: [[PHI:%[0-9]+]]:vgpr(s32) = G_PHI [[ANYEXT]](s32), %bb.0, [[ANYEXT1]](s32), %bb.1 ; GREEDY-NEXT: [[TRUNC3:%[0-9]+]]:vgpr(s1) = G_TRUNC [[PHI]](s32) - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC3]](s1) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY3]](s1), [[COPY4]], [[COPY5]] + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC3]](s1) + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; GREEDY-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PRED_COPY]](s1), [[PRED_COPY1]], [[PRED_COPY2]] ; GREEDY-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32) bb.0: successors: %bb.1, %bb.2 @@ -2142,9 +2142,9 @@ ; FAST-NEXT: bb.2: ; FAST-NEXT: [[PHI:%[0-9]+]]:vgpr(s32) = G_PHI [[ANYEXT]](s32), %bb.0, [[ANYEXT1]](s32), %bb.1 ; FAST-NEXT: [[TRUNC3:%[0-9]+]]:vgpr(s1) = G_TRUNC [[PHI]](s32) - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC3]](s1) - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY3]](s1), [[COPY4]], [[COPY]] + ; FAST-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC3]](s1) + ; FAST-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT 
[[PRED_COPY]](s1), [[PRED_COPY1]], [[COPY]] ; FAST-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32) ; GREEDY-LABEL: name: phi_s1_v_v_sbranch ; GREEDY: bb.0: @@ -2173,9 +2173,9 @@ ; GREEDY-NEXT: bb.2: ; GREEDY-NEXT: [[PHI:%[0-9]+]]:vgpr(s32) = G_PHI [[ANYEXT]](s32), %bb.0, [[ANYEXT1]](s32), %bb.1 ; GREEDY-NEXT: [[TRUNC3:%[0-9]+]]:vgpr(s1) = G_TRUNC [[PHI]](s32) - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC3]](s1) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY3]](s1), [[COPY4]], [[COPY]] + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC3]](s1) + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PRED_COPY]](s1), [[PRED_COPY1]], [[COPY]] ; GREEDY-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32) bb.0: successors: %bb.1, %bb.2 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-ptr-add.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-ptr-add.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-ptr-add.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-ptr-add.mir @@ -53,8 +53,8 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY [[C]](s64) - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[COPY]], [[COPY1]](s64) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s64) = PRED_COPY [[C]](s64) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[COPY]], [[PRED_COPY]](s64) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(s64) = G_CONSTANT i64 1 %2:_(p1) = G_PTR_ADD %0, %1 @@ -73,8 +73,8 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s64) = COPY [[COPY1]](s64) - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[COPY]], [[COPY2]](s64) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s64) = PRED_COPY [[COPY1]](s64) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[COPY]], [[PRED_COPY]](s64) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(s64) = COPY $sgpr0_sgpr1 %2:_(p1) = G_PTR_ADD %0, %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-ptrmask.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-ptrmask.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-ptrmask.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-ptrmask.mir @@ -53,8 +53,8 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY [[C]](s64) - ; CHECK-NEXT: [[PTRMASK:%[0-9]+]]:vgpr(p1) = G_PTRMASK [[COPY]], [[COPY1]](s64) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s64) = PRED_COPY [[C]](s64) + ; CHECK-NEXT: [[PTRMASK:%[0-9]+]]:vgpr(p1) = G_PTRMASK [[COPY]], [[PRED_COPY]](s64) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(s64) = G_CONSTANT i64 1 %2:_(p1) = G_PTRMASK %0, %1 @@ -73,8 +73,8 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s64) = COPY [[COPY1]](s64) - ; CHECK-NEXT: [[PTRMASK:%[0-9]+]]:vgpr(p1) = G_PTRMASK [[COPY]], [[COPY2]](s64) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s64) = PRED_COPY [[COPY1]](s64) + ; CHECK-NEXT: 
[[PTRMASK:%[0-9]+]]:vgpr(p1) = G_PTRMASK [[COPY]], [[PRED_COPY]](s64) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(s64) = COPY $sgpr0_sgpr1 %2:_(p1) = G_PTRMASK %0, %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sadde.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sadde.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sadde.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sadde.mir @@ -57,9 +57,9 @@ ; FAST-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; FAST-NEXT: [[SADDE:%[0-9]+]]:vgpr(s32), [[SADDE1:%[0-9]+]]:vcc(s1) = G_SADDE [[COPY]], [[COPY3]], [[COPY4]] + ; FAST-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; FAST-NEXT: [[PRED_COPY1:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) + ; FAST-NEXT: [[SADDE:%[0-9]+]]:vgpr(s32), [[SADDE1:%[0-9]+]]:vcc(s1) = G_SADDE [[COPY]], [[PRED_COPY]], [[PRED_COPY1]] ; GREEDY-LABEL: name: sadde_s32_vss ; GREEDY: liveins: $vgpr0, $sgpr0, $sgpr1 ; GREEDY-NEXT: {{ $}} @@ -69,9 +69,9 @@ ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; GREEDY-NEXT: [[SADDE:%[0-9]+]]:vgpr(s32), [[SADDE1:%[0-9]+]]:vcc(s1) = G_SADDE [[COPY]], [[COPY3]], [[COPY4]] + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) + ; GREEDY-NEXT: [[SADDE:%[0-9]+]]:vgpr(s32), [[SADDE1:%[0-9]+]]:vcc(s1) = G_SADDE [[COPY]], [[PRED_COPY]], [[PRED_COPY1]] %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $sgpr0 %2:_(s32) = COPY $sgpr1 @@ -93,10 +93,10 @@ ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 ; FAST-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; FAST-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY2]](s32) - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; FAST-NEXT: [[COPY5:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; FAST-NEXT: [[SADDE:%[0-9]+]]:vgpr(s32), [[SADDE1:%[0-9]+]]:vcc(s1) = G_SADDE [[COPY3]], [[COPY4]], [[COPY5]] + ; FAST-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; FAST-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; FAST-NEXT: [[PRED_COPY2:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) + ; FAST-NEXT: [[SADDE:%[0-9]+]]:vgpr(s32), [[SADDE1:%[0-9]+]]:vcc(s1) = G_SADDE [[PRED_COPY]], [[PRED_COPY1]], [[PRED_COPY2]] ; GREEDY-LABEL: name: sadde_s32_ssv ; GREEDY: liveins: $sgpr0, $sgpr1, $vgpr0 ; GREEDY-NEXT: {{ $}} @@ -104,10 +104,10 @@ ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY2]](s32) - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; GREEDY-NEXT: [[SADDE:%[0-9]+]]:vgpr(s32), [[SADDE1:%[0-9]+]]:vcc(s1) = G_SADDE [[COPY3]], [[COPY4]], [[COPY5]] + ; GREEDY-NEXT: 
[[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; GREEDY-NEXT: [[PRED_COPY2:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) + ; GREEDY-NEXT: [[SADDE:%[0-9]+]]:vgpr(s32), [[SADDE1:%[0-9]+]]:vcc(s1) = G_SADDE [[PRED_COPY]], [[PRED_COPY1]], [[PRED_COPY2]] %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 %2:_(s32) = COPY $vgpr0 @@ -129,8 +129,8 @@ ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY2]](s32) - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; FAST-NEXT: [[SADDE:%[0-9]+]]:vgpr(s32), [[SADDE1:%[0-9]+]]:vcc(s1) = G_SADDE [[COPY]], [[COPY1]], [[COPY3]] + ; FAST-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) + ; FAST-NEXT: [[SADDE:%[0-9]+]]:vgpr(s32), [[SADDE1:%[0-9]+]]:vcc(s1) = G_SADDE [[COPY]], [[COPY1]], [[PRED_COPY]] ; GREEDY-LABEL: name: sadde_s32_vvs ; GREEDY: liveins: $vgpr0, $vgpr1, $sgpr0 ; GREEDY-NEXT: {{ $}} @@ -138,8 +138,8 @@ ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY2]](s32) - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; GREEDY-NEXT: [[SADDE:%[0-9]+]]:vgpr(s32), [[SADDE1:%[0-9]+]]:vcc(s1) = G_SADDE [[COPY]], [[COPY1]], [[COPY3]] + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) + ; GREEDY-NEXT: [[SADDE:%[0-9]+]]:vgpr(s32), [[SADDE1:%[0-9]+]]:vcc(s1) = G_SADDE [[COPY]], [[COPY1]], [[PRED_COPY]] %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32) = COPY $sgpr0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sbfx.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sbfx.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sbfx.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sbfx.mir @@ -43,9 +43,9 @@ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 10 ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32) - ; CHECK-NEXT: [[SBFX:%[0-9]+]]:vgpr(s32) = G_SBFX [[COPY]], [[COPY1]](s32), [[COPY2]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C1]](s32) + ; CHECK-NEXT: [[SBFX:%[0-9]+]]:vgpr(s32) = G_SBFX [[COPY]], [[PRED_COPY]](s32), [[PRED_COPY1]] ; CHECK-NEXT: $vgpr0 = COPY [[SBFX]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = G_CONSTANT i32 10 @@ -68,9 +68,9 @@ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[SBFX:%[0-9]+]]:vgpr(s32) = G_SBFX [[COPY]], [[COPY3]](s32), [[COPY4]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY2]](s32) + ; CHECK-NEXT: [[SBFX:%[0-9]+]]:vgpr(s32) = G_SBFX [[COPY]], [[PRED_COPY]](s32), [[PRED_COPY1]] ; CHECK-NEXT: $vgpr0 = COPY [[SBFX]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $sgpr0 @@ -153,12 +153,12 @@ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 ; CHECK-NEXT: 
[[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 31 ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32) - ; CHECK-NEXT: [[ASHR:%[0-9]+]]:vgpr(s64) = G_ASHR [[COPY]], [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C1]](s32) + ; CHECK-NEXT: [[ASHR:%[0-9]+]]:vgpr(s64) = G_ASHR [[COPY]], [[PRED_COPY]](s32) ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ASHR]](s64) ; CHECK-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[SBFX:%[0-9]+]]:vgpr(s32) = G_SBFX [[UV]], [[C2]](s32), [[COPY2]] + ; CHECK-NEXT: [[SBFX:%[0-9]+]]:vgpr(s32) = G_SBFX [[UV]], [[C2]](s32), [[PRED_COPY1]] ; CHECK-NEXT: [[C3:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 31 ; CHECK-NEXT: [[ASHR1:%[0-9]+]]:vgpr(s32) = G_ASHR [[SBFX]], [[C3]](s32) ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[SBFX]](s32), [[ASHR1]](s32) @@ -184,9 +184,9 @@ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 8 ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 40 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32) - ; CHECK-NEXT: [[ASHR:%[0-9]+]]:vgpr(s64) = G_ASHR [[COPY]], [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C1]](s32) + ; CHECK-NEXT: [[ASHR:%[0-9]+]]:vgpr(s64) = G_ASHR [[COPY]], [[PRED_COPY]](s32) ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ASHR]](s64) ; CHECK-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[C3:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 8 @@ -214,8 +214,8 @@ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s64) = COPY [[COPY]](s64) - ; CHECK-NEXT: [[ASHR:%[0-9]+]]:vgpr(s64) = G_ASHR [[COPY3]], [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s64) = PRED_COPY [[COPY]](s64) + ; CHECK-NEXT: [[ASHR:%[0-9]+]]:vgpr(s64) = G_ASHR [[PRED_COPY]], [[COPY1]](s32) ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ASHR]](s64) ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 64 ; CHECK-NEXT: [[SUB:%[0-9]+]]:vgpr(s32) = G_SUB [[C]], [[COPY2]] @@ -245,8 +245,8 @@ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[SBFX:%[0-9]+]]:vgpr(s32) = G_SBFX [[COPY3]], [[COPY1]](s32), [[COPY2]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[SBFX:%[0-9]+]]:vgpr(s32) = G_SBFX [[PRED_COPY]], [[COPY1]](s32), [[COPY2]] ; CHECK-NEXT: $vgpr0 = COPY [[SBFX]](s32) %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $vgpr0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-select.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-select.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-select.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-select.mir @@ -53,9 +53,9 @@ ; FAST-NEXT: 
[[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]] ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; FAST-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32) - ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY4]](s1), [[COPY5]], [[COPY3]] + ; FAST-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) + ; FAST-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY2]](s32) + ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PRED_COPY]](s1), [[PRED_COPY1]], [[COPY3]] ; GREEDY-LABEL: name: select_s32_scc_sv ; GREEDY: liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr0 ; GREEDY-NEXT: {{ $}} @@ -65,9 +65,9 @@ ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]] ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32) - ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY4]](s1), [[COPY5]], [[COPY3]] + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY2]](s32) + ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PRED_COPY]](s1), [[PRED_COPY1]], [[COPY3]] %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 %2:_(s32) = COPY $sgpr2 @@ -92,9 +92,9 @@ ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]] ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; FAST-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32) - ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY4]](s1), [[COPY3]], [[COPY5]] + ; FAST-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) + ; FAST-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY2]](s32) + ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PRED_COPY]](s1), [[COPY3]], [[PRED_COPY1]] ; GREEDY-LABEL: name: select_s32_scc_vs ; GREEDY: liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr0 ; GREEDY-NEXT: {{ $}} @@ -104,9 +104,9 @@ ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]] ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32) - ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY4]](s1), [[COPY3]], [[COPY5]] + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY2]](s32) + ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PRED_COPY]](s1), [[COPY3]], [[PRED_COPY1]] %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 %2:_(s32) = COPY $sgpr2 @@ -130,8 +130,8 @@ ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]] ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY4]](s1), [[COPY2]], [[COPY3]] + ; FAST-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = 
PRED_COPY [[TRUNC]](s1) + ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PRED_COPY]](s1), [[COPY2]], [[COPY3]] ; GREEDY-LABEL: name: select_s32_scc_vv ; GREEDY: liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1 ; GREEDY-NEXT: {{ $}} @@ -141,8 +141,8 @@ ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]] ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY4]](s1), [[COPY2]], [[COPY3]] + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) + ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PRED_COPY]](s1), [[COPY2]], [[COPY3]] %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 %2:_(s32) = COPY $vgpr0 @@ -165,9 +165,9 @@ ; FAST-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 ; FAST-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[COPY2]](s32), [[COPY3]] - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; FAST-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[COPY4]], [[COPY5]] + ; FAST-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; FAST-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[PRED_COPY]], [[PRED_COPY1]] ; GREEDY-LABEL: name: select_s32_vcc_ss ; GREEDY: liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1 ; GREEDY-NEXT: {{ $}} @@ -176,9 +176,9 @@ ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[COPY2]](s32), [[COPY3]] - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[COPY4]], [[COPY5]] + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[PRED_COPY]], [[PRED_COPY1]] %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 %2:_(s32) = COPY $vgpr0 @@ -201,8 +201,8 @@ ; FAST-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 ; FAST-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[COPY1]](s32), [[COPY2]] - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[COPY4]], [[COPY3]] + ; FAST-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[PRED_COPY]], [[COPY3]] ; GREEDY-LABEL: name: select_s32_vcc_sv ; GREEDY: liveins: $sgpr0, $vgpr0, $vgpr1, $vgpr2 ; GREEDY-NEXT: {{ $}} @@ -211,8 +211,8 @@ ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[COPY1]](s32), [[COPY2]] - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[COPY4]], [[COPY3]] + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; GREEDY-NEXT: 
[[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[PRED_COPY]], [[COPY3]] %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $vgpr0 %2:_(s32) = COPY $vgpr1 @@ -235,8 +235,8 @@ ; FAST-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 ; FAST-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[COPY1]](s32), [[COPY2]] - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[COPY3]], [[COPY4]] + ; FAST-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[COPY3]], [[PRED_COPY]] ; GREEDY-LABEL: name: select_s32_vcc_vs ; GREEDY: liveins: $sgpr0, $vgpr0, $vgpr1, $vgpr2 ; GREEDY-NEXT: {{ $}} @@ -245,8 +245,8 @@ ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[COPY1]](s32), [[COPY2]] - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[COPY3]], [[COPY4]] + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[COPY3]], [[PRED_COPY]] %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $vgpr0 %2:_(s32) = COPY $vgpr1 @@ -338,11 +338,11 @@ ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]] ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) + ; FAST-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) ; FAST-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY2]](s64) ; FAST-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY3]](s64) - ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY4]](s1), [[UV]], [[UV2]] - ; FAST-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY4]](s1), [[UV1]], [[UV3]] + ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PRED_COPY]](s1), [[UV]], [[UV2]] + ; FAST-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[PRED_COPY]](s1), [[UV1]], [[UV3]] ; FAST-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[SELECT]](s32), [[SELECT1]](s32) ; GREEDY-LABEL: name: select_s64_ssv ; GREEDY: liveins: $sgpr0, $sgpr1, $sgpr2_sgpr3, $vgpr0_vgpr1 @@ -353,11 +353,11 @@ ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]] ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY2]](s64) ; GREEDY-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY3]](s64) - ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY4]](s1), [[UV]], [[UV2]] - ; GREEDY-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY4]](s1), [[UV1]], [[UV3]] + ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PRED_COPY]](s1), [[UV]], [[UV2]] + ; GREEDY-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[PRED_COPY]](s1), [[UV1]], [[UV3]] ; GREEDY-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[SELECT]](s32), 
[[SELECT1]](s32) %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 @@ -383,11 +383,11 @@ ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]] ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) + ; FAST-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) ; FAST-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY3]](s64) ; FAST-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY2]](s64) - ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY4]](s1), [[UV]], [[UV2]] - ; FAST-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY4]](s1), [[UV1]], [[UV3]] + ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PRED_COPY]](s1), [[UV]], [[UV2]] + ; FAST-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[PRED_COPY]](s1), [[UV1]], [[UV3]] ; FAST-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[SELECT]](s32), [[SELECT1]](s32) ; GREEDY-LABEL: name: select_s64_svs ; GREEDY: liveins: $sgpr0, $sgpr1, $sgpr2_sgpr3, $vgpr0_vgpr1 @@ -398,11 +398,11 @@ ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]] ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY3]](s64) ; GREEDY-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY2]](s64) - ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY4]](s1), [[UV]], [[UV2]] - ; GREEDY-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY4]](s1), [[UV1]], [[UV3]] + ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PRED_COPY]](s1), [[UV]], [[UV2]] + ; GREEDY-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[PRED_COPY]](s1), [[UV1]], [[UV3]] ; GREEDY-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[SELECT]](s32), [[SELECT1]](s32) %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 @@ -427,11 +427,11 @@ ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]] ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) + ; FAST-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) ; FAST-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY2]](s64) ; FAST-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY3]](s64) - ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY4]](s1), [[UV]], [[UV2]] - ; FAST-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY4]](s1), [[UV1]], [[UV3]] + ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PRED_COPY]](s1), [[UV]], [[UV2]] + ; FAST-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[PRED_COPY]](s1), [[UV1]], [[UV3]] ; FAST-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[SELECT]](s32), [[SELECT1]](s32) ; GREEDY-LABEL: name: select_s64_svv ; GREEDY: liveins: $sgpr0, $sgpr1, $sgpr2_sgpr3, $vgpr0_vgpr1, $vgpr2_vgpr3 @@ -442,11 +442,11 @@ ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ne), [[COPY]](s32), 
[[COPY1]] ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY2]](s64) ; GREEDY-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY3]](s64) - ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY4]](s1), [[UV]], [[UV2]] - ; GREEDY-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY4]](s1), [[UV1]], [[UV3]] + ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PRED_COPY]](s1), [[UV]], [[UV2]] + ; GREEDY-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[PRED_COPY]](s1), [[UV1]], [[UV3]] ; GREEDY-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[SELECT]](s32), [[SELECT1]](s32) %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 @@ -667,11 +667,11 @@ ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(<2 x s32>) = COPY $vgpr0_vgpr1 ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]] ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) + ; FAST-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) ; FAST-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY2]](<2 x s32>) ; FAST-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY3]](<2 x s32>) - ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY4]](s1), [[UV]], [[UV2]] - ; FAST-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY4]](s1), [[UV1]], [[UV3]] + ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PRED_COPY]](s1), [[UV]], [[UV2]] + ; FAST-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[PRED_COPY]](s1), [[UV1]], [[UV3]] ; FAST-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<2 x s32>) = G_BUILD_VECTOR [[SELECT]](s32), [[SELECT1]](s32) ; GREEDY-LABEL: name: select_v2s32_scc_sv ; GREEDY: liveins: $sgpr0, $sgpr1, $sgpr2_sgpr3, $vgpr0_vgpr1 @@ -682,11 +682,11 @@ ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(<2 x s32>) = COPY $vgpr0_vgpr1 ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]] ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY2]](<2 x s32>) ; GREEDY-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY3]](<2 x s32>) - ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY4]](s1), [[UV]], [[UV2]] - ; GREEDY-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY4]](s1), [[UV1]], [[UV3]] + ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PRED_COPY]](s1), [[UV]], [[UV2]] + ; GREEDY-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[PRED_COPY]](s1), [[UV1]], [[UV3]] ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<2 x s32>) = G_BUILD_VECTOR [[SELECT]](s32), [[SELECT1]](s32) %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 @@ -712,11 +712,11 @@ ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(<2 x s32>) = COPY $vgpr0_vgpr1 ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]] ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) + ; FAST-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY 
[[TRUNC]](s1) ; FAST-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY3]](<2 x s32>) ; FAST-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY2]](<2 x s32>) - ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY4]](s1), [[UV]], [[UV2]] - ; FAST-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY4]](s1), [[UV1]], [[UV3]] + ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PRED_COPY]](s1), [[UV]], [[UV2]] + ; FAST-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[PRED_COPY]](s1), [[UV1]], [[UV3]] ; FAST-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<2 x s32>) = G_BUILD_VECTOR [[SELECT]](s32), [[SELECT1]](s32) ; GREEDY-LABEL: name: select_v2s32_scc_vs ; GREEDY: liveins: $sgpr0, $sgpr1, $sgpr2_sgpr3, $vgpr0_vgpr1 @@ -727,11 +727,11 @@ ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(<2 x s32>) = COPY $vgpr0_vgpr1 ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]] ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY3]](<2 x s32>) ; GREEDY-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY2]](<2 x s32>) - ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY4]](s1), [[UV]], [[UV2]] - ; GREEDY-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY4]](s1), [[UV1]], [[UV3]] + ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PRED_COPY]](s1), [[UV]], [[UV2]] + ; GREEDY-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[PRED_COPY]](s1), [[UV1]], [[UV3]] ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<2 x s32>) = G_BUILD_VECTOR [[SELECT]](s32), [[SELECT1]](s32) %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 @@ -756,11 +756,11 @@ ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(<2 x s32>) = COPY $vgpr2_vgpr3 ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]] ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) + ; FAST-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) ; FAST-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY2]](<2 x s32>) ; FAST-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY3]](<2 x s32>) - ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY4]](s1), [[UV]], [[UV2]] - ; FAST-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY4]](s1), [[UV1]], [[UV3]] + ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PRED_COPY]](s1), [[UV]], [[UV2]] + ; FAST-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[PRED_COPY]](s1), [[UV1]], [[UV3]] ; FAST-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<2 x s32>) = G_BUILD_VECTOR [[SELECT]](s32), [[SELECT1]](s32) ; GREEDY-LABEL: name: select_v2s32_scc_vv ; GREEDY: liveins: $sgpr0, $sgpr1, $sgpr2_sgpr3, $vgpr0_vgpr1, $vgpr2_vgpr3 @@ -771,11 +771,11 @@ ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(<2 x s32>) = COPY $vgpr2_vgpr3 ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]] ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY2]](<2 x 
s32>) ; GREEDY-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY3]](<2 x s32>) - ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY4]](s1), [[UV]], [[UV2]] - ; GREEDY-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY4]](s1), [[UV1]], [[UV3]] + ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PRED_COPY]](s1), [[UV]], [[UV2]] + ; GREEDY-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[PRED_COPY]](s1), [[UV1]], [[UV3]] ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<2 x s32>) = G_BUILD_VECTOR [[SELECT]](s32), [[SELECT1]](s32) %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 @@ -996,11 +996,11 @@ ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(<4 x s16>) = COPY $vgpr0_vgpr1 ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]] ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) + ; FAST-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) ; FAST-NEXT: [[UV:%[0-9]+]]:vgpr(<2 x s16>), [[UV1:%[0-9]+]]:vgpr(<2 x s16>) = G_UNMERGE_VALUES [[COPY2]](<4 x s16>) ; FAST-NEXT: [[UV2:%[0-9]+]]:vgpr(<2 x s16>), [[UV3:%[0-9]+]]:vgpr(<2 x s16>) = G_UNMERGE_VALUES [[COPY3]](<4 x s16>) - ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(<2 x s16>) = G_SELECT [[COPY4]](s1), [[UV]], [[UV2]] - ; FAST-NEXT: [[SELECT1:%[0-9]+]]:vgpr(<2 x s16>) = G_SELECT [[COPY4]](s1), [[UV1]], [[UV3]] + ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(<2 x s16>) = G_SELECT [[PRED_COPY]](s1), [[UV]], [[UV2]] + ; FAST-NEXT: [[SELECT1:%[0-9]+]]:vgpr(<2 x s16>) = G_SELECT [[PRED_COPY]](s1), [[UV1]], [[UV3]] ; FAST-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<4 x s16>) = G_CONCAT_VECTORS [[SELECT]](<2 x s16>), [[SELECT1]](<2 x s16>) ; GREEDY-LABEL: name: select_v4s16_scc_sv ; GREEDY: liveins: $sgpr0, $sgpr1, $sgpr2_sgpr3, $vgpr0_vgpr1 @@ -1011,11 +1011,11 @@ ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(<4 x s16>) = COPY $vgpr0_vgpr1 ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]] ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr(<2 x s16>), [[UV1:%[0-9]+]]:vgpr(<2 x s16>) = G_UNMERGE_VALUES [[COPY2]](<4 x s16>) ; GREEDY-NEXT: [[UV2:%[0-9]+]]:vgpr(<2 x s16>), [[UV3:%[0-9]+]]:vgpr(<2 x s16>) = G_UNMERGE_VALUES [[COPY3]](<4 x s16>) - ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(<2 x s16>) = G_SELECT [[COPY4]](s1), [[UV]], [[UV2]] - ; GREEDY-NEXT: [[SELECT1:%[0-9]+]]:vgpr(<2 x s16>) = G_SELECT [[COPY4]](s1), [[UV1]], [[UV3]] + ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(<2 x s16>) = G_SELECT [[PRED_COPY]](s1), [[UV]], [[UV2]] + ; GREEDY-NEXT: [[SELECT1:%[0-9]+]]:vgpr(<2 x s16>) = G_SELECT [[PRED_COPY]](s1), [[UV1]], [[UV3]] ; GREEDY-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<4 x s16>) = G_CONCAT_VECTORS [[SELECT]](<2 x s16>), [[SELECT1]](<2 x s16>) %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 @@ -1041,11 +1041,11 @@ ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(<4 x s16>) = COPY $vgpr0_vgpr1 ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]] ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) + ; FAST-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) ; FAST-NEXT: [[UV:%[0-9]+]]:vgpr(<2 x s16>), [[UV1:%[0-9]+]]:vgpr(<2 x s16>) = G_UNMERGE_VALUES [[COPY3]](<4 x s16>) ; FAST-NEXT: 
[[UV2:%[0-9]+]]:vgpr(<2 x s16>), [[UV3:%[0-9]+]]:vgpr(<2 x s16>) = G_UNMERGE_VALUES [[COPY2]](<4 x s16>) - ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(<2 x s16>) = G_SELECT [[COPY4]](s1), [[UV]], [[UV2]] - ; FAST-NEXT: [[SELECT1:%[0-9]+]]:vgpr(<2 x s16>) = G_SELECT [[COPY4]](s1), [[UV1]], [[UV3]] + ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(<2 x s16>) = G_SELECT [[PRED_COPY]](s1), [[UV]], [[UV2]] + ; FAST-NEXT: [[SELECT1:%[0-9]+]]:vgpr(<2 x s16>) = G_SELECT [[PRED_COPY]](s1), [[UV1]], [[UV3]] ; FAST-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<4 x s16>) = G_CONCAT_VECTORS [[SELECT]](<2 x s16>), [[SELECT1]](<2 x s16>) ; GREEDY-LABEL: name: select_v4s16_scc_vs ; GREEDY: liveins: $sgpr0, $sgpr1, $sgpr2_sgpr3, $vgpr0_vgpr1 @@ -1056,11 +1056,11 @@ ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(<4 x s16>) = COPY $vgpr0_vgpr1 ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]] ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr(<2 x s16>), [[UV1:%[0-9]+]]:vgpr(<2 x s16>) = G_UNMERGE_VALUES [[COPY3]](<4 x s16>) ; GREEDY-NEXT: [[UV2:%[0-9]+]]:vgpr(<2 x s16>), [[UV3:%[0-9]+]]:vgpr(<2 x s16>) = G_UNMERGE_VALUES [[COPY2]](<4 x s16>) - ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(<2 x s16>) = G_SELECT [[COPY4]](s1), [[UV]], [[UV2]] - ; GREEDY-NEXT: [[SELECT1:%[0-9]+]]:vgpr(<2 x s16>) = G_SELECT [[COPY4]](s1), [[UV1]], [[UV3]] + ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(<2 x s16>) = G_SELECT [[PRED_COPY]](s1), [[UV]], [[UV2]] + ; GREEDY-NEXT: [[SELECT1:%[0-9]+]]:vgpr(<2 x s16>) = G_SELECT [[PRED_COPY]](s1), [[UV1]], [[UV3]] ; GREEDY-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<4 x s16>) = G_CONCAT_VECTORS [[SELECT]](<2 x s16>), [[SELECT1]](<2 x s16>) %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 @@ -1085,11 +1085,11 @@ ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(<4 x s16>) = COPY $vgpr2_vgpr3 ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]] ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) + ; FAST-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) ; FAST-NEXT: [[UV:%[0-9]+]]:vgpr(<2 x s16>), [[UV1:%[0-9]+]]:vgpr(<2 x s16>) = G_UNMERGE_VALUES [[COPY2]](<4 x s16>) ; FAST-NEXT: [[UV2:%[0-9]+]]:vgpr(<2 x s16>), [[UV3:%[0-9]+]]:vgpr(<2 x s16>) = G_UNMERGE_VALUES [[COPY3]](<4 x s16>) - ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(<2 x s16>) = G_SELECT [[COPY4]](s1), [[UV]], [[UV2]] - ; FAST-NEXT: [[SELECT1:%[0-9]+]]:vgpr(<2 x s16>) = G_SELECT [[COPY4]](s1), [[UV1]], [[UV3]] + ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(<2 x s16>) = G_SELECT [[PRED_COPY]](s1), [[UV]], [[UV2]] + ; FAST-NEXT: [[SELECT1:%[0-9]+]]:vgpr(<2 x s16>) = G_SELECT [[PRED_COPY]](s1), [[UV1]], [[UV3]] ; FAST-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<4 x s16>) = G_CONCAT_VECTORS [[SELECT]](<2 x s16>), [[SELECT1]](<2 x s16>) ; GREEDY-LABEL: name: select_v4s16_scc_vv ; GREEDY: liveins: $sgpr0, $sgpr1, $sgpr2_sgpr3, $vgpr0_vgpr1, $vgpr2_vgpr3 @@ -1100,11 +1100,11 @@ ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(<4 x s16>) = COPY $vgpr2_vgpr3 ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]] ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr(<2 x s16>), 
[[UV1:%[0-9]+]]:vgpr(<2 x s16>) = G_UNMERGE_VALUES [[COPY2]](<4 x s16>) ; GREEDY-NEXT: [[UV2:%[0-9]+]]:vgpr(<2 x s16>), [[UV3:%[0-9]+]]:vgpr(<2 x s16>) = G_UNMERGE_VALUES [[COPY3]](<4 x s16>) - ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(<2 x s16>) = G_SELECT [[COPY4]](s1), [[UV]], [[UV2]] - ; GREEDY-NEXT: [[SELECT1:%[0-9]+]]:vgpr(<2 x s16>) = G_SELECT [[COPY4]](s1), [[UV1]], [[UV3]] + ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(<2 x s16>) = G_SELECT [[PRED_COPY]](s1), [[UV]], [[UV2]] + ; GREEDY-NEXT: [[SELECT1:%[0-9]+]]:vgpr(<2 x s16>) = G_SELECT [[PRED_COPY]](s1), [[UV1]], [[UV3]] ; GREEDY-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<4 x s16>) = G_CONCAT_VECTORS [[SELECT]](<2 x s16>), [[SELECT1]](<2 x s16>) %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 @@ -1362,11 +1362,11 @@ ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]] ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) + ; FAST-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) ; FAST-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY2]](p1) ; FAST-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY3]](p1) - ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY4]](s1), [[UV]], [[UV2]] - ; FAST-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY4]](s1), [[UV1]], [[UV3]] + ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PRED_COPY]](s1), [[UV]], [[UV2]] + ; FAST-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[PRED_COPY]](s1), [[UV1]], [[UV3]] ; FAST-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[SELECT]](s32), [[SELECT1]](s32) ; GREEDY-LABEL: name: select_p1_scc_sv ; GREEDY: liveins: $sgpr0, $sgpr1, $sgpr2_sgpr3, $vgpr0_vgpr1 @@ -1377,11 +1377,11 @@ ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]] ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY2]](p1) ; GREEDY-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY3]](p1) - ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY4]](s1), [[UV]], [[UV2]] - ; GREEDY-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY4]](s1), [[UV1]], [[UV3]] + ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PRED_COPY]](s1), [[UV]], [[UV2]] + ; GREEDY-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[PRED_COPY]](s1), [[UV1]], [[UV3]] ; GREEDY-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[SELECT]](s32), [[SELECT1]](s32) %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 @@ -1407,11 +1407,11 @@ ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]] ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) + ; FAST-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) ; FAST-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY3]](p1) ; FAST-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY2]](p1) - ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = 
G_SELECT [[COPY4]](s1), [[UV]], [[UV2]] - ; FAST-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY4]](s1), [[UV1]], [[UV3]] + ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PRED_COPY]](s1), [[UV]], [[UV2]] + ; FAST-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[PRED_COPY]](s1), [[UV1]], [[UV3]] ; FAST-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[SELECT]](s32), [[SELECT1]](s32) ; GREEDY-LABEL: name: select_p1_scc_vs ; GREEDY: liveins: $sgpr0, $sgpr1, $sgpr2_sgpr3, $vgpr0_vgpr1 @@ -1422,11 +1422,11 @@ ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]] ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY3]](p1) ; GREEDY-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY2]](p1) - ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY4]](s1), [[UV]], [[UV2]] - ; GREEDY-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY4]](s1), [[UV1]], [[UV3]] + ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PRED_COPY]](s1), [[UV]], [[UV2]] + ; GREEDY-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[PRED_COPY]](s1), [[UV1]], [[UV3]] ; GREEDY-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[SELECT]](s32), [[SELECT1]](s32) %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 @@ -1451,11 +1451,11 @@ ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(p1) = COPY $vgpr2_vgpr3 ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]] ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) + ; FAST-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) ; FAST-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY2]](p1) ; FAST-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY3]](p1) - ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY4]](s1), [[UV]], [[UV2]] - ; FAST-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY4]](s1), [[UV1]], [[UV3]] + ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PRED_COPY]](s1), [[UV]], [[UV2]] + ; FAST-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[PRED_COPY]](s1), [[UV1]], [[UV3]] ; FAST-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[SELECT]](s32), [[SELECT1]](s32) ; GREEDY-LABEL: name: select_p1_scc_vv ; GREEDY: liveins: $sgpr0, $sgpr1, $sgpr2_sgpr3, $vgpr0_vgpr1, $vgpr2_vgpr3 @@ -1466,11 +1466,11 @@ ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(p1) = COPY $vgpr2_vgpr3 ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]] ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY2]](p1) ; GREEDY-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY3]](p1) - ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY4]](s1), [[UV]], [[UV2]] - ; GREEDY-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY4]](s1), [[UV1]], [[UV3]] + ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PRED_COPY]](s1), [[UV]], [[UV2]] + 
; GREEDY-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[PRED_COPY]](s1), [[UV1]], [[UV3]] ; GREEDY-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[SELECT]](s32), [[SELECT1]](s32) %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 @@ -1693,8 +1693,8 @@ ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 ; FAST-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 ; FAST-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32) - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY3]](s1), [[COPY1]], [[COPY2]] + ; FAST-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) + ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PRED_COPY]](s1), [[COPY1]], [[COPY2]] ; GREEDY-LABEL: name: select_s32_vgpr_vv ; GREEDY: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GREEDY-NEXT: {{ $}} @@ -1702,8 +1702,8 @@ ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32) - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY3]](s1), [[COPY1]], [[COPY2]] + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) + ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PRED_COPY]](s1), [[COPY1]], [[COPY2]] %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32) = COPY $vgpr2 @@ -1724,10 +1724,10 @@ ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 ; FAST-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32) - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; FAST-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32) - ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY3]](s1), [[COPY4]], [[COPY5]] + ; FAST-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) + ; FAST-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; FAST-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY2]](s32) + ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PRED_COPY]](s1), [[PRED_COPY1]], [[PRED_COPY2]] ; GREEDY-LABEL: name: select_s32_vgpr_ss ; GREEDY: liveins: $vgpr0, $sgpr0, $sgpr1 ; GREEDY-NEXT: {{ $}} @@ -1735,10 +1735,10 @@ ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32) - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32) - ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY3]](s1), [[COPY4]], [[COPY5]] + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; GREEDY-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY2]](s32) + ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PRED_COPY]](s1), [[PRED_COPY1]], [[PRED_COPY2]] %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $sgpr0 %2:_(s32) = COPY $sgpr1 @@ -1759,8 +1759,8 @@ ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; FAST-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32) - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; FAST-NEXT: 
[[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY3]](s1), [[COPY1]], [[COPY2]] + ; FAST-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) + ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PRED_COPY]](s1), [[COPY1]], [[COPY2]] ; GREEDY-LABEL: name: select_s32_sgpr_vv ; GREEDY: liveins: $sgpr0, $vgpr0, $vgpr1 ; GREEDY-NEXT: {{ $}} @@ -1768,8 +1768,8 @@ ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32) - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY3]](s1), [[COPY1]], [[COPY2]] + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) + ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PRED_COPY]](s1), [[COPY1]], [[COPY2]] %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $vgpr0 %2:_(s32) = COPY $vgpr1 @@ -1790,9 +1790,9 @@ ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32) - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32) - ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY3]](s1), [[COPY1]], [[COPY4]] + ; FAST-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) + ; FAST-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY2]](s32) + ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PRED_COPY]](s1), [[COPY1]], [[PRED_COPY1]] ; GREEDY-LABEL: name: select_s32_sgpr_vs ; GREEDY: liveins: $sgpr0, $vgpr0, $sgpr1 ; GREEDY-NEXT: {{ $}} @@ -1800,9 +1800,9 @@ ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32) - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32) - ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY3]](s1), [[COPY1]], [[COPY4]] + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY2]](s32) + ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PRED_COPY]](s1), [[COPY1]], [[PRED_COPY1]] %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $vgpr0 %2:_(s32) = COPY $sgpr1 @@ -1823,9 +1823,9 @@ ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 ; FAST-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32) - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY3]](s1), [[COPY4]], [[COPY2]] + ; FAST-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) + ; FAST-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PRED_COPY]](s1), [[PRED_COPY1]], [[COPY2]] ; GREEDY-LABEL: name: select_s32_sgpr_sv ; GREEDY: liveins: $sgpr0, $sgpr0, $vgpr0 ; GREEDY-NEXT: {{ $}} @@ -1833,9 +1833,9 @@ ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32) - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY 
[[COPY1]](s32) - ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY3]](s1), [[COPY4]], [[COPY2]] + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PRED_COPY]](s1), [[PRED_COPY1]], [[COPY2]] %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 %2:_(s32) = COPY $vgpr0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sext-inreg.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sext-inreg.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sext-inreg.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sext-inreg.mir @@ -209,9 +209,9 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32) - ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:vgpr(s32) = G_SEXT_INREG [[COPY1]], 1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY1]](s32), [[SEXT_INREG]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:vgpr(s32) = G_SEXT_INREG [[PRED_COPY]], 1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[PRED_COPY]](s32), [[SEXT_INREG]](s32) ; CHECK-NEXT: S_ENDPGM 0, implicit [[MV]](s64) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s64) = G_SEXT_INREG %0, 33 @@ -232,9 +232,9 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32) - ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:vgpr(s32) = G_SEXT_INREG [[COPY1]], 3 - ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY1]](s32), [[SEXT_INREG]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:vgpr(s32) = G_SEXT_INREG [[PRED_COPY]], 3 + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[PRED_COPY]](s32), [[SEXT_INREG]](s32) ; CHECK-NEXT: S_ENDPGM 0, implicit [[MV]](s64) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s64) = G_SEXT_INREG %0, 35 @@ -255,9 +255,9 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32) - ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:vgpr(s32) = G_SEXT_INREG [[COPY1]], 31 - ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY1]](s32), [[SEXT_INREG]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:vgpr(s32) = G_SEXT_INREG [[PRED_COPY]], 31 + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[PRED_COPY]](s32), [[SEXT_INREG]](s32) ; CHECK-NEXT: S_ENDPGM 0, implicit [[MV]](s64) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s64) = G_SEXT_INREG %0, 63 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sext.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sext.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sext.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sext.mir @@ -47,10 +47,10 @@ ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY 
[[COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 31 - ; CHECK-NEXT: [[ASHR:%[0-9]+]]:vgpr(s32) = G_ASHR [[COPY1]], [[C]](s32) - ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY1]](s32), [[ASHR]](s32) + ; CHECK-NEXT: [[ASHR:%[0-9]+]]:vgpr(s32) = G_ASHR [[PRED_COPY]], [[C]](s32) + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[PRED_COPY]](s32), [[ASHR]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s64) = G_SEXT %0 ... @@ -179,8 +179,8 @@ ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 -1 ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[C]], [[C1]] - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[SELECT]](s32) - ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[SELECT]](s32), [[COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[SELECT]](s32) + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[SELECT]](s32), [[PRED_COPY]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s1) = G_ICMP intpred(eq), %0, %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sextload.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sextload.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sextload.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sextload.mir @@ -13,8 +13,8 @@ ; CHECK: liveins: $sgpr0_sgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(p4) = COPY [[COPY]](p4) - ; CHECK-NEXT: [[SEXTLOAD:%[0-9]+]]:vgpr(s32) = G_SEXTLOAD [[COPY1]](p4) :: (load (s8), addrspace 4) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p4) = PRED_COPY [[COPY]](p4) + ; CHECK-NEXT: [[SEXTLOAD:%[0-9]+]]:vgpr(s32) = G_SEXTLOAD [[PRED_COPY]](p4) :: (load (s8), addrspace 4) %0:_(p4) = COPY $sgpr0_sgpr1 %1:_(s32) = G_SEXTLOAD %0 :: (load (s8), addrspace 4, align 1) ... @@ -31,8 +31,8 @@ ; CHECK: liveins: $sgpr0_sgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(p4) = COPY [[COPY]](p4) - ; CHECK-NEXT: [[SEXTLOAD:%[0-9]+]]:vgpr(s32) = G_SEXTLOAD [[COPY1]](p4) :: (load (s8), addrspace 1) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p4) = PRED_COPY [[COPY]](p4) + ; CHECK-NEXT: [[SEXTLOAD:%[0-9]+]]:vgpr(s32) = G_SEXTLOAD [[PRED_COPY]](p4) :: (load (s8), addrspace 1) %0:_(p4) = COPY $sgpr0_sgpr1 %1:_(s32) = G_SEXTLOAD %0 :: (load (s8), addrspace 1, align 1) ... @@ -49,8 +49,8 @@ ; CHECK: liveins: $sgpr0_sgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(p4) = COPY [[COPY]](p4) - ; CHECK-NEXT: [[SEXTLOAD:%[0-9]+]]:vgpr(s32) = G_SEXTLOAD [[COPY1]](p4) :: (load (s16), addrspace 4) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p4) = PRED_COPY [[COPY]](p4) + ; CHECK-NEXT: [[SEXTLOAD:%[0-9]+]]:vgpr(s32) = G_SEXTLOAD [[PRED_COPY]](p4) :: (load (s16), addrspace 4) %0:_(p4) = COPY $sgpr0_sgpr1 %1:_(s32) = G_SEXTLOAD %0 :: (load (s16), addrspace 4, align 2) ... 
@@ -67,8 +67,8 @@ ; CHECK: liveins: $sgpr0_sgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(p4) = COPY [[COPY]](p4) - ; CHECK-NEXT: [[SEXTLOAD:%[0-9]+]]:vgpr(s32) = G_SEXTLOAD [[COPY1]](p4) :: (load (s16), addrspace 1) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p4) = PRED_COPY [[COPY]](p4) + ; CHECK-NEXT: [[SEXTLOAD:%[0-9]+]]:vgpr(s32) = G_SEXTLOAD [[PRED_COPY]](p4) :: (load (s16), addrspace 1) %0:_(p4) = COPY $sgpr0_sgpr1 %1:_(s32) = G_SEXTLOAD %0 :: (load (s16), addrspace 1, align 2) ... @@ -84,8 +84,8 @@ ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p3) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY [[COPY]](p3) - ; CHECK-NEXT: [[SEXTLOAD:%[0-9]+]]:vgpr(s32) = G_SEXTLOAD [[COPY1]](p3) :: (load (s8), addrspace 3) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p3) = PRED_COPY [[COPY]](p3) + ; CHECK-NEXT: [[SEXTLOAD:%[0-9]+]]:vgpr(s32) = G_SEXTLOAD [[PRED_COPY]](p3) :: (load (s8), addrspace 3) %0:_(p3) = COPY $sgpr0 %1:_(s32) = G_SEXTLOAD %0 :: (load (s8), addrspace 3, align 1) ... @@ -102,8 +102,8 @@ ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p3) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY [[COPY]](p3) - ; CHECK-NEXT: [[SEXTLOAD:%[0-9]+]]:vgpr(s32) = G_SEXTLOAD [[COPY1]](p3) :: (load (s16), addrspace 3) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p3) = PRED_COPY [[COPY]](p3) + ; CHECK-NEXT: [[SEXTLOAD:%[0-9]+]]:vgpr(s32) = G_SEXTLOAD [[PRED_COPY]](p3) :: (load (s16), addrspace 3) %0:_(p3) = COPY $sgpr0 %1:_(s32) = G_SEXTLOAD %0 :: (load (s16), addrspace 3, align 2) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-shl.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-shl.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-shl.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-shl.mir @@ -34,8 +34,8 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[SHL:%[0-9]+]]:vgpr(s32) = G_SHL [[COPY2]], [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:vgpr(s32) = G_SHL [[PRED_COPY]], [[COPY1]](s32) ; CHECK-NEXT: S_ENDPGM 0, implicit [[SHL]](s32) %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $vgpr0 @@ -55,8 +55,8 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[SHL:%[0-9]+]]:vgpr(s32) = G_SHL [[COPY]], [[COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:vgpr(s32) = G_SHL [[COPY]], [[PRED_COPY]](s32) ; CHECK-NEXT: S_ENDPGM 0, implicit [[SHL]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $sgpr0 @@ -126,8 +126,8 @@ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY1]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s16) = COPY [[TRUNC]](s16) - ; CHECK-NEXT: [[SHL:%[0-9]+]]:vgpr(s16) = G_SHL [[COPY2]], [[TRUNC1]](s16) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s16) = PRED_COPY [[TRUNC]](s16) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:vgpr(s16) = G_SHL [[PRED_COPY]], [[TRUNC1]](s16) ; CHECK-NEXT: S_ENDPGM 0, 
implicit [[SHL]](s16) %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $vgpr0 @@ -151,8 +151,8 @@ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY1]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s16) = COPY [[TRUNC1]](s16) - ; CHECK-NEXT: [[SHL:%[0-9]+]]:vgpr(s16) = G_SHL [[TRUNC]], [[COPY2]](s16) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s16) = PRED_COPY [[TRUNC1]](s16) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:vgpr(s16) = G_SHL [[TRUNC]], [[PRED_COPY]](s16) ; CHECK-NEXT: S_ENDPGM 0, implicit [[SHL]](s16) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $sgpr0 @@ -229,8 +229,8 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY]](<2 x s16>) - ; CHECK-NEXT: [[SHL:%[0-9]+]]:vgpr(<2 x s16>) = G_SHL [[COPY2]], [[COPY1]](<2 x s16>) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(<2 x s16>) = PRED_COPY [[COPY]](<2 x s16>) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:vgpr(<2 x s16>) = G_SHL [[PRED_COPY]], [[COPY1]](<2 x s16>) ; CHECK-NEXT: S_ENDPGM 0, implicit [[SHL]](<2 x s16>) %0:_(<2 x s16>) = COPY $sgpr0 %1:_(<2 x s16>) = COPY $vgpr0 @@ -250,8 +250,8 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY1]](<2 x s16>) - ; CHECK-NEXT: [[SHL:%[0-9]+]]:vgpr(<2 x s16>) = G_SHL [[COPY]], [[COPY2]](<2 x s16>) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(<2 x s16>) = PRED_COPY [[COPY1]](<2 x s16>) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:vgpr(<2 x s16>) = G_SHL [[COPY]], [[PRED_COPY]](<2 x s16>) ; CHECK-NEXT: S_ENDPGM 0, implicit [[SHL]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s16>) = COPY $sgpr0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sitofp.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sitofp.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sitofp.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sitofp.mir @@ -13,8 +13,8 @@ ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[SITOFP:%[0-9]+]]:vgpr(s32) = G_SITOFP [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[SITOFP:%[0-9]+]]:vgpr(s32) = G_SITOFP [[PRED_COPY]](s32) %0:_(s32) = COPY $sgpr0 %1:_(s32) = G_SITOFP %0 ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-smax.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-smax.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-smax.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-smax.mir @@ -34,8 +34,8 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[SMAX:%[0-9]+]]:vgpr(s32) = G_SMAX [[COPY2]], [[COPY1]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[SMAX:%[0-9]+]]:vgpr(s32) = G_SMAX [[PRED_COPY]], [[COPY1]] %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $vgpr0 %2:_(s32) = G_SMAX %0, %1 @@ -54,8 +54,8 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[SMAX:%[0-9]+]]:vgpr(s32) = G_SMAX [[COPY]], [[COPY2]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[SMAX:%[0-9]+]]:vgpr(s32) = G_SMAX [[COPY]], [[PRED_COPY]] ; CHECK-NEXT: $vgpr0 = COPY [[SMAX]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $sgpr0 @@ -213,8 +213,8 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY]](<2 x s16>) - ; CHECK-NEXT: [[SMAX:%[0-9]+]]:vgpr(<2 x s16>) = G_SMAX [[COPY2]], [[COPY1]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(<2 x s16>) = PRED_COPY [[COPY]](<2 x s16>) + ; CHECK-NEXT: [[SMAX:%[0-9]+]]:vgpr(<2 x s16>) = G_SMAX [[PRED_COPY]], [[COPY1]] ; CHECK-NEXT: $vgpr0 = COPY [[SMAX]](<2 x s16>) %0:_(<2 x s16>) = COPY $sgpr0 %1:_(<2 x s16>) = COPY $vgpr0 @@ -235,8 +235,8 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY1]](<2 x s16>) - ; CHECK-NEXT: [[SMAX:%[0-9]+]]:vgpr(<2 x s16>) = G_SMAX [[COPY]], [[COPY2]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(<2 x s16>) = PRED_COPY [[COPY1]](<2 x s16>) + ; CHECK-NEXT: [[SMAX:%[0-9]+]]:vgpr(<2 x s16>) = G_SMAX [[COPY]], [[PRED_COPY]] ; CHECK-NEXT: $vgpr0 = COPY [[SMAX]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s16>) = COPY $sgpr0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-smin.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-smin.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-smin.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-smin.mir @@ -36,8 +36,8 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[SMIN:%[0-9]+]]:vgpr(s32) = G_SMIN [[COPY2]], [[COPY1]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[SMIN:%[0-9]+]]:vgpr(s32) = G_SMIN [[PRED_COPY]], [[COPY1]] ; CHECK-NEXT: $vgpr0 = COPY [[SMIN]](s32) %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $vgpr0 @@ -58,8 +58,8 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[SMIN:%[0-9]+]]:vgpr(s32) = 
G_SMIN [[COPY]], [[COPY2]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[SMIN:%[0-9]+]]:vgpr(s32) = G_SMIN [[COPY]], [[PRED_COPY]] ; CHECK-NEXT: $vgpr0 = COPY [[SMIN]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $sgpr0 @@ -216,8 +216,8 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY]](<2 x s16>) - ; CHECK-NEXT: [[SMIN:%[0-9]+]]:vgpr(<2 x s16>) = G_SMIN [[COPY2]], [[COPY1]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(<2 x s16>) = PRED_COPY [[COPY]](<2 x s16>) + ; CHECK-NEXT: [[SMIN:%[0-9]+]]:vgpr(<2 x s16>) = G_SMIN [[PRED_COPY]], [[COPY1]] ; CHECK-NEXT: $vgpr0 = COPY [[SMIN]](<2 x s16>) %0:_(<2 x s16>) = COPY $sgpr0 %1:_(<2 x s16>) = COPY $vgpr0 @@ -238,8 +238,8 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY1]](<2 x s16>) - ; CHECK-NEXT: [[SMIN:%[0-9]+]]:vgpr(<2 x s16>) = G_SMIN [[COPY]], [[COPY2]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(<2 x s16>) = PRED_COPY [[COPY1]](<2 x s16>) + ; CHECK-NEXT: [[SMIN:%[0-9]+]]:vgpr(<2 x s16>) = G_SMIN [[COPY]], [[PRED_COPY]] ; CHECK-NEXT: $vgpr0 = COPY [[SMIN]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s16>) = COPY $sgpr0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-smulh.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-smulh.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-smulh.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-smulh.mir @@ -18,9 +18,9 @@ ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; GFX6-NEXT: [[SMULH:%[0-9]+]]:vgpr(s32) = G_SMULH [[COPY2]], [[COPY3]] + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; GFX6-NEXT: [[SMULH:%[0-9]+]]:vgpr(s32) = G_SMULH [[PRED_COPY]], [[PRED_COPY1]] ; GFX9-LABEL: name: smulh_s32_ss ; GFX9: liveins: $sgpr0, $sgpr1 ; GFX9-NEXT: {{ $}} @@ -45,15 +45,15 @@ ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GFX6-NEXT: [[SMULH:%[0-9]+]]:vgpr(s32) = G_SMULH [[COPY2]], [[COPY1]] + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; GFX6-NEXT: [[SMULH:%[0-9]+]]:vgpr(s32) = G_SMULH [[PRED_COPY]], [[COPY1]] ; GFX9-LABEL: name: smulh_s32_sv ; GFX9: liveins: $sgpr0, $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GFX9-NEXT: [[SMULH:%[0-9]+]]:vgpr(s32) = G_SMULH [[COPY2]], [[COPY1]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; GFX9-NEXT: [[SMULH:%[0-9]+]]:vgpr(s32) = G_SMULH [[PRED_COPY]], [[COPY1]] %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $vgpr0 %2:_(s32) = G_SMULH %0, %1 @@ -72,15 +72,15 @@ ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX6-NEXT: 
[[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; GFX6-NEXT: [[SMULH:%[0-9]+]]:vgpr(s32) = G_SMULH [[COPY]], [[COPY2]] + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; GFX6-NEXT: [[SMULH:%[0-9]+]]:vgpr(s32) = G_SMULH [[COPY]], [[PRED_COPY]] ; GFX9-LABEL: name: smulh_s32_vs ; GFX9: liveins: $sgpr0, $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; GFX9-NEXT: [[SMULH:%[0-9]+]]:vgpr(s32) = G_SMULH [[COPY]], [[COPY2]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; GFX9-NEXT: [[SMULH:%[0-9]+]]:vgpr(s32) = G_SMULH [[COPY]], [[PRED_COPY]] %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $sgpr0 %2:_(s32) = G_SMULH %0, %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-ssube.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-ssube.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-ssube.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-ssube.mir @@ -57,9 +57,9 @@ ; FAST-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; FAST-NEXT: [[SSUBE:%[0-9]+]]:vgpr(s32), [[SSUBE1:%[0-9]+]]:vcc(s1) = G_SSUBE [[COPY]], [[COPY3]], [[COPY4]] + ; FAST-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; FAST-NEXT: [[PRED_COPY1:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) + ; FAST-NEXT: [[SSUBE:%[0-9]+]]:vgpr(s32), [[SSUBE1:%[0-9]+]]:vcc(s1) = G_SSUBE [[COPY]], [[PRED_COPY]], [[PRED_COPY1]] ; GREEDY-LABEL: name: ssube_s32_vss ; GREEDY: liveins: $vgpr0, $sgpr0, $sgpr1 ; GREEDY-NEXT: {{ $}} @@ -69,9 +69,9 @@ ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; GREEDY-NEXT: [[SSUBE:%[0-9]+]]:vgpr(s32), [[SSUBE1:%[0-9]+]]:vcc(s1) = G_SSUBE [[COPY]], [[COPY3]], [[COPY4]] + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) + ; GREEDY-NEXT: [[SSUBE:%[0-9]+]]:vgpr(s32), [[SSUBE1:%[0-9]+]]:vcc(s1) = G_SSUBE [[COPY]], [[PRED_COPY]], [[PRED_COPY1]] %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $sgpr0 %2:_(s32) = COPY $sgpr1 @@ -93,10 +93,10 @@ ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 ; FAST-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; FAST-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY2]](s32) - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; FAST-NEXT: [[COPY5:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; FAST-NEXT: [[SSUBE:%[0-9]+]]:vgpr(s32), [[SSUBE1:%[0-9]+]]:vcc(s1) = G_SSUBE [[COPY3]], [[COPY4]], [[COPY5]] + ; FAST-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; FAST-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; FAST-NEXT: [[PRED_COPY2:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) + ; FAST-NEXT: [[SSUBE:%[0-9]+]]:vgpr(s32), [[SSUBE1:%[0-9]+]]:vcc(s1) = G_SSUBE 
[[PRED_COPY]], [[PRED_COPY1]], [[PRED_COPY2]] ; GREEDY-LABEL: name: ssube_s32_ssv ; GREEDY: liveins: $sgpr0, $sgpr1, $vgpr0 ; GREEDY-NEXT: {{ $}} @@ -104,10 +104,10 @@ ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY2]](s32) - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; GREEDY-NEXT: [[SSUBE:%[0-9]+]]:vgpr(s32), [[SSUBE1:%[0-9]+]]:vcc(s1) = G_SSUBE [[COPY3]], [[COPY4]], [[COPY5]] + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; GREEDY-NEXT: [[PRED_COPY2:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) + ; GREEDY-NEXT: [[SSUBE:%[0-9]+]]:vgpr(s32), [[SSUBE1:%[0-9]+]]:vcc(s1) = G_SSUBE [[PRED_COPY]], [[PRED_COPY1]], [[PRED_COPY2]] %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 %2:_(s32) = COPY $vgpr0 @@ -129,8 +129,8 @@ ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY2]](s32) - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; FAST-NEXT: [[SSUBE:%[0-9]+]]:vgpr(s32), [[SSUBE1:%[0-9]+]]:vcc(s1) = G_SSUBE [[COPY]], [[COPY1]], [[COPY3]] + ; FAST-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) + ; FAST-NEXT: [[SSUBE:%[0-9]+]]:vgpr(s32), [[SSUBE1:%[0-9]+]]:vcc(s1) = G_SSUBE [[COPY]], [[COPY1]], [[PRED_COPY]] ; GREEDY-LABEL: name: ssube_s32_vvs ; GREEDY: liveins: $vgpr0, $vgpr1, $sgpr0 ; GREEDY-NEXT: {{ $}} @@ -138,8 +138,8 @@ ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY2]](s32) - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; GREEDY-NEXT: [[SSUBE:%[0-9]+]]:vgpr(s32), [[SSUBE1:%[0-9]+]]:vcc(s1) = G_SSUBE [[COPY]], [[COPY1]], [[COPY3]] + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) + ; GREEDY-NEXT: [[SSUBE:%[0-9]+]]:vgpr(s32), [[SSUBE1:%[0-9]+]]:vcc(s1) = G_SSUBE [[COPY]], [[COPY1]], [[PRED_COPY]] %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32) = COPY $sgpr0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sub.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sub.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sub.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sub.mir @@ -31,8 +31,8 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[SUB:%[0-9]+]]:vgpr(s32) = G_SUB [[COPY2]], [[COPY1]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[SUB:%[0-9]+]]:vgpr(s32) = G_SUB [[PRED_COPY]], [[COPY1]] %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $vgpr0 %2:_(s32) = G_SUB %0, %1 @@ -50,8 +50,8 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[SUB:%[0-9]+]]:vgpr(s32) = G_SUB [[COPY]], [[COPY2]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: 
[[SUB:%[0-9]+]]:vgpr(s32) = G_SUB [[COPY]], [[PRED_COPY]] %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $sgpr0 %2:_(s32) = G_SUB %0, %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-uadde.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-uadde.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-uadde.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-uadde.mir @@ -56,9 +56,9 @@ ; FAST-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; FAST-NEXT: [[UADDE:%[0-9]+]]:vgpr(s32), [[UADDE1:%[0-9]+]]:vcc(s1) = G_UADDE [[COPY]], [[COPY3]], [[COPY4]] + ; FAST-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; FAST-NEXT: [[PRED_COPY1:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) + ; FAST-NEXT: [[UADDE:%[0-9]+]]:vgpr(s32), [[UADDE1:%[0-9]+]]:vcc(s1) = G_UADDE [[COPY]], [[PRED_COPY]], [[PRED_COPY1]] ; GREEDY-LABEL: name: uadde_s32_vss ; GREEDY: liveins: $vgpr0, $sgpr0, $sgpr1 ; GREEDY-NEXT: {{ $}} @@ -68,9 +68,9 @@ ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; GREEDY-NEXT: [[UADDE:%[0-9]+]]:vgpr(s32), [[UADDE1:%[0-9]+]]:vcc(s1) = G_UADDE [[COPY]], [[COPY3]], [[COPY4]] + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) + ; GREEDY-NEXT: [[UADDE:%[0-9]+]]:vgpr(s32), [[UADDE1:%[0-9]+]]:vcc(s1) = G_UADDE [[COPY]], [[PRED_COPY]], [[PRED_COPY1]] %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $sgpr0 %2:_(s32) = COPY $sgpr1 @@ -92,10 +92,10 @@ ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 ; FAST-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; FAST-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY2]](s32) - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; FAST-NEXT: [[COPY5:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; FAST-NEXT: [[UADDE:%[0-9]+]]:vgpr(s32), [[UADDE1:%[0-9]+]]:vcc(s1) = G_UADDE [[COPY3]], [[COPY4]], [[COPY5]] + ; FAST-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; FAST-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; FAST-NEXT: [[PRED_COPY2:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) + ; FAST-NEXT: [[UADDE:%[0-9]+]]:vgpr(s32), [[UADDE1:%[0-9]+]]:vcc(s1) = G_UADDE [[PRED_COPY]], [[PRED_COPY1]], [[PRED_COPY2]] ; GREEDY-LABEL: name: uadde_s32_ssv ; GREEDY: liveins: $sgpr0, $sgpr1, $vgpr0 ; GREEDY-NEXT: {{ $}} @@ -103,10 +103,10 @@ ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY2]](s32) - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; GREEDY-NEXT: [[UADDE:%[0-9]+]]:vgpr(s32), [[UADDE1:%[0-9]+]]:vcc(s1) = G_UADDE [[COPY3]], [[COPY4]], [[COPY5]] + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) 
= PRED_COPY [[COPY]](s32) + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; GREEDY-NEXT: [[PRED_COPY2:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) + ; GREEDY-NEXT: [[UADDE:%[0-9]+]]:vgpr(s32), [[UADDE1:%[0-9]+]]:vcc(s1) = G_UADDE [[PRED_COPY]], [[PRED_COPY1]], [[PRED_COPY2]] %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 %2:_(s32) = COPY $vgpr0 @@ -128,8 +128,8 @@ ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY2]](s32) - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; FAST-NEXT: [[UADDE:%[0-9]+]]:vgpr(s32), [[UADDE1:%[0-9]+]]:vcc(s1) = G_UADDE [[COPY]], [[COPY1]], [[COPY3]] + ; FAST-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) + ; FAST-NEXT: [[UADDE:%[0-9]+]]:vgpr(s32), [[UADDE1:%[0-9]+]]:vcc(s1) = G_UADDE [[COPY]], [[COPY1]], [[PRED_COPY]] ; GREEDY-LABEL: name: uadde_s32_vvs ; GREEDY: liveins: $vgpr0, $vgpr1, $sgpr0 ; GREEDY-NEXT: {{ $}} @@ -137,8 +137,8 @@ ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY2]](s32) - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; GREEDY-NEXT: [[UADDE:%[0-9]+]]:vgpr(s32), [[UADDE1:%[0-9]+]]:vcc(s1) = G_UADDE [[COPY]], [[COPY1]], [[COPY3]] + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) + ; GREEDY-NEXT: [[UADDE:%[0-9]+]]:vgpr(s32), [[UADDE1:%[0-9]+]]:vcc(s1) = G_UADDE [[COPY]], [[COPY1]], [[PRED_COPY]] %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32) = COPY $sgpr0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-uaddo.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-uaddo.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-uaddo.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-uaddo.mir @@ -33,8 +33,8 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[UADDO:%[0-9]+]]:vgpr(s32), [[UADDO1:%[0-9]+]]:vcc(s1) = G_UADDO [[COPY2]], [[COPY1]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[UADDO:%[0-9]+]]:vgpr(s32), [[UADDO1:%[0-9]+]]:vcc(s1) = G_UADDO [[PRED_COPY]], [[COPY1]] %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $vgpr0 %2:_(s32), %3:_(s1) = G_UADDO %0, %1 @@ -52,8 +52,8 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[UADDO:%[0-9]+]]:vgpr(s32), [[UADDO1:%[0-9]+]]:vcc(s1) = G_UADDO [[COPY]], [[COPY2]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[UADDO:%[0-9]+]]:vgpr(s32), [[UADDO1:%[0-9]+]]:vcc(s1) = G_UADDO [[COPY]], [[PRED_COPY]] %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $sgpr0 %2:_(s32), %3:_(s1) = G_UADDO %0, %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-ubfx.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-ubfx.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-ubfx.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-ubfx.mir @@ -43,9 +43,9 @@ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 10 ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = 
G_CONSTANT i32 4 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32) - ; CHECK-NEXT: [[UBFX:%[0-9]+]]:vgpr(s32) = G_UBFX [[COPY]], [[COPY1]](s32), [[COPY2]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C1]](s32) + ; CHECK-NEXT: [[UBFX:%[0-9]+]]:vgpr(s32) = G_UBFX [[COPY]], [[PRED_COPY]](s32), [[PRED_COPY1]] ; CHECK-NEXT: $vgpr0 = COPY [[UBFX]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = G_CONSTANT i32 10 @@ -68,9 +68,9 @@ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[UBFX:%[0-9]+]]:vgpr(s32) = G_UBFX [[COPY]], [[COPY3]](s32), [[COPY4]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY2]](s32) + ; CHECK-NEXT: [[UBFX:%[0-9]+]]:vgpr(s32) = G_UBFX [[COPY]], [[PRED_COPY]](s32), [[PRED_COPY1]] ; CHECK-NEXT: $vgpr0 = COPY [[UBFX]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $sgpr0 @@ -153,12 +153,12 @@ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 31 ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32) - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:vgpr(s64) = G_LSHR [[COPY]], [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C1]](s32) + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:vgpr(s64) = G_LSHR [[COPY]], [[PRED_COPY]](s32) ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[LSHR]](s64) ; CHECK-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[UBFX:%[0-9]+]]:vgpr(s32) = G_UBFX [[UV]], [[C2]](s32), [[COPY2]] + ; CHECK-NEXT: [[UBFX:%[0-9]+]]:vgpr(s32) = G_UBFX [[UV]], [[C2]](s32), [[PRED_COPY1]] ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[UBFX]](s32), [[C2]](s32) ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) %0:_(s64) = COPY $vgpr0_vgpr1 @@ -182,9 +182,9 @@ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 8 ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 40 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32) - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:vgpr(s64) = G_LSHR [[COPY]], [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C1]](s32) + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:vgpr(s64) = G_LSHR [[COPY]], [[PRED_COPY]](s32) ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[LSHR]](s64) ; CHECK-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[C3:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 8 @@ -212,8 +212,8 @@ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s64) = COPY [[COPY]](s64) - ; CHECK-NEXT: 
[[LSHR:%[0-9]+]]:vgpr(s64) = G_LSHR [[COPY3]], [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s64) = PRED_COPY [[COPY]](s64) + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:vgpr(s64) = G_LSHR [[PRED_COPY]], [[COPY1]](s32) ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[LSHR]](s64) ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 64 ; CHECK-NEXT: [[SUB:%[0-9]+]]:vgpr(s32) = G_SUB [[C]], [[COPY2]] @@ -243,8 +243,8 @@ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[UBFX:%[0-9]+]]:vgpr(s32) = G_UBFX [[COPY3]], [[COPY1]](s32), [[COPY2]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[UBFX:%[0-9]+]]:vgpr(s32) = G_UBFX [[PRED_COPY]], [[COPY1]](s32), [[COPY2]] ; CHECK-NEXT: $vgpr0 = COPY [[UBFX]](s32) %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $vgpr0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-uitofp.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-uitofp.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-uitofp.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-uitofp.mir @@ -13,8 +13,8 @@ ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[UITOFP:%[0-9]+]]:vgpr(s32) = G_UITOFP [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[UITOFP:%[0-9]+]]:vgpr(s32) = G_UITOFP [[PRED_COPY]](s32) %0:_(s32) = COPY $sgpr0 %1:_(s32) = G_UITOFP %0 ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-umax.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-umax.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-umax.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-umax.mir @@ -36,8 +36,8 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[UMAX:%[0-9]+]]:vgpr(s32) = G_UMAX [[COPY2]], [[COPY1]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[UMAX:%[0-9]+]]:vgpr(s32) = G_UMAX [[PRED_COPY]], [[COPY1]] ; CHECK-NEXT: $vgpr0 = COPY [[UMAX]](s32) %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $vgpr0 @@ -58,8 +58,8 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[UMAX:%[0-9]+]]:vgpr(s32) = G_UMAX [[COPY]], [[COPY2]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[UMAX:%[0-9]+]]:vgpr(s32) = G_UMAX [[COPY]], [[PRED_COPY]] ; CHECK-NEXT: $vgpr0 = COPY [[UMAX]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $sgpr0 @@ -218,8 +218,8 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY]](<2 x s16>) - ; CHECK-NEXT: [[UMAX:%[0-9]+]]:vgpr(<2 x s16>) = G_UMAX [[COPY2]], [[COPY1]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(<2 x s16>) = PRED_COPY [[COPY]](<2 x s16>) + ; CHECK-NEXT: [[UMAX:%[0-9]+]]:vgpr(<2 x s16>) = G_UMAX 
[[PRED_COPY]], [[COPY1]] ; CHECK-NEXT: $vgpr0 = COPY [[UMAX]](<2 x s16>) %0:_(<2 x s16>) = COPY $sgpr0 %1:_(<2 x s16>) = COPY $vgpr0 @@ -240,8 +240,8 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY1]](<2 x s16>) - ; CHECK-NEXT: [[UMAX:%[0-9]+]]:vgpr(<2 x s16>) = G_UMAX [[COPY]], [[COPY2]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(<2 x s16>) = PRED_COPY [[COPY1]](<2 x s16>) + ; CHECK-NEXT: [[UMAX:%[0-9]+]]:vgpr(<2 x s16>) = G_UMAX [[COPY]], [[PRED_COPY]] ; CHECK-NEXT: $vgpr0 = COPY [[UMAX]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s16>) = COPY $sgpr0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-umin.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-umin.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-umin.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-umin.mir @@ -37,8 +37,8 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[UMIN:%[0-9]+]]:vgpr(s32) = G_UMIN [[COPY2]], [[COPY1]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[UMIN:%[0-9]+]]:vgpr(s32) = G_UMIN [[PRED_COPY]], [[COPY1]] ; CHECK-NEXT: $vgpr0 = COPY [[UMIN]](s32) %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $vgpr0 @@ -60,8 +60,8 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[UMIN:%[0-9]+]]:vgpr(s32) = G_UMIN [[COPY]], [[COPY2]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[UMIN:%[0-9]+]]:vgpr(s32) = G_UMIN [[COPY]], [[PRED_COPY]] ; CHECK-NEXT: $vgpr0 = COPY [[UMIN]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $sgpr0 @@ -222,8 +222,8 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY]](<2 x s16>) - ; CHECK-NEXT: [[UMIN:%[0-9]+]]:vgpr(<2 x s16>) = G_UMIN [[COPY2]], [[COPY1]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(<2 x s16>) = PRED_COPY [[COPY]](<2 x s16>) + ; CHECK-NEXT: [[UMIN:%[0-9]+]]:vgpr(<2 x s16>) = G_UMIN [[PRED_COPY]], [[COPY1]] ; CHECK-NEXT: $vgpr0 = COPY [[UMIN]](<2 x s16>) %0:_(<2 x s16>) = COPY $sgpr0 %1:_(<2 x s16>) = COPY $vgpr0 @@ -244,8 +244,8 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY1]](<2 x s16>) - ; CHECK-NEXT: [[UMIN:%[0-9]+]]:vgpr(<2 x s16>) = G_UMIN [[COPY]], [[COPY2]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(<2 x s16>) = PRED_COPY [[COPY1]](<2 x s16>) + ; CHECK-NEXT: [[UMIN:%[0-9]+]]:vgpr(<2 x s16>) = G_UMIN [[COPY]], [[PRED_COPY]] ; CHECK-NEXT: $vgpr0 = COPY [[UMIN]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s16>) = COPY $sgpr0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-umulh.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-umulh.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-umulh.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-umulh.mir @@ -18,9 +18,9 @@ ; GFX6-NEXT: {{ $}} ; 
GFX6-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; GFX6-NEXT: [[UMULH:%[0-9]+]]:vgpr(s32) = G_UMULH [[COPY2]], [[COPY3]] + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; GFX6-NEXT: [[UMULH:%[0-9]+]]:vgpr(s32) = G_UMULH [[PRED_COPY]], [[PRED_COPY1]] ; GFX9-LABEL: name: umulh_s32_ss ; GFX9: liveins: $sgpr0, $sgpr1 ; GFX9-NEXT: {{ $}} @@ -45,15 +45,15 @@ ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GFX6-NEXT: [[UMULH:%[0-9]+]]:vgpr(s32) = G_UMULH [[COPY2]], [[COPY1]] + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; GFX6-NEXT: [[UMULH:%[0-9]+]]:vgpr(s32) = G_UMULH [[PRED_COPY]], [[COPY1]] ; GFX9-LABEL: name: umulh_s32_sv ; GFX9: liveins: $sgpr0, $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GFX9-NEXT: [[UMULH:%[0-9]+]]:vgpr(s32) = G_UMULH [[COPY2]], [[COPY1]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; GFX9-NEXT: [[UMULH:%[0-9]+]]:vgpr(s32) = G_UMULH [[PRED_COPY]], [[COPY1]] %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $vgpr0 %2:_(s32) = G_UMULH %0, %1 @@ -72,15 +72,15 @@ ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; GFX6-NEXT: [[UMULH:%[0-9]+]]:vgpr(s32) = G_UMULH [[COPY]], [[COPY2]] + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; GFX6-NEXT: [[UMULH:%[0-9]+]]:vgpr(s32) = G_UMULH [[COPY]], [[PRED_COPY]] ; GFX9-LABEL: name: umulh_s32_vs ; GFX9: liveins: $sgpr0, $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; GFX9-NEXT: [[UMULH:%[0-9]+]]:vgpr(s32) = G_UMULH [[COPY]], [[COPY2]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; GFX9-NEXT: [[UMULH:%[0-9]+]]:vgpr(s32) = G_UMULH [[COPY]], [[PRED_COPY]] %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $sgpr0 %2:_(s32) = G_UMULH %0, %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-uniform-load-noclobber.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-uniform-load-noclobber.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-uniform-load-noclobber.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-uniform-load-noclobber.mir @@ -43,7 +43,7 @@ ; GFX1010-NEXT: {{ $}} ; GFX1010-NEXT: %in_addr:sgpr(p1) = COPY $sgpr0_sgpr1 ; GFX1010-NEXT: %out_addr:sgpr(p1) = COPY $sgpr2_sgpr3 - ; GFX1010-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY %in_addr(p1) + ; GFX1010-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p1) = PRED_COPY %in_addr(p1) ; GFX1010-NEXT: [[LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD %in_addr(p1) :: (load (<4 x s32>), align 4, addrspace 1) ; GFX1010-NEXT: [[C:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 16 ; GFX1010-NEXT: [[PTR_ADD:%[0-9]+]]:vgpr(p1) = G_PTR_ADD %in_addr, [[C]](s64) @@ -56,20 +56,20 @@ ; GFX1010-NEXT: 
[[LOAD3:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[PTR_ADD2]](p1) :: (load (<4 x s32>) from unknown-address + 48, align 4, addrspace 1) ; GFX1010-NEXT: %load:vgpr(<16 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>), [[LOAD2]](<4 x s32>), [[LOAD3]](<4 x s32>) ; GFX1010-NEXT: %load0_3:vgpr(<4 x s32>), %load4_7:vgpr(<4 x s32>), %load8_11:vgpr(<4 x s32>), %load12_15:vgpr(<4 x s32>) = G_UNMERGE_VALUES %load(<16 x s32>) - ; GFX1010-NEXT: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY %out_addr(p1) - ; GFX1010-NEXT: G_STORE %load0_3(<4 x s32>), [[COPY1]](p1) :: (store (<4 x s32>), align 4, addrspace 1) + ; GFX1010-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(p1) = PRED_COPY %out_addr(p1) + ; GFX1010-NEXT: G_STORE %load0_3(<4 x s32>), [[PRED_COPY1]](p1) :: (store (<4 x s32>), align 4, addrspace 1) ; GFX1010-NEXT: %cst16:sgpr(s64) = G_CONSTANT i64 16 ; GFX1010-NEXT: %out_addr_plus_16:sgpr(p1) = G_PTR_ADD %out_addr, %cst16(s64) - ; GFX1010-NEXT: [[COPY2:%[0-9]+]]:vgpr(p1) = COPY %out_addr_plus_16(p1) - ; GFX1010-NEXT: G_STORE %load4_7(<4 x s32>), [[COPY2]](p1) :: (store (<4 x s32>), align 4, addrspace 1) + ; GFX1010-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(p1) = PRED_COPY %out_addr_plus_16(p1) + ; GFX1010-NEXT: G_STORE %load4_7(<4 x s32>), [[PRED_COPY2]](p1) :: (store (<4 x s32>), align 4, addrspace 1) ; GFX1010-NEXT: %cst32:sgpr(s64) = G_CONSTANT i64 32 ; GFX1010-NEXT: %out_addr_plus_32:sgpr(p1) = G_PTR_ADD %out_addr, %cst32(s64) - ; GFX1010-NEXT: [[COPY3:%[0-9]+]]:vgpr(p1) = COPY %out_addr_plus_32(p1) - ; GFX1010-NEXT: G_STORE %load8_11(<4 x s32>), [[COPY3]](p1) :: (store (<4 x s32>), align 4, addrspace 1) + ; GFX1010-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr(p1) = PRED_COPY %out_addr_plus_32(p1) + ; GFX1010-NEXT: G_STORE %load8_11(<4 x s32>), [[PRED_COPY3]](p1) :: (store (<4 x s32>), align 4, addrspace 1) ; GFX1010-NEXT: %cst48:sgpr(s64) = G_CONSTANT i64 48 ; GFX1010-NEXT: %out_addr_plus_48:sgpr(p1) = G_PTR_ADD %out_addr, %cst48(s64) - ; GFX1010-NEXT: [[COPY4:%[0-9]+]]:vgpr(p1) = COPY %out_addr_plus_48(p1) - ; GFX1010-NEXT: G_STORE %load12_15(<4 x s32>), [[COPY4]](p1) :: (store (<4 x s32>), align 4, addrspace 1) + ; GFX1010-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(p1) = PRED_COPY %out_addr_plus_48(p1) + ; GFX1010-NEXT: G_STORE %load12_15(<4 x s32>), [[PRED_COPY4]](p1) :: (store (<4 x s32>), align 4, addrspace 1) ; GFX1010-NEXT: S_ENDPGM 0 %in_addr:_(p1) = COPY $sgpr0_sgpr1 %out_addr:_(p1) = COPY $sgpr2_sgpr3 @@ -117,19 +117,19 @@ ; GFX1010-NEXT: {{ $}} ; GFX1010-NEXT: %ptr:sgpr(p4) = COPY $sgpr0_sgpr1 ; GFX1010-NEXT: %out:sgpr(p1) = COPY $sgpr2_sgpr3 - ; GFX1010-NEXT: [[COPY:%[0-9]+]]:vgpr(p4) = COPY %ptr(p4) + ; GFX1010-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p4) = PRED_COPY %ptr(p4) ; GFX1010-NEXT: [[LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD %ptr(p4) :: (load (<4 x s32>), align 1, addrspace 4) ; GFX1010-NEXT: [[C:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 16 ; GFX1010-NEXT: [[PTR_ADD:%[0-9]+]]:vgpr(p4) = G_PTR_ADD %ptr, [[C]](s64) ; GFX1010-NEXT: [[LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[PTR_ADD]](p4) :: (load (<4 x s32>) from unknown-address + 16, align 1, addrspace 4) ; GFX1010-NEXT: %load:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>) ; GFX1010-NEXT: %load0_3:vgpr(<4 x s32>), %load4_7:vgpr(<4 x s32>) = G_UNMERGE_VALUES %load(<8 x s32>) - ; GFX1010-NEXT: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY %out(p1) - ; GFX1010-NEXT: G_STORE %load0_3(<4 x s32>), [[COPY1]](p1) :: (store (<4 x s32>), align 32, addrspace 1) + ; GFX1010-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(p1) = PRED_COPY %out(p1) + ; GFX1010-NEXT: G_STORE 
%load0_3(<4 x s32>), [[PRED_COPY1]](p1) :: (store (<4 x s32>), align 32, addrspace 1) ; GFX1010-NEXT: %cst_16:sgpr(s64) = G_CONSTANT i64 16 ; GFX1010-NEXT: %out_plus_16:sgpr(p1) = G_PTR_ADD %out, %cst_16(s64) - ; GFX1010-NEXT: [[COPY2:%[0-9]+]]:vgpr(p1) = COPY %out_plus_16(p1) - ; GFX1010-NEXT: G_STORE %load4_7(<4 x s32>), [[COPY2]](p1) :: (store (<4 x s32>), align 32, addrspace 1) + ; GFX1010-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(p1) = PRED_COPY %out_plus_16(p1) + ; GFX1010-NEXT: G_STORE %load4_7(<4 x s32>), [[PRED_COPY2]](p1) :: (store (<4 x s32>), align 32, addrspace 1) ; GFX1010-NEXT: S_ENDPGM 0 %ptr:_(p4) = COPY $sgpr0_sgpr1 %out:_(p1) = COPY $sgpr2_sgpr3 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-usube.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-usube.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-usube.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-usube.mir @@ -57,9 +57,9 @@ ; FAST-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; FAST-NEXT: [[USUBE:%[0-9]+]]:vgpr(s32), [[USUBE1:%[0-9]+]]:vcc(s1) = G_USUBE [[COPY]], [[COPY3]], [[COPY4]] + ; FAST-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; FAST-NEXT: [[PRED_COPY1:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) + ; FAST-NEXT: [[USUBE:%[0-9]+]]:vgpr(s32), [[USUBE1:%[0-9]+]]:vcc(s1) = G_USUBE [[COPY]], [[PRED_COPY]], [[PRED_COPY1]] ; GREEDY-LABEL: name: usube_s32_vss ; GREEDY: liveins: $vgpr0, $sgpr0, $sgpr1 ; GREEDY-NEXT: {{ $}} @@ -69,9 +69,9 @@ ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; GREEDY-NEXT: [[USUBE:%[0-9]+]]:vgpr(s32), [[USUBE1:%[0-9]+]]:vcc(s1) = G_USUBE [[COPY]], [[COPY3]], [[COPY4]] + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) + ; GREEDY-NEXT: [[USUBE:%[0-9]+]]:vgpr(s32), [[USUBE1:%[0-9]+]]:vcc(s1) = G_USUBE [[COPY]], [[PRED_COPY]], [[PRED_COPY1]] %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $sgpr0 %2:_(s32) = COPY $sgpr1 @@ -93,10 +93,10 @@ ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 ; FAST-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; FAST-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY2]](s32) - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; FAST-NEXT: [[COPY5:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; FAST-NEXT: [[USUBE:%[0-9]+]]:vgpr(s32), [[USUBE1:%[0-9]+]]:vcc(s1) = G_USUBE [[COPY3]], [[COPY4]], [[COPY5]] + ; FAST-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; FAST-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; FAST-NEXT: [[PRED_COPY2:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) + ; FAST-NEXT: [[USUBE:%[0-9]+]]:vgpr(s32), [[USUBE1:%[0-9]+]]:vcc(s1) = G_USUBE [[PRED_COPY]], [[PRED_COPY1]], [[PRED_COPY2]] ; GREEDY-LABEL: name: usube_s32_ssv ; GREEDY: liveins: $sgpr0, $sgpr1, $vgpr0 ; GREEDY-NEXT: {{ $}} @@ -104,10 +104,10 @@ ; 
GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY2]](s32) - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; GREEDY-NEXT: [[USUBE:%[0-9]+]]:vgpr(s32), [[USUBE1:%[0-9]+]]:vcc(s1) = G_USUBE [[COPY3]], [[COPY4]], [[COPY5]] + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; GREEDY-NEXT: [[PRED_COPY2:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) + ; GREEDY-NEXT: [[USUBE:%[0-9]+]]:vgpr(s32), [[USUBE1:%[0-9]+]]:vcc(s1) = G_USUBE [[PRED_COPY]], [[PRED_COPY1]], [[PRED_COPY2]] %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 %2:_(s32) = COPY $vgpr0 @@ -129,8 +129,8 @@ ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY2]](s32) - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; FAST-NEXT: [[USUBE:%[0-9]+]]:vgpr(s32), [[USUBE1:%[0-9]+]]:vcc(s1) = G_USUBE [[COPY]], [[COPY1]], [[COPY3]] + ; FAST-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) + ; FAST-NEXT: [[USUBE:%[0-9]+]]:vgpr(s32), [[USUBE1:%[0-9]+]]:vcc(s1) = G_USUBE [[COPY]], [[COPY1]], [[PRED_COPY]] ; GREEDY-LABEL: name: usube_s32_vvs ; GREEDY: liveins: $vgpr0, $vgpr1, $sgpr0 ; GREEDY-NEXT: {{ $}} @@ -138,8 +138,8 @@ ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY2]](s32) - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; GREEDY-NEXT: [[USUBE:%[0-9]+]]:vgpr(s32), [[USUBE1:%[0-9]+]]:vcc(s1) = G_USUBE [[COPY]], [[COPY1]], [[COPY3]] + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) + ; GREEDY-NEXT: [[USUBE:%[0-9]+]]:vgpr(s32), [[USUBE1:%[0-9]+]]:vcc(s1) = G_USUBE [[COPY]], [[COPY1]], [[PRED_COPY]] %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32) = COPY $sgpr0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-usubo.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-usubo.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-usubo.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-usubo.mir @@ -33,8 +33,8 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[USUBO:%[0-9]+]]:vgpr(s32), [[USUBO1:%[0-9]+]]:vcc(s1) = G_USUBO [[COPY2]], [[COPY1]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[USUBO:%[0-9]+]]:vgpr(s32), [[USUBO1:%[0-9]+]]:vcc(s1) = G_USUBO [[PRED_COPY]], [[COPY1]] %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $vgpr0 %2:_(s32), %3:_(s1) = G_USUBO %0, %1 @@ -52,8 +52,8 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[USUBO:%[0-9]+]]:vgpr(s32), [[USUBO1:%[0-9]+]]:vcc(s1) = G_USUBO [[COPY]], [[COPY2]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[USUBO:%[0-9]+]]:vgpr(s32), [[USUBO1:%[0-9]+]]:vcc(s1) = G_USUBO [[COPY]], 
[[PRED_COPY]] %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $sgpr0 %2:_(s32), %3:_(s1) = G_USUBO %0, %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-waterfall-agpr.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-waterfall-agpr.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-waterfall-agpr.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-waterfall-agpr.mir @@ -22,24 +22,24 @@ ; CHECK-NEXT: %agpr:agpr(s32) = COPY $agpr0 ; CHECK-NEXT: %voffset:vgpr(s32) = COPY $vgpr1 ; CHECK-NEXT: %zero:sgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY %zero(s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY %zero(s32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY %agpr(s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY %agpr(s32) ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: .1: ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.0, %9, %bb.2 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[COPY1]](s32), implicit $exec - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[V_READFIRSTLANE_B32_]](s32), [[COPY1]] + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[PRED_COPY1]](s32), implicit $exec + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[V_READFIRSTLANE_B32_]](s32), [[PRED_COPY1]] ; CHECK-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[ICMP]](s1) ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: .2: ; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.1(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: G_AMDGPU_BUFFER_STORE %val(s32), %rsrc(<4 x s32>), [[COPY]](s32), %voffset, [[V_READFIRSTLANE_B32_]], 0, 0, 0 :: (dereferenceable store (s32), addrspace 4) + ; CHECK-NEXT: G_AMDGPU_BUFFER_STORE %val(s32), %rsrc(<4 x s32>), [[PRED_COPY]](s32), %voffset, [[V_READFIRSTLANE_B32_]], 0, 0, 0 :: (dereferenceable store (s32), addrspace 4) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec ; CHECK-NEXT: {{ $}} @@ -75,14 +75,14 @@ ; CHECK-NEXT: [[COPY:%[0-9]+]]:agpr(<8 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<8 x s32>) = COPY [[COPY]](<8 x s32>) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(<8 x s32>) = PRED_COPY [[COPY]](<8 x s32>) ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: .1: ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.0, %6, %bb.2 - ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32), [[UV4:%[0-9]+]]:vgpr_32(s32), [[UV5:%[0-9]+]]:vgpr_32(s32), [[UV6:%[0-9]+]]:vgpr_32(s32), [[UV7:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[COPY2]](<8 x s32>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), 
[[UV3:%[0-9]+]]:vgpr_32(s32), [[UV4:%[0-9]+]]:vgpr_32(s32), [[UV5:%[0-9]+]]:vgpr_32(s32), [[UV6:%[0-9]+]]:vgpr_32(s32), [[UV7:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[PRED_COPY]](<8 x s32>) ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec @@ -92,7 +92,7 @@ ; CHECK-NEXT: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV6]](s32), implicit $exec ; CHECK-NEXT: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV7]](s32), implicit $exec ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32), [[V_READFIRSTLANE_B32_4]](s32), [[V_READFIRSTLANE_B32_5]](s32), [[V_READFIRSTLANE_B32_6]](s32), [[V_READFIRSTLANE_B32_7]](s32) - ; CHECK-NEXT: [[UV8:%[0-9]+]]:vgpr(s64), [[UV9:%[0-9]+]]:vgpr(s64), [[UV10:%[0-9]+]]:vgpr(s64), [[UV11:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[COPY2]](<8 x s32>) + ; CHECK-NEXT: [[UV8:%[0-9]+]]:vgpr(s64), [[UV9:%[0-9]+]]:vgpr(s64), [[UV10:%[0-9]+]]:vgpr(s64), [[UV11:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[PRED_COPY]](<8 x s32>) ; CHECK-NEXT: [[UV12:%[0-9]+]]:sgpr(s64), [[UV13:%[0-9]+]]:sgpr(s64), [[UV14:%[0-9]+]]:sgpr(s64), [[UV15:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x s32>) ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV12]](s64), [[UV8]] ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV13]](s64), [[UV9]] diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-widen-scalar-loads.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-widen-scalar-loads.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-widen-scalar-loads.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-widen-scalar-loads.mir @@ -360,22 +360,22 @@ ; GFX8: liveins: $sgpr0_sgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) - ; GFX8-NEXT: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p1) :: (invariant load (s8), align 2, addrspace 4) + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p1) = PRED_COPY [[COPY]](p1) + ; GFX8-NEXT: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[PRED_COPY]](p1) :: (invariant load (s8), align 2, addrspace 4) ; GFX8-NEXT: S_ENDPGM 0, implicit [[LOAD]](s32) ; GFX9-LABEL: name: constant_load_i8_align2 ; GFX9: liveins: $sgpr0_sgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p1) :: (invariant load (s8), align 2, addrspace 4) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p1) = PRED_COPY [[COPY]](p1) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[PRED_COPY]](p1) :: (invariant load (s8), align 2, addrspace 4) ; GFX9-NEXT: S_ENDPGM 0, implicit [[LOAD]](s32) ; GFX10-LABEL: name: constant_load_i8_align2 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p1) :: (invariant load (s8), align 2, addrspace 4) + ; GFX10-NEXT: 
[[PRED_COPY:%[0-9]+]]:vgpr(p1) = PRED_COPY [[COPY]](p1) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[PRED_COPY]](p1) :: (invariant load (s8), align 2, addrspace 4) ; GFX10-NEXT: S_ENDPGM 0, implicit [[LOAD]](s32) %0:_(p1) = COPY $sgpr0_sgpr1 %1:_(s32) = G_LOAD %0 :: (invariant load (s8), align 2, addrspace 4) @@ -392,22 +392,22 @@ ; GFX8: liveins: $sgpr0_sgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) - ; GFX8-NEXT: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p1) :: (invariant load (s16), addrspace 4) + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p1) = PRED_COPY [[COPY]](p1) + ; GFX8-NEXT: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[PRED_COPY]](p1) :: (invariant load (s16), addrspace 4) ; GFX8-NEXT: S_ENDPGM 0, implicit [[LOAD]](s32) ; GFX9-LABEL: name: constant_load_i16_align2 ; GFX9: liveins: $sgpr0_sgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p1) :: (invariant load (s16), addrspace 4) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p1) = PRED_COPY [[COPY]](p1) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[PRED_COPY]](p1) :: (invariant load (s16), addrspace 4) ; GFX9-NEXT: S_ENDPGM 0, implicit [[LOAD]](s32) ; GFX10-LABEL: name: constant_load_i16_align2 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p1) :: (invariant load (s16), addrspace 4) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p1) = PRED_COPY [[COPY]](p1) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[PRED_COPY]](p1) :: (invariant load (s16), addrspace 4) ; GFX10-NEXT: S_ENDPGM 0, implicit [[LOAD]](s32) %0:_(p1) = COPY $sgpr0_sgpr1 %1:_(s32) = G_LOAD %0 :: (invariant load (s16), align 2, addrspace 4) @@ -424,22 +424,22 @@ ; GFX8: liveins: $sgpr0_sgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) - ; GFX8-NEXT: [[SEXTLOAD:%[0-9]+]]:vgpr(s32) = G_SEXTLOAD [[COPY1]](p1) :: (invariant load (s8), align 2, addrspace 4) + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p1) = PRED_COPY [[COPY]](p1) + ; GFX8-NEXT: [[SEXTLOAD:%[0-9]+]]:vgpr(s32) = G_SEXTLOAD [[PRED_COPY]](p1) :: (invariant load (s8), align 2, addrspace 4) ; GFX8-NEXT: S_ENDPGM 0, implicit [[SEXTLOAD]](s32) ; GFX9-LABEL: name: constant_sextload_i8_align2 ; GFX9: liveins: $sgpr0_sgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) - ; GFX9-NEXT: [[SEXTLOAD:%[0-9]+]]:vgpr(s32) = G_SEXTLOAD [[COPY1]](p1) :: (invariant load (s8), align 2, addrspace 4) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p1) = PRED_COPY [[COPY]](p1) + ; GFX9-NEXT: [[SEXTLOAD:%[0-9]+]]:vgpr(s32) = G_SEXTLOAD [[PRED_COPY]](p1) :: (invariant load (s8), align 2, addrspace 4) ; GFX9-NEXT: S_ENDPGM 0, implicit [[SEXTLOAD]](s32) ; GFX10-LABEL: name: constant_sextload_i8_align2 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) - ; GFX10-NEXT: [[SEXTLOAD:%[0-9]+]]:vgpr(s32) = G_SEXTLOAD [[COPY1]](p1) :: (invariant load (s8), align 2, addrspace 4) + ; GFX10-NEXT: 
[[PRED_COPY:%[0-9]+]]:vgpr(p1) = PRED_COPY [[COPY]](p1) + ; GFX10-NEXT: [[SEXTLOAD:%[0-9]+]]:vgpr(s32) = G_SEXTLOAD [[PRED_COPY]](p1) :: (invariant load (s8), align 2, addrspace 4) ; GFX10-NEXT: S_ENDPGM 0, implicit [[SEXTLOAD]](s32) %0:_(p1) = COPY $sgpr0_sgpr1 %1:_(s32) = G_SEXTLOAD %0 :: (invariant load (s8), align 2, addrspace 4) @@ -456,22 +456,22 @@ ; GFX8: liveins: $sgpr0_sgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) - ; GFX8-NEXT: [[SEXTLOAD:%[0-9]+]]:vgpr(s32) = G_SEXTLOAD [[COPY1]](p1) :: (invariant load (s16), addrspace 4) + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p1) = PRED_COPY [[COPY]](p1) + ; GFX8-NEXT: [[SEXTLOAD:%[0-9]+]]:vgpr(s32) = G_SEXTLOAD [[PRED_COPY]](p1) :: (invariant load (s16), addrspace 4) ; GFX8-NEXT: S_ENDPGM 0, implicit [[SEXTLOAD]](s32) ; GFX9-LABEL: name: constant_sextload_i16_align2 ; GFX9: liveins: $sgpr0_sgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) - ; GFX9-NEXT: [[SEXTLOAD:%[0-9]+]]:vgpr(s32) = G_SEXTLOAD [[COPY1]](p1) :: (invariant load (s16), addrspace 4) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p1) = PRED_COPY [[COPY]](p1) + ; GFX9-NEXT: [[SEXTLOAD:%[0-9]+]]:vgpr(s32) = G_SEXTLOAD [[PRED_COPY]](p1) :: (invariant load (s16), addrspace 4) ; GFX9-NEXT: S_ENDPGM 0, implicit [[SEXTLOAD]](s32) ; GFX10-LABEL: name: constant_sextload_i16_align2 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) - ; GFX10-NEXT: [[SEXTLOAD:%[0-9]+]]:vgpr(s32) = G_SEXTLOAD [[COPY1]](p1) :: (invariant load (s16), addrspace 4) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p1) = PRED_COPY [[COPY]](p1) + ; GFX10-NEXT: [[SEXTLOAD:%[0-9]+]]:vgpr(s32) = G_SEXTLOAD [[PRED_COPY]](p1) :: (invariant load (s16), addrspace 4) ; GFX10-NEXT: S_ENDPGM 0, implicit [[SEXTLOAD]](s32) %0:_(p1) = COPY $sgpr0_sgpr1 %1:_(s32) = G_SEXTLOAD %0 :: (invariant load (s16), align 2, addrspace 4) @@ -488,22 +488,22 @@ ; GFX8: liveins: $sgpr0_sgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) - ; GFX8-NEXT: [[ZEXTLOAD:%[0-9]+]]:vgpr(s32) = G_ZEXTLOAD [[COPY1]](p1) :: (invariant load (s8), align 2, addrspace 4) + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p1) = PRED_COPY [[COPY]](p1) + ; GFX8-NEXT: [[ZEXTLOAD:%[0-9]+]]:vgpr(s32) = G_ZEXTLOAD [[PRED_COPY]](p1) :: (invariant load (s8), align 2, addrspace 4) ; GFX8-NEXT: S_ENDPGM 0, implicit [[ZEXTLOAD]](s32) ; GFX9-LABEL: name: constant_zextload_i8_align2 ; GFX9: liveins: $sgpr0_sgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) - ; GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:vgpr(s32) = G_ZEXTLOAD [[COPY1]](p1) :: (invariant load (s8), align 2, addrspace 4) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p1) = PRED_COPY [[COPY]](p1) + ; GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:vgpr(s32) = G_ZEXTLOAD [[PRED_COPY]](p1) :: (invariant load (s8), align 2, addrspace 4) ; GFX9-NEXT: S_ENDPGM 0, implicit [[ZEXTLOAD]](s32) ; GFX10-LABEL: name: constant_zextload_i8_align2 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) - ; GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:vgpr(s32) = 
G_ZEXTLOAD [[COPY1]](p1) :: (invariant load (s8), align 2, addrspace 4) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p1) = PRED_COPY [[COPY]](p1) + ; GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:vgpr(s32) = G_ZEXTLOAD [[PRED_COPY]](p1) :: (invariant load (s8), align 2, addrspace 4) ; GFX10-NEXT: S_ENDPGM 0, implicit [[ZEXTLOAD]](s32) %0:_(p1) = COPY $sgpr0_sgpr1 %1:_(s32) = G_ZEXTLOAD %0 :: (invariant load (s8), align 2, addrspace 4) @@ -520,22 +520,22 @@ ; GFX8: liveins: $sgpr0_sgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) - ; GFX8-NEXT: [[ZEXTLOAD:%[0-9]+]]:vgpr(s32) = G_ZEXTLOAD [[COPY1]](p1) :: (invariant load (s16), addrspace 4) + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p1) = PRED_COPY [[COPY]](p1) + ; GFX8-NEXT: [[ZEXTLOAD:%[0-9]+]]:vgpr(s32) = G_ZEXTLOAD [[PRED_COPY]](p1) :: (invariant load (s16), addrspace 4) ; GFX8-NEXT: S_ENDPGM 0, implicit [[ZEXTLOAD]](s32) ; GFX9-LABEL: name: constant_zextload_i16_align2 ; GFX9: liveins: $sgpr0_sgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) - ; GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:vgpr(s32) = G_ZEXTLOAD [[COPY1]](p1) :: (invariant load (s16), addrspace 4) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p1) = PRED_COPY [[COPY]](p1) + ; GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:vgpr(s32) = G_ZEXTLOAD [[PRED_COPY]](p1) :: (invariant load (s16), addrspace 4) ; GFX9-NEXT: S_ENDPGM 0, implicit [[ZEXTLOAD]](s32) ; GFX10-LABEL: name: constant_zextload_i16_align2 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) - ; GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:vgpr(s32) = G_ZEXTLOAD [[COPY1]](p1) :: (invariant load (s16), addrspace 4) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p1) = PRED_COPY [[COPY]](p1) + ; GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:vgpr(s32) = G_ZEXTLOAD [[PRED_COPY]](p1) :: (invariant load (s16), addrspace 4) ; GFX10-NEXT: S_ENDPGM 0, implicit [[ZEXTLOAD]](s32) %0:_(p1) = COPY $sgpr0_sgpr1 %1:_(s32) = G_ZEXTLOAD %0 :: (invariant load (s16), align 2, addrspace 4) @@ -552,22 +552,22 @@ ; GFX8: liveins: $sgpr0_sgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) - ; GFX8-NEXT: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p1) :: (load (s8), align 4, addrspace 3) + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p1) = PRED_COPY [[COPY]](p1) + ; GFX8-NEXT: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[PRED_COPY]](p1) :: (load (s8), align 4, addrspace 3) ; GFX8-NEXT: S_ENDPGM 0, implicit [[LOAD]](s32) ; GFX9-LABEL: name: local_load_i8_align4 ; GFX9: liveins: $sgpr0_sgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p1) :: (load (s8), align 4, addrspace 3) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p1) = PRED_COPY [[COPY]](p1) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[PRED_COPY]](p1) :: (load (s8), align 4, addrspace 3) ; GFX9-NEXT: S_ENDPGM 0, implicit [[LOAD]](s32) ; GFX10-LABEL: name: local_load_i8_align4 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD 
[[COPY1]](p1) :: (load (s8), align 4, addrspace 3) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p1) = PRED_COPY [[COPY]](p1) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[PRED_COPY]](p1) :: (load (s8), align 4, addrspace 3) ; GFX10-NEXT: S_ENDPGM 0, implicit [[LOAD]](s32) %0:_(p1) = COPY $sgpr0_sgpr1 %1:_(s32) = G_LOAD %0 :: (load (s8), align 4, addrspace 3) @@ -584,22 +584,22 @@ ; GFX8: liveins: $sgpr0_sgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) - ; GFX8-NEXT: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p1) :: (load (s8), align 4, addrspace 5) + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p1) = PRED_COPY [[COPY]](p1) + ; GFX8-NEXT: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[PRED_COPY]](p1) :: (load (s8), align 4, addrspace 5) ; GFX8-NEXT: S_ENDPGM 0, implicit [[LOAD]](s32) ; GFX9-LABEL: name: private_load_i8_align4 ; GFX9: liveins: $sgpr0_sgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p1) :: (load (s8), align 4, addrspace 5) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p1) = PRED_COPY [[COPY]](p1) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[PRED_COPY]](p1) :: (load (s8), align 4, addrspace 5) ; GFX9-NEXT: S_ENDPGM 0, implicit [[LOAD]](s32) ; GFX10-LABEL: name: private_load_i8_align4 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p1) :: (load (s8), align 4, addrspace 5) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p1) = PRED_COPY [[COPY]](p1) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[PRED_COPY]](p1) :: (load (s8), align 4, addrspace 5) ; GFX10-NEXT: S_ENDPGM 0, implicit [[LOAD]](s32) %0:_(p1) = COPY $sgpr0_sgpr1 %1:_(s32) = G_LOAD %0 :: (load (s8), align 4, addrspace 5) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-xor.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-xor.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-xor.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-xor.mir @@ -12,11 +12,11 @@ ; CHECK-LABEL: name: xor_s32_ss ; CHECK: liveins: $sgpr0, $sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[XOR:%[0-9]+]]:sgpr(s32) = G_XOR [[COPY]], [[COPY1]] - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr1 + ; CHECK-NEXT: [[XOR:%[0-9]+]]:sgpr(s32) = G_XOR [[PRED_COPY]], [[PRED_COPY1]] + %0:_(s32) = PRED_COPY $sgpr0 + %1:_(s32) = PRED_COPY $sgpr1 %2:_(s32) = G_XOR %0, %1 ... 
@@ -30,12 +30,12 @@ ; CHECK-LABEL: name: xor_s32_sv ; CHECK: liveins: $sgpr0, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[XOR:%[0-9]+]]:vgpr(s32) = G_XOR [[COPY2]], [[COPY1]] - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY [[PRED_COPY]](s32) + ; CHECK-NEXT: [[XOR:%[0-9]+]]:vgpr(s32) = G_XOR [[PRED_COPY2]], [[PRED_COPY1]] + %0:_(s32) = PRED_COPY $sgpr0 + %1:_(s32) = PRED_COPY $vgpr0 %2:_(s32) = G_XOR %0, %1 ... @@ -49,12 +49,12 @@ ; CHECK-LABEL: name: xor_s32_vs ; CHECK: liveins: $sgpr0, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[XOR:%[0-9]+]]:vgpr(s32) = G_XOR [[COPY]], [[COPY2]] - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $sgpr0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr0 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY [[PRED_COPY1]](s32) + ; CHECK-NEXT: [[XOR:%[0-9]+]]:vgpr(s32) = G_XOR [[PRED_COPY]], [[PRED_COPY2]] + %0:_(s32) = PRED_COPY $vgpr0 + %1:_(s32) = PRED_COPY $sgpr0 %2:_(s32) = G_XOR %0, %1 ... @@ -68,11 +68,11 @@ ; CHECK-LABEL: name: xor_s32_vv ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[XOR:%[0-9]+]]:vgpr(s32) = G_XOR [[COPY]], [[COPY1]] - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[XOR:%[0-9]+]]:vgpr(s32) = G_XOR [[PRED_COPY]], [[PRED_COPY1]] + %0:_(s32) = PRED_COPY $vgpr0 + %1:_(s32) = PRED_COPY $vgpr1 %2:_(s32) = G_XOR %0, %1 ... 
@@ -86,20 +86,20 @@ ; CHECK-LABEL: name: xor_i1_scc_scc ; CHECK: liveins: $sgpr0, $sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr1 ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ne), [[COPY]](s32), [[C]] + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ne), [[PRED_COPY]](s32), [[C]] ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ne), [[COPY1]](s32), [[C]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ne), [[PRED_COPY1]](s32), [[C]] ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP1]](s32) ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC]](s1) ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC1]](s1) ; CHECK-NEXT: [[XOR:%[0-9]+]]:sgpr(s32) = G_XOR [[ANYEXT]], [[ANYEXT1]] ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:sgpr(s1) = G_TRUNC [[XOR]](s32) ; CHECK-NEXT: S_NOP 0, implicit [[TRUNC2]](s1) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 + %0:_(s32) = PRED_COPY $sgpr0 + %1:_(s32) = PRED_COPY $sgpr1 %2:_(s32) = G_CONSTANT i32 0 %4:_(s1) = G_ICMP intpred(ne), %0, %2 %5:_(s1) = G_ICMP intpred(ne), %1, %2 @@ -117,17 +117,17 @@ ; CHECK-LABEL: name: xor_i1_vcc_vcc ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr1 ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY2]] - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[COPY1]](s32), [[COPY3]] + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[PRED_COPY]](s32), [[PRED_COPY2]] + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[PRED_COPY1]](s32), [[PRED_COPY3]] ; CHECK-NEXT: [[XOR:%[0-9]+]]:vcc(s1) = G_XOR [[ICMP]], [[ICMP1]] ; CHECK-NEXT: S_NOP 0, implicit [[XOR]](s1) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 + %0:_(s32) = PRED_COPY $vgpr0 + %1:_(s32) = PRED_COPY $vgpr1 %2:_(s32) = G_CONSTANT i32 0 %4:_(s1) = G_ICMP intpred(ne), %0, %2 %5:_(s1) = G_ICMP intpred(ne), %1, %2 @@ -145,18 +145,18 @@ ; CHECK-LABEL: name: xor_i1_scc_vcc ; CHECK: liveins: $sgpr0, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ne), [[COPY]](s32), [[C]] + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ne), [[PRED_COPY]](s32), [[C]] ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY 
[[C]](s32) - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[COPY1]](s32), [[COPY2]] - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; CHECK-NEXT: [[XOR:%[0-9]+]]:vcc(s1) = G_XOR [[COPY3]], [[ICMP1]] + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[PRED_COPY1]](s32), [[PRED_COPY2]] + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) + ; CHECK-NEXT: [[XOR:%[0-9]+]]:vcc(s1) = G_XOR [[PRED_COPY3]], [[ICMP1]] ; CHECK-NEXT: S_NOP 0, implicit [[XOR]](s1) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $vgpr0 + %0:_(s32) = PRED_COPY $sgpr0 + %1:_(s32) = PRED_COPY $vgpr0 %2:_(s32) = G_CONSTANT i32 0 %4:_(s1) = G_ICMP intpred(ne), %0, %2 %5:_(s1) = G_ICMP intpred(ne), %1, %2 @@ -173,17 +173,17 @@ ; CHECK-LABEL: name: xor_i1_sgpr_trunc_sgpr_trunc ; CHECK: liveins: $sgpr0, $sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr1 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[PRED_COPY]](s32) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[PRED_COPY1]](s32) ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC]](s1) ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC1]](s1) ; CHECK-NEXT: [[XOR:%[0-9]+]]:sgpr(s32) = G_XOR [[ANYEXT]], [[ANYEXT1]] ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:sgpr(s1) = G_TRUNC [[XOR]](s32) ; CHECK-NEXT: S_NOP 0, implicit [[TRUNC2]](s1) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 + %0:_(s32) = PRED_COPY $sgpr0 + %1:_(s32) = PRED_COPY $sgpr1 %2:_(s1) = G_TRUNC %0 %3:_(s1) = G_TRUNC %1 %4:_(s1) = G_XOR %2, %3 @@ -200,18 +200,18 @@ ; CHECK-LABEL: name: xor_i1_trunc_scc ; CHECK: liveins: $sgpr0, $sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr1 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[PRED_COPY]](s32) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ne), [[PRED_COPY]](s32), [[PRED_COPY1]] ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC]](s1) ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC1]](s1) ; CHECK-NEXT: [[XOR:%[0-9]+]]:sgpr(s32) = G_XOR [[ANYEXT]], [[ANYEXT1]] ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:sgpr(s1) = G_TRUNC [[XOR]](s32) ; CHECK-NEXT: S_NOP 0, implicit [[TRUNC2]](s1) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 + %0:_(s32) = PRED_COPY $sgpr0 + %1:_(s32) = PRED_COPY $sgpr1 %2:_(s1) = G_TRUNC %0 %3:_(s1) = G_ICMP intpred(ne), %0, %1 %4:_(s1) = G_XOR %2, %3 @@ -227,16 +227,16 @@ ; CHECK-LABEL: name: xor_i1_s_trunc_vcc ; CHECK: liveins: $sgpr0, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32) 
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[COPY2]](s32), [[COPY1]] - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; CHECK-NEXT: [[XOR:%[0-9]+]]:vcc(s1) = G_XOR [[COPY3]], [[ICMP]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[PRED_COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY [[PRED_COPY]](s32) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[PRED_COPY2]](s32), [[PRED_COPY1]] + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) + ; CHECK-NEXT: [[XOR:%[0-9]+]]:vcc(s1) = G_XOR [[PRED_COPY3]], [[ICMP]] ; CHECK-NEXT: S_NOP 0, implicit [[XOR]](s1) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $vgpr0 + %0:_(s32) = PRED_COPY $sgpr0 + %1:_(s32) = PRED_COPY $vgpr0 %2:_(s1) = G_TRUNC %0 %3:_(s1) = G_ICMP intpred(ne), %0, %1 %4:_(s1) = G_XOR %2, %3 @@ -253,11 +253,11 @@ ; CHECK-LABEL: name: xor_s64_ss ; CHECK: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s64) = COPY $sgpr2_sgpr3 - ; CHECK-NEXT: [[XOR:%[0-9]+]]:sgpr(s64) = G_XOR [[COPY]], [[COPY1]] - %0:_(s64) = COPY $sgpr0_sgpr1 - %1:_(s64) = COPY $sgpr2_sgpr3 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s64) = PRED_COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s64) = PRED_COPY $sgpr2_sgpr3 + ; CHECK-NEXT: [[XOR:%[0-9]+]]:sgpr(s64) = G_XOR [[PRED_COPY]], [[PRED_COPY1]] + %0:_(s64) = PRED_COPY $sgpr0_sgpr1 + %1:_(s64) = PRED_COPY $sgpr2_sgpr3 %2:_(s64) = G_XOR %0, %1 ... @@ -271,15 +271,15 @@ ; CHECK-LABEL: name: xor_s64_sv ; CHECK: liveins: $sgpr0_sgpr1, $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY1]](s64) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s64) = PRED_COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s64) = PRED_COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[PRED_COPY]](s64) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[PRED_COPY1]](s64) ; CHECK-NEXT: [[XOR:%[0-9]+]]:vgpr(s32) = G_XOR [[UV]], [[UV2]] ; CHECK-NEXT: [[XOR1:%[0-9]+]]:vgpr(s32) = G_XOR [[UV1]], [[UV3]] ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[XOR]](s32), [[XOR1]](s32) - %0:_(s64) = COPY $sgpr0_sgpr1 - %1:_(s64) = COPY $vgpr0_vgpr1 + %0:_(s64) = PRED_COPY $sgpr0_sgpr1 + %1:_(s64) = PRED_COPY $vgpr0_vgpr1 %2:_(s64) = G_XOR %0, %1 ... 
@@ -293,15 +293,15 @@ ; CHECK-LABEL: name: xor_s64_vs ; CHECK: liveins: $sgpr0_sgpr1, $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:sgpr(s32), [[UV3:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[COPY1]](s64) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s64) = PRED_COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s64) = PRED_COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[PRED_COPY]](s64) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:sgpr(s32), [[UV3:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[PRED_COPY1]](s64) ; CHECK-NEXT: [[XOR:%[0-9]+]]:vgpr(s32) = G_XOR [[UV]], [[UV2]] ; CHECK-NEXT: [[XOR1:%[0-9]+]]:vgpr(s32) = G_XOR [[UV1]], [[UV3]] ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[XOR]](s32), [[XOR1]](s32) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s64) = COPY $sgpr0_sgpr1 + %0:_(s64) = PRED_COPY $vgpr0_vgpr1 + %1:_(s64) = PRED_COPY $sgpr0_sgpr1 %2:_(s64) = G_XOR %0, %1 ... @@ -315,15 +315,15 @@ ; CHECK-LABEL: name: xor_s64_vv ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 - ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY1]](s64) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s64) = PRED_COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s64) = PRED_COPY $vgpr2_vgpr3 + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[PRED_COPY]](s64) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[PRED_COPY1]](s64) ; CHECK-NEXT: [[XOR:%[0-9]+]]:vgpr(s32) = G_XOR [[UV]], [[UV2]] ; CHECK-NEXT: [[XOR1:%[0-9]+]]:vgpr(s32) = G_XOR [[UV1]], [[UV3]] ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[XOR]](s32), [[XOR1]](s32) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s64) = COPY $vgpr2_vgpr3 + %0:_(s64) = PRED_COPY $vgpr0_vgpr1 + %1:_(s64) = PRED_COPY $vgpr2_vgpr3 %2:_(s64) = G_XOR %0, %1 ... 
@@ -337,16 +337,16 @@ ; CHECK-LABEL: name: xor_s64_vv_user ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 - ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY1]](s64) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s64) = PRED_COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s64) = PRED_COPY $vgpr2_vgpr3 + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[PRED_COPY]](s64) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[PRED_COPY1]](s64) ; CHECK-NEXT: [[XOR:%[0-9]+]]:vgpr(s32) = G_XOR [[UV]], [[UV2]] ; CHECK-NEXT: [[XOR1:%[0-9]+]]:vgpr(s32) = G_XOR [[UV1]], [[UV3]] ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[XOR]](s32), [[XOR1]](s32) ; CHECK-NEXT: S_NOP 0, implicit [[MV]](s64) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s64) = COPY $vgpr2_vgpr3 + %0:_(s64) = PRED_COPY $vgpr0_vgpr1 + %1:_(s64) = PRED_COPY $vgpr2_vgpr3 %2:_(s64) = G_XOR %0, %1 S_NOP 0, implicit %2 ... @@ -360,18 +360,18 @@ ; CHECK-LABEL: name: xor_s64_ss_ss_merge ; CHECK: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK-NEXT: [[MV:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[MV1:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[MV:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[PRED_COPY]](s32), [[PRED_COPY1]](s32) + ; CHECK-NEXT: [[MV1:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) ; CHECK-NEXT: [[XOR:%[0-9]+]]:sgpr(s64) = G_XOR [[MV]], [[MV1]] ; CHECK-NEXT: S_NOP 0, implicit [[XOR]](s64) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s32) = COPY $sgpr2 - %3:_(s32) = COPY $sgpr3 + %0:_(s32) = PRED_COPY $sgpr0 + %1:_(s32) = PRED_COPY $sgpr1 + %2:_(s32) = PRED_COPY $sgpr2 + %3:_(s32) = PRED_COPY $sgpr3 %4:_(s64) = G_MERGE_VALUES %0, %1 %5:_(s64) = G_MERGE_VALUES %2, %3 %6:_(s64) = G_XOR %4, %5 @@ -388,22 +388,22 @@ ; CHECK-LABEL: name: xor_s64_vv_vv_merge ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[MV1:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr3 + ; CHECK-NEXT: 
[[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[PRED_COPY]](s32), [[PRED_COPY1]](s32) + ; CHECK-NEXT: [[MV1:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[MV]](s64) ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[MV1]](s64) ; CHECK-NEXT: [[XOR:%[0-9]+]]:vgpr(s32) = G_XOR [[UV]], [[UV2]] ; CHECK-NEXT: [[XOR1:%[0-9]+]]:vgpr(s32) = G_XOR [[UV1]], [[UV3]] ; CHECK-NEXT: [[MV2:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[XOR]](s32), [[XOR1]](s32) ; CHECK-NEXT: S_NOP 0, implicit [[MV2]](s64) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = COPY $vgpr2 - %3:_(s32) = COPY $vgpr3 + %0:_(s32) = PRED_COPY $vgpr0 + %1:_(s32) = PRED_COPY $vgpr1 + %2:_(s32) = PRED_COPY $vgpr2 + %3:_(s32) = PRED_COPY $vgpr3 %4:_(s64) = G_MERGE_VALUES %0, %1 %5:_(s64) = G_MERGE_VALUES %2, %3 %6:_(s64) = G_XOR %4, %5 @@ -420,20 +420,20 @@ ; CHECK-LABEL: name: xor_s64_s_sv_merge ; CHECK: liveins: $sgpr0_sgpr1, $sgpr2, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY3]](s32), [[COPY2]](s32) - ; CHECK-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s64) = PRED_COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr(s32) = PRED_COPY [[PRED_COPY1]](s32) + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[PRED_COPY3]](s32), [[PRED_COPY2]](s32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[PRED_COPY]](s64) ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[MV]](s64) ; CHECK-NEXT: [[XOR:%[0-9]+]]:vgpr(s32) = G_XOR [[UV]], [[UV2]] ; CHECK-NEXT: [[XOR1:%[0-9]+]]:vgpr(s32) = G_XOR [[UV1]], [[UV3]] ; CHECK-NEXT: [[MV1:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[XOR]](s32), [[XOR1]](s32) ; CHECK-NEXT: S_NOP 0, implicit [[MV1]](s64) - %0:_(s64) = COPY $sgpr0_sgpr1 - %1:_(s32) = COPY $sgpr2 - %2:_(s32) = COPY $vgpr0 + %0:_(s64) = PRED_COPY $sgpr0_sgpr1 + %1:_(s32) = PRED_COPY $sgpr2 + %2:_(s32) = PRED_COPY $vgpr0 %3:_(s64) = G_MERGE_VALUES %1, %2 %4:_(s64) = G_XOR %0, %3 S_NOP 0, implicit %4 @@ -449,20 +449,20 @@ ; CHECK-LABEL: name: xor_s64_s_vs_merge ; CHECK: liveins: $sgpr0_sgpr1, $sgpr2, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s64) = PRED_COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr(s32) = PRED_COPY [[PRED_COPY1]](s32) + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES 
[[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[PRED_COPY]](s64) ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[MV]](s64) ; CHECK-NEXT: [[XOR:%[0-9]+]]:vgpr(s32) = G_XOR [[UV]], [[UV2]] ; CHECK-NEXT: [[XOR1:%[0-9]+]]:vgpr(s32) = G_XOR [[UV1]], [[UV3]] ; CHECK-NEXT: [[MV1:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[XOR]](s32), [[XOR1]](s32) ; CHECK-NEXT: S_NOP 0, implicit [[MV1]](s64) - %0:_(s64) = COPY $sgpr0_sgpr1 - %1:_(s32) = COPY $sgpr2 - %2:_(s32) = COPY $vgpr0 + %0:_(s64) = PRED_COPY $sgpr0_sgpr1 + %1:_(s32) = PRED_COPY $sgpr2 + %2:_(s32) = PRED_COPY $vgpr0 %3:_(s64) = G_MERGE_VALUES %2, %1 %4:_(s64) = G_XOR %0, %3 S_NOP 0, implicit %4 @@ -478,24 +478,24 @@ ; CHECK-LABEL: name: xor_s64_sv_sv_merge ; CHECK: liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY2]](s32) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[MV1:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY5]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY [[PRED_COPY]](s32) + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[PRED_COPY4]](s32), [[PRED_COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr(s32) = PRED_COPY [[PRED_COPY1]](s32) + ; CHECK-NEXT: [[MV1:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[PRED_COPY5]](s32), [[PRED_COPY3]](s32) ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[MV]](s64) ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[MV1]](s64) ; CHECK-NEXT: [[XOR:%[0-9]+]]:vgpr(s32) = G_XOR [[UV]], [[UV2]] ; CHECK-NEXT: [[XOR1:%[0-9]+]]:vgpr(s32) = G_XOR [[UV1]], [[UV3]] ; CHECK-NEXT: [[MV2:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[XOR]](s32), [[XOR1]](s32) ; CHECK-NEXT: S_NOP 0, implicit [[MV2]](s64) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s32) = COPY $vgpr0 - %3:_(s32) = COPY $vgpr1 + %0:_(s32) = PRED_COPY $sgpr0 + %1:_(s32) = PRED_COPY $sgpr1 + %2:_(s32) = PRED_COPY $vgpr0 + %3:_(s32) = PRED_COPY $vgpr1 %4:_(s64) = G_MERGE_VALUES %0, %2 %5:_(s64) = G_MERGE_VALUES %1, %3 %6:_(s64) = G_XOR %4, %5 @@ -512,24 +512,24 @@ ; CHECK-LABEL: name: xor_s64_sv_vs_merge ; CHECK: liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY2]](s32) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[MV1:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY3]](s32), [[COPY5]](s32) + ; CHECK-NEXT: 
[[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY [[PRED_COPY]](s32) + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[PRED_COPY4]](s32), [[PRED_COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr(s32) = PRED_COPY [[PRED_COPY1]](s32) + ; CHECK-NEXT: [[MV1:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[PRED_COPY3]](s32), [[PRED_COPY5]](s32) ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[MV]](s64) ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[MV1]](s64) ; CHECK-NEXT: [[XOR:%[0-9]+]]:vgpr(s32) = G_XOR [[UV]], [[UV2]] ; CHECK-NEXT: [[XOR1:%[0-9]+]]:vgpr(s32) = G_XOR [[UV1]], [[UV3]] ; CHECK-NEXT: [[MV2:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[XOR]](s32), [[XOR1]](s32) ; CHECK-NEXT: S_NOP 0, implicit [[MV2]](s64) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s32) = COPY $vgpr0 - %3:_(s32) = COPY $vgpr1 + %0:_(s32) = PRED_COPY $sgpr0 + %1:_(s32) = PRED_COPY $sgpr1 + %2:_(s32) = PRED_COPY $vgpr0 + %3:_(s32) = PRED_COPY $vgpr1 %4:_(s64) = G_MERGE_VALUES %0, %2 %5:_(s64) = G_MERGE_VALUES %3, %1 %6:_(s64) = G_XOR %4, %5 @@ -546,23 +546,23 @@ ; CHECK-LABEL: name: xor_chain_s64_sv ; CHECK: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3, $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s64) = COPY $sgpr2_sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY2]](s64) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s64) = PRED_COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s64) = PRED_COPY $sgpr2_sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s64) = PRED_COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[PRED_COPY]](s64) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[PRED_COPY2]](s64) ; CHECK-NEXT: [[XOR:%[0-9]+]]:vgpr(s32) = G_XOR [[UV]], [[UV2]] ; CHECK-NEXT: [[XOR1:%[0-9]+]]:vgpr(s32) = G_XOR [[UV1]], [[UV3]] ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[XOR]](s32), [[XOR1]](s32) - ; CHECK-NEXT: [[UV4:%[0-9]+]]:sgpr(s32), [[UV5:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[COPY1]](s64) + ; CHECK-NEXT: [[UV4:%[0-9]+]]:sgpr(s32), [[UV5:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[PRED_COPY1]](s64) ; CHECK-NEXT: [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[MV]](s64) ; CHECK-NEXT: [[XOR2:%[0-9]+]]:vgpr(s32) = G_XOR [[UV4]], [[UV6]] ; CHECK-NEXT: [[XOR3:%[0-9]+]]:vgpr(s32) = G_XOR [[UV5]], [[UV7]] ; CHECK-NEXT: [[MV1:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[XOR2]](s32), [[XOR3]](s32) ; CHECK-NEXT: S_NOP 0, implicit [[MV1]](s64) - %0:_(s64) = COPY $sgpr0_sgpr1 - %1:_(s64) = COPY $sgpr2_sgpr3 - %2:_(s64) = COPY $vgpr0_vgpr1 + %0:_(s64) = PRED_COPY $sgpr0_sgpr1 + %1:_(s64) = PRED_COPY $sgpr2_sgpr3 + %2:_(s64) = PRED_COPY $vgpr0_vgpr1 %3:_(s64) = G_XOR %0, %2 %4:_(s64) = G_XOR %1, %3 S_NOP 0, implicit %4 @@ -578,12 +578,12 @@ ; CHECK-LABEL: name: xor_v2i32_ss ; CHECK: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 ; CHECK-NEXT: {{ $}} - ; 
CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s32>) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<2 x s32>) = COPY $sgpr2_sgpr3 - ; CHECK-NEXT: [[XOR:%[0-9]+]]:sgpr(<2 x s32>) = G_XOR [[COPY]], [[COPY1]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(<2 x s32>) = PRED_COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(<2 x s32>) = PRED_COPY $sgpr2_sgpr3 + ; CHECK-NEXT: [[XOR:%[0-9]+]]:sgpr(<2 x s32>) = G_XOR [[PRED_COPY]], [[PRED_COPY1]] ; CHECK-NEXT: S_NOP 0, implicit [[XOR]](<2 x s32>) - %0:_(<2 x s32>) = COPY $sgpr0_sgpr1 - %1:_(<2 x s32>) = COPY $sgpr2_sgpr3 + %0:_(<2 x s32>) = PRED_COPY $sgpr0_sgpr1 + %1:_(<2 x s32>) = PRED_COPY $sgpr2_sgpr3 %2:_(<2 x s32>) = G_XOR %0, %1 S_NOP 0, implicit %2 ... @@ -598,16 +598,16 @@ ; CHECK-LABEL: name: xor_v2i32_sv ; CHECK: liveins: $sgpr0_sgpr1, $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s32>) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(<2 x s32>) = PRED_COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(<2 x s32>) = PRED_COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[PRED_COPY]](<2 x s32>) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[PRED_COPY1]](<2 x s32>) ; CHECK-NEXT: [[XOR:%[0-9]+]]:vgpr(s32) = G_XOR [[UV]], [[UV2]] ; CHECK-NEXT: [[XOR1:%[0-9]+]]:vgpr(s32) = G_XOR [[UV1]], [[UV3]] ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<2 x s32>) = G_BUILD_VECTOR [[XOR]](s32), [[XOR1]](s32) ; CHECK-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<2 x s32>) - %0:_(<2 x s32>) = COPY $sgpr0_sgpr1 - %1:_(<2 x s32>) = COPY $vgpr0_vgpr1 + %0:_(<2 x s32>) = PRED_COPY $sgpr0_sgpr1 + %1:_(<2 x s32>) = PRED_COPY $vgpr0_vgpr1 %2:_(<2 x s32>) = G_XOR %0, %1 S_NOP 0, implicit %2 ... @@ -623,16 +623,16 @@ ; CHECK-LABEL: name: xor_v2i32_vs ; CHECK: liveins: $sgpr0_sgpr1, $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<2 x s32>) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:sgpr(s32), [[UV3:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(<2 x s32>) = PRED_COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(<2 x s32>) = PRED_COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[PRED_COPY]](<2 x s32>) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:sgpr(s32), [[UV3:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[PRED_COPY1]](<2 x s32>) ; CHECK-NEXT: [[XOR:%[0-9]+]]:vgpr(s32) = G_XOR [[UV]], [[UV2]] ; CHECK-NEXT: [[XOR1:%[0-9]+]]:vgpr(s32) = G_XOR [[UV1]], [[UV3]] ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<2 x s32>) = G_BUILD_VECTOR [[XOR]](s32), [[XOR1]](s32) ; CHECK-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<2 x s32>) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = COPY $sgpr0_sgpr1 + %0:_(<2 x s32>) = PRED_COPY $vgpr0_vgpr1 + %1:_(<2 x s32>) = PRED_COPY $sgpr0_sgpr1 %2:_(<2 x s32>) = G_XOR %0, %1 S_NOP 0, implicit %2 ... 
@@ -647,16 +647,16 @@ ; CHECK-LABEL: name: xor_v2i32_vv ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x s32>) = COPY $vgpr2_vgpr3 - ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(<2 x s32>) = PRED_COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(<2 x s32>) = PRED_COPY $vgpr2_vgpr3 + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[PRED_COPY]](<2 x s32>) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[PRED_COPY1]](<2 x s32>) ; CHECK-NEXT: [[XOR:%[0-9]+]]:vgpr(s32) = G_XOR [[UV]], [[UV2]] ; CHECK-NEXT: [[XOR1:%[0-9]+]]:vgpr(s32) = G_XOR [[UV1]], [[UV3]] ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<2 x s32>) = G_BUILD_VECTOR [[XOR]](s32), [[XOR1]](s32) ; CHECK-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<2 x s32>) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 + %0:_(<2 x s32>) = PRED_COPY $vgpr0_vgpr1 + %1:_(<2 x s32>) = PRED_COPY $vgpr2_vgpr3 %2:_(<2 x s32>) = G_XOR %0, %1 S_NOP 0, implicit %2 ... @@ -671,11 +671,11 @@ ; CHECK-LABEL: name: xor_v4s16_ss ; CHECK: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<4 x s16>) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<4 x s16>) = COPY $sgpr2_sgpr3 - ; CHECK-NEXT: [[XOR:%[0-9]+]]:sgpr(<4 x s16>) = G_XOR [[COPY]], [[COPY1]] - %0:_(<4 x s16>) = COPY $sgpr0_sgpr1 - %1:_(<4 x s16>) = COPY $sgpr2_sgpr3 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(<4 x s16>) = PRED_COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(<4 x s16>) = PRED_COPY $sgpr2_sgpr3 + ; CHECK-NEXT: [[XOR:%[0-9]+]]:sgpr(<4 x s16>) = G_XOR [[PRED_COPY]], [[PRED_COPY1]] + %0:_(<4 x s16>) = PRED_COPY $sgpr0_sgpr1 + %1:_(<4 x s16>) = PRED_COPY $sgpr2_sgpr3 %2:_(<4 x s16>) = G_XOR %0, %1 ... @@ -689,15 +689,15 @@ ; CHECK-LABEL: name: xor_v4s16_sv ; CHECK: liveins: $sgpr0_sgpr1, $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<4 x s16>) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<4 x s16>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[UV:%[0-9]+]]:sgpr(<2 x s16>), [[UV1:%[0-9]+]]:sgpr(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(<2 x s16>), [[UV3:%[0-9]+]]:vgpr(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(<4 x s16>) = PRED_COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(<4 x s16>) = PRED_COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[UV:%[0-9]+]]:sgpr(<2 x s16>), [[UV1:%[0-9]+]]:sgpr(<2 x s16>) = G_UNMERGE_VALUES [[PRED_COPY]](<4 x s16>) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(<2 x s16>), [[UV3:%[0-9]+]]:vgpr(<2 x s16>) = G_UNMERGE_VALUES [[PRED_COPY1]](<4 x s16>) ; CHECK-NEXT: [[XOR:%[0-9]+]]:vgpr(<2 x s16>) = G_XOR [[UV]], [[UV2]] ; CHECK-NEXT: [[XOR1:%[0-9]+]]:vgpr(<2 x s16>) = G_XOR [[UV1]], [[UV3]] ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<4 x s16>) = G_CONCAT_VECTORS [[XOR]](<2 x s16>), [[XOR1]](<2 x s16>) - %0:_(<4 x s16>) = COPY $sgpr0_sgpr1 - %1:_(<4 x s16>) = COPY $vgpr0_vgpr1 + %0:_(<4 x s16>) = PRED_COPY $sgpr0_sgpr1 + %1:_(<4 x s16>) = PRED_COPY $vgpr0_vgpr1 %2:_(<4 x s16>) = G_XOR %0, %1 ... 
@@ -711,15 +711,15 @@ ; CHECK-LABEL: name: xor_v4s16_vs ; CHECK: liveins: $sgpr0_sgpr1, $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<4 x s16>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<4 x s16>) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(<2 x s16>), [[UV1:%[0-9]+]]:vgpr(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:sgpr(<2 x s16>), [[UV3:%[0-9]+]]:sgpr(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(<4 x s16>) = PRED_COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(<4 x s16>) = PRED_COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(<2 x s16>), [[UV1:%[0-9]+]]:vgpr(<2 x s16>) = G_UNMERGE_VALUES [[PRED_COPY]](<4 x s16>) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:sgpr(<2 x s16>), [[UV3:%[0-9]+]]:sgpr(<2 x s16>) = G_UNMERGE_VALUES [[PRED_COPY1]](<4 x s16>) ; CHECK-NEXT: [[XOR:%[0-9]+]]:vgpr(<2 x s16>) = G_XOR [[UV]], [[UV2]] ; CHECK-NEXT: [[XOR1:%[0-9]+]]:vgpr(<2 x s16>) = G_XOR [[UV1]], [[UV3]] ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<4 x s16>) = G_CONCAT_VECTORS [[XOR]](<2 x s16>), [[XOR1]](<2 x s16>) - %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 - %1:_(<4 x s16>) = COPY $sgpr0_sgpr1 + %0:_(<4 x s16>) = PRED_COPY $vgpr0_vgpr1 + %1:_(<4 x s16>) = PRED_COPY $sgpr0_sgpr1 %2:_(<4 x s16>) = G_XOR %0, %1 ... @@ -733,15 +733,15 @@ ; CHECK-LABEL: name: xor_v4s16_vv ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<4 x s16>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<4 x s16>) = COPY $vgpr2_vgpr3 - ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(<2 x s16>), [[UV1:%[0-9]+]]:vgpr(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(<2 x s16>), [[UV3:%[0-9]+]]:vgpr(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(<4 x s16>) = PRED_COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(<4 x s16>) = PRED_COPY $vgpr2_vgpr3 + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(<2 x s16>), [[UV1:%[0-9]+]]:vgpr(<2 x s16>) = G_UNMERGE_VALUES [[PRED_COPY]](<4 x s16>) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(<2 x s16>), [[UV3:%[0-9]+]]:vgpr(<2 x s16>) = G_UNMERGE_VALUES [[PRED_COPY1]](<4 x s16>) ; CHECK-NEXT: [[XOR:%[0-9]+]]:vgpr(<2 x s16>) = G_XOR [[UV]], [[UV2]] ; CHECK-NEXT: [[XOR1:%[0-9]+]]:vgpr(<2 x s16>) = G_XOR [[UV1]], [[UV3]] ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<4 x s16>) = G_CONCAT_VECTORS [[XOR]](<2 x s16>), [[XOR1]](<2 x s16>) - %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 - %1:_(<4 x s16>) = COPY $vgpr2_vgpr3 + %0:_(<4 x s16>) = PRED_COPY $vgpr0_vgpr1 + %1:_(<4 x s16>) = PRED_COPY $vgpr2_vgpr3 %2:_(<4 x s16>) = G_XOR %0, %1 ... @@ -755,11 +755,11 @@ ; CHECK-LABEL: name: xor_v2s16_ss ; CHECK: liveins: $sgpr0, $sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr1 - ; CHECK-NEXT: [[XOR:%[0-9]+]]:sgpr(<2 x s16>) = G_XOR [[COPY]], [[COPY1]] - %0:_(<2 x s16>) = COPY $sgpr0 - %1:_(<2 x s16>) = COPY $sgpr1 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(<2 x s16>) = PRED_COPY $sgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(<2 x s16>) = PRED_COPY $sgpr1 + ; CHECK-NEXT: [[XOR:%[0-9]+]]:sgpr(<2 x s16>) = G_XOR [[PRED_COPY]], [[PRED_COPY1]] + %0:_(<2 x s16>) = PRED_COPY $sgpr0 + %1:_(<2 x s16>) = PRED_COPY $sgpr1 %2:_(<2 x s16>) = G_XOR %0, %1 ... 
@@ -773,12 +773,12 @@ ; CHECK-LABEL: name: xor_v2s16_sv ; CHECK: liveins: $sgpr0, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY]](<2 x s16>) - ; CHECK-NEXT: [[XOR:%[0-9]+]]:vgpr(<2 x s16>) = G_XOR [[COPY2]], [[COPY1]] - %0:_(<2 x s16>) = COPY $sgpr0 - %1:_(<2 x s16>) = COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(<2 x s16>) = PRED_COPY $sgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(<2 x s16>) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(<2 x s16>) = PRED_COPY [[PRED_COPY]](<2 x s16>) + ; CHECK-NEXT: [[XOR:%[0-9]+]]:vgpr(<2 x s16>) = G_XOR [[PRED_COPY2]], [[PRED_COPY1]] + %0:_(<2 x s16>) = PRED_COPY $sgpr0 + %1:_(<2 x s16>) = PRED_COPY $vgpr0 %2:_(<2 x s16>) = G_XOR %0, %1 ... @@ -792,12 +792,12 @@ ; CHECK-LABEL: name: xor_v2s16_vs ; CHECK: liveins: $sgpr0, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY1]](<2 x s16>) - ; CHECK-NEXT: [[XOR:%[0-9]+]]:vgpr(<2 x s16>) = G_XOR [[COPY]], [[COPY2]] - %0:_(<2 x s16>) = COPY $vgpr0 - %1:_(<2 x s16>) = COPY $sgpr0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(<2 x s16>) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(<2 x s16>) = PRED_COPY $sgpr0 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(<2 x s16>) = PRED_COPY [[PRED_COPY1]](<2 x s16>) + ; CHECK-NEXT: [[XOR:%[0-9]+]]:vgpr(<2 x s16>) = G_XOR [[PRED_COPY]], [[PRED_COPY2]] + %0:_(<2 x s16>) = PRED_COPY $vgpr0 + %1:_(<2 x s16>) = PRED_COPY $sgpr0 %2:_(<2 x s16>) = G_XOR %0, %1 ... @@ -811,11 +811,11 @@ ; CHECK-LABEL: name: xor_v2s16_vv ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr1 - ; CHECK-NEXT: [[XOR:%[0-9]+]]:vgpr(<2 x s16>) = G_XOR [[COPY]], [[COPY1]] - %0:_(<2 x s16>) = COPY $vgpr0 - %1:_(<2 x s16>) = COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(<2 x s16>) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(<2 x s16>) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[XOR:%[0-9]+]]:vgpr(<2 x s16>) = G_XOR [[PRED_COPY]], [[PRED_COPY1]] + %0:_(<2 x s16>) = PRED_COPY $vgpr0 + %1:_(<2 x s16>) = PRED_COPY $vgpr1 %2:_(<2 x s16>) = G_XOR %0, %1 ... 
@@ -829,16 +829,16 @@ ; CHECK-LABEL: name: xor_i1_vcc_constant ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]] + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[PRED_COPY]](s32), [[PRED_COPY1]] ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[C1]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; CHECK-NEXT: [[XOR:%[0-9]+]]:vcc(s1) = G_XOR [[ICMP]], [[COPY2]] + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) + ; CHECK-NEXT: [[XOR:%[0-9]+]]:vcc(s1) = G_XOR [[ICMP]], [[PRED_COPY2]] ; CHECK-NEXT: S_NOP 0, implicit [[XOR]](s1) - %0:_(s32) = COPY $vgpr0 + %0:_(s32) = PRED_COPY $vgpr0 %1:_(s32) = G_CONSTANT i32 0 %2:_(s1) = G_ICMP intpred(ne), %0, %1 %3:_(s1) = G_CONSTANT i1 true diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-zext.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-zext.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-zext.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-zext.mir @@ -47,9 +47,9 @@ ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY1]](s32), [[C]](s32) + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[PRED_COPY]](s32), [[C]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s64) = G_ZEXT %0 ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-zextload.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-zextload.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-zextload.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-zextload.mir @@ -13,8 +13,8 @@ ; CHECK: liveins: $sgpr0_sgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(p4) = COPY [[COPY]](p4) - ; CHECK-NEXT: [[ZEXTLOAD:%[0-9]+]]:vgpr(s32) = G_ZEXTLOAD [[COPY1]](p4) :: (load (s8), addrspace 4) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p4) = PRED_COPY [[COPY]](p4) + ; CHECK-NEXT: [[ZEXTLOAD:%[0-9]+]]:vgpr(s32) = G_ZEXTLOAD [[PRED_COPY]](p4) :: (load (s8), addrspace 4) %0:_(p4) = COPY $sgpr0_sgpr1 %1:_(s32) = G_ZEXTLOAD %0 :: (load (s8), addrspace 4, align 1) ... @@ -31,8 +31,8 @@ ; CHECK: liveins: $sgpr0_sgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(p4) = COPY [[COPY]](p4) - ; CHECK-NEXT: [[ZEXTLOAD:%[0-9]+]]:vgpr(s32) = G_ZEXTLOAD [[COPY1]](p4) :: (load (s8), addrspace 1) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p4) = PRED_COPY [[COPY]](p4) + ; CHECK-NEXT: [[ZEXTLOAD:%[0-9]+]]:vgpr(s32) = G_ZEXTLOAD [[PRED_COPY]](p4) :: (load (s8), addrspace 1) %0:_(p4) = COPY $sgpr0_sgpr1 %1:_(s32) = G_ZEXTLOAD %0 :: (load (s8), addrspace 1, align 1) ... 
@@ -49,8 +49,8 @@ ; CHECK: liveins: $sgpr0_sgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(p4) = COPY [[COPY]](p4) - ; CHECK-NEXT: [[ZEXTLOAD:%[0-9]+]]:vgpr(s32) = G_ZEXTLOAD [[COPY1]](p4) :: (load (s16), addrspace 4) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p4) = PRED_COPY [[COPY]](p4) + ; CHECK-NEXT: [[ZEXTLOAD:%[0-9]+]]:vgpr(s32) = G_ZEXTLOAD [[PRED_COPY]](p4) :: (load (s16), addrspace 4) %0:_(p4) = COPY $sgpr0_sgpr1 %1:_(s32) = G_ZEXTLOAD %0 :: (load (s16), addrspace 4, align 2) ... @@ -67,8 +67,8 @@ ; CHECK: liveins: $sgpr0_sgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(p4) = COPY [[COPY]](p4) - ; CHECK-NEXT: [[ZEXTLOAD:%[0-9]+]]:vgpr(s32) = G_ZEXTLOAD [[COPY1]](p4) :: (load (s16), addrspace 1) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p4) = PRED_COPY [[COPY]](p4) + ; CHECK-NEXT: [[ZEXTLOAD:%[0-9]+]]:vgpr(s32) = G_ZEXTLOAD [[PRED_COPY]](p4) :: (load (s16), addrspace 1) %0:_(p4) = COPY $sgpr0_sgpr1 %1:_(s32) = G_ZEXTLOAD %0 :: (load (s16), addrspace 1, align 2) ... @@ -84,8 +84,8 @@ ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p3) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY [[COPY]](p3) - ; CHECK-NEXT: [[ZEXTLOAD:%[0-9]+]]:vgpr(s32) = G_ZEXTLOAD [[COPY1]](p3) :: (load (s8), addrspace 3) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p3) = PRED_COPY [[COPY]](p3) + ; CHECK-NEXT: [[ZEXTLOAD:%[0-9]+]]:vgpr(s32) = G_ZEXTLOAD [[PRED_COPY]](p3) :: (load (s8), addrspace 3) %0:_(p3) = COPY $sgpr0 %1:_(s32) = G_ZEXTLOAD %0 :: (load (s8), addrspace 3, align 1) ... @@ -102,8 +102,8 @@ ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p3) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY [[COPY]](p3) - ; CHECK-NEXT: [[ZEXTLOAD:%[0-9]+]]:vgpr(s32) = G_ZEXTLOAD [[COPY1]](p3) :: (load (s16), addrspace 3) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p3) = PRED_COPY [[COPY]](p3) + ; CHECK-NEXT: [[ZEXTLOAD:%[0-9]+]]:vgpr(s32) = G_ZEXTLOAD [[PRED_COPY]](p3) :: (load (s16), addrspace 3) %0:_(p3) = COPY $sgpr0 %1:_(s32) = G_ZEXTLOAD %0 :: (load (s16), addrspace 3, align 2) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect.mir @@ -112,8 +112,8 @@ ; CHECK: liveins: $sgpr0_sgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p1) :: (load (s32) from %ir.ptr1, addrspace 1) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p1) = PRED_COPY [[COPY]](p1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[PRED_COPY]](p1) :: (load (s32) from %ir.ptr1, addrspace 1) %0:_(p1) = COPY $sgpr0_sgpr1 %1:_(s32) = G_LOAD %0 :: (load (s32) from %ir.ptr1) ... 
@@ -129,8 +129,8 @@ ; CHECK: liveins: $sgpr0_sgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p1) :: (load (s32) from %ir.ptr1, addrspace 1) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p1) = PRED_COPY [[COPY]](p1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[PRED_COPY]](p1) :: (load (s32) from %ir.ptr1, addrspace 1) %0:_(p1) = COPY $sgpr0_sgpr1 %1:_(s32) = G_LOAD %0 :: (load (s32) from %ir.ptr1) ... @@ -146,8 +146,8 @@ ; CHECK: liveins: $sgpr0_sgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p1) :: (volatile invariant load (s32) from %ir.ptr1, addrspace 1) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p1) = PRED_COPY [[COPY]](p1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[PRED_COPY]](p1) :: (volatile invariant load (s32) from %ir.ptr1, addrspace 1) %0:_(p1) = COPY $sgpr0_sgpr1 %1:_(s32) = G_LOAD %0 :: (volatile invariant load (s32) from %ir.ptr1) ... @@ -163,8 +163,8 @@ ; CHECK: liveins: $sgpr0_sgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p1) :: (invariant load acquire (s32) from %ir.ptr1, addrspace 1) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p1) = PRED_COPY [[COPY]](p1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[PRED_COPY]](p1) :: (invariant load acquire (s32) from %ir.ptr1, addrspace 1) %0:_(p1) = COPY $sgpr0_sgpr1 %1:_(s32) = G_LOAD %0 :: (invariant load acquire (s32) from %ir.ptr1) ... @@ -180,8 +180,8 @@ ; CHECK: liveins: $sgpr0_sgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p1) :: (load (s32) from %ir.tmp1, addrspace 1) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p1) = PRED_COPY [[COPY]](p1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[PRED_COPY]](p1) :: (load (s32) from %ir.tmp1, addrspace 1) %0:_(p1) = COPY $sgpr0_sgpr1 %1:_(s32) = G_LOAD %0 :: (load (s32) from %ir.tmp1) ... 
diff --git a/llvm/test/CodeGen/AMDGPU/amdgcn-load-offset-from-reg.ll b/llvm/test/CodeGen/AMDGPU/amdgcn-load-offset-from-reg.ll --- a/llvm/test/CodeGen/AMDGPU/amdgcn-load-offset-from-reg.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgcn-load-offset-from-reg.ll @@ -44,14 +44,14 @@ } ; GCN-LABEL: name: test_sgpr_plus_imm_offset -; SDAG-DAG: %[[BASE0:.*]]:sgpr_32 = COPY $sgpr0 -; SDAG-DAG: %[[BASE1:.*]]:sgpr_32 = COPY $sgpr1 -; SDAG-DAG: %[[OFFSET:.*]]:sgpr_32 = COPY $sgpr2 +; SDAG-DAG: %[[BASE0:.*]]:sgpr_32 = PRED_COPY $sgpr0 +; SDAG-DAG: %[[BASE1:.*]]:sgpr_32 = PRED_COPY $sgpr1 +; SDAG-DAG: %[[OFFSET:.*]]:sgpr_32 = PRED_COPY $sgpr2 ; SDAG-DAG: %[[BASE:.*]]:sgpr_64 = REG_SEQUENCE %[[BASE0]], %subreg.sub0, %[[BASE1]], %subreg.sub1 ; SDAG: S_LOAD_DWORD_SGPR_IMM killed %[[BASE]], %[[OFFSET]], 16, -; GISEL-DAG: %[[BASE0:.*]]:sreg_32 = COPY $sgpr0 -; GISEL-DAG: %[[BASE1:.*]]:sreg_32 = COPY $sgpr1 -; GISEL-DAG: %[[OFFSET:.*]]:sreg_32 = COPY $sgpr2 +; GISEL-DAG: %[[BASE0:.*]]:sreg_32 = PRED_COPY $sgpr0 +; GISEL-DAG: %[[BASE1:.*]]:sreg_32 = PRED_COPY $sgpr1 +; GISEL-DAG: %[[OFFSET:.*]]:sreg_32 = PRED_COPY $sgpr2 ; GISEL-DAG: %[[BASE:.*]]:sreg_64 = REG_SEQUENCE %[[BASE0]], %subreg.sub0, %[[BASE1]], %subreg.sub1 ; GISEL: S_LOAD_DWORD_SGPR_IMM %[[BASE]], %[[OFFSET]], 16, define amdgpu_ps void @test_sgpr_plus_imm_offset(ptr addrspace(4) inreg %base, i32 inreg %offset, @@ -65,14 +65,14 @@ } ; GCN-LABEL: name: test_sgpr_plus_imm_offset_x2 -; SDAG-DAG: %[[BASE0:.*]]:sgpr_32 = COPY $sgpr0 -; SDAG-DAG: %[[BASE1:.*]]:sgpr_32 = COPY $sgpr1 -; SDAG-DAG: %[[OFFSET:.*]]:sgpr_32 = COPY $sgpr2 +; SDAG-DAG: %[[BASE0:.*]]:sgpr_32 = PRED_COPY $sgpr0 +; SDAG-DAG: %[[BASE1:.*]]:sgpr_32 = PRED_COPY $sgpr1 +; SDAG-DAG: %[[OFFSET:.*]]:sgpr_32 = PRED_COPY $sgpr2 ; SDAG-DAG: %[[BASE:.*]]:sgpr_64 = REG_SEQUENCE %[[BASE0]], %subreg.sub0, %[[BASE1]], %subreg.sub1 ; SDAG: S_LOAD_DWORDX2_SGPR_IMM killed %[[BASE]], %[[OFFSET]], 16, -; GISEL-DAG: %[[BASE0:.*]]:sreg_32 = COPY $sgpr0 -; GISEL-DAG: %[[BASE1:.*]]:sreg_32 = COPY $sgpr1 -; GISEL-DAG: %[[OFFSET:.*]]:sreg_32 = COPY $sgpr2 +; GISEL-DAG: %[[BASE0:.*]]:sreg_32 = PRED_COPY $sgpr0 +; GISEL-DAG: %[[BASE1:.*]]:sreg_32 = PRED_COPY $sgpr1 +; GISEL-DAG: %[[OFFSET:.*]]:sreg_32 = PRED_COPY $sgpr2 ; GISEL-DAG: %[[BASE:.*]]:sreg_64 = REG_SEQUENCE %[[BASE0]], %subreg.sub0, %[[BASE1]], %subreg.sub1 ; GISEL: S_LOAD_DWORDX2_SGPR_IMM %[[BASE]], %[[OFFSET]], 16, define amdgpu_ps void @test_sgpr_plus_imm_offset_x2(ptr addrspace(4) inreg %base, i32 inreg %offset, @@ -86,18 +86,18 @@ } ; GCN-LABEL: name: test_buffer_load_sgpr_plus_imm_offset -; SDAG-DAG: %[[BASE0:.*]]:sgpr_32 = COPY $sgpr0 -; SDAG-DAG: %[[BASE1:.*]]:sgpr_32 = COPY $sgpr1 -; SDAG-DAG: %[[BASE2:.*]]:sgpr_32 = COPY $sgpr2 -; SDAG-DAG: %[[BASE3:.*]]:sgpr_32 = COPY $sgpr3 -; SDAG-DAG: %[[OFFSET:.*]]:sgpr_32 = COPY $sgpr4 +; SDAG-DAG: %[[BASE0:.*]]:sgpr_32 = PRED_COPY $sgpr0 +; SDAG-DAG: %[[BASE1:.*]]:sgpr_32 = PRED_COPY $sgpr1 +; SDAG-DAG: %[[BASE2:.*]]:sgpr_32 = PRED_COPY $sgpr2 +; SDAG-DAG: %[[BASE3:.*]]:sgpr_32 = PRED_COPY $sgpr3 +; SDAG-DAG: %[[OFFSET:.*]]:sgpr_32 = PRED_COPY $sgpr4 ; SDAG-DAG: %[[BASE:.*]]:sgpr_128 = REG_SEQUENCE %[[BASE0]], %subreg.sub0, %[[BASE1]], %subreg.sub1, %[[BASE2]], %subreg.sub2, %[[BASE3]], %subreg.sub3 ; SDAG: S_BUFFER_LOAD_DWORD_SGPR_IMM killed %[[BASE]], %[[OFFSET]], 77, -; GISEL-DAG: %[[BASE0:.*]]:sreg_32 = COPY $sgpr0 -; GISEL-DAG: %[[BASE1:.*]]:sreg_32 = COPY $sgpr1 -; GISEL-DAG: %[[BASE2:.*]]:sreg_32 = COPY $sgpr2 -; GISEL-DAG: %[[BASE3:.*]]:sreg_32 = COPY $sgpr3 -; GISEL-DAG: %[[OFFSET:.*]]:sreg_32 = 
COPY $sgpr4 +; GISEL-DAG: %[[BASE0:.*]]:sreg_32 = PRED_COPY $sgpr0 +; GISEL-DAG: %[[BASE1:.*]]:sreg_32 = PRED_COPY $sgpr1 +; GISEL-DAG: %[[BASE2:.*]]:sreg_32 = PRED_COPY $sgpr2 +; GISEL-DAG: %[[BASE3:.*]]:sreg_32 = PRED_COPY $sgpr3 +; GISEL-DAG: %[[OFFSET:.*]]:sreg_32 = PRED_COPY $sgpr4 ; GISEL-DAG: %[[BASE:.*]]:sgpr_128 = REG_SEQUENCE %[[BASE0]], %subreg.sub0, %[[BASE1]], %subreg.sub1, %[[BASE2]], %subreg.sub2, %[[BASE3]], %subreg.sub3 ; GISEL: S_BUFFER_LOAD_DWORD_SGPR_IMM %[[BASE]], %[[OFFSET]], 77, define amdgpu_cs void @test_buffer_load_sgpr_plus_imm_offset(<4 x i32> inreg %base, i32 inreg %i, ptr addrspace(1) inreg %out) { @@ -108,19 +108,19 @@ } ; GCN-LABEL: name: test_buffer_load_sgpr_or_imm_offset -; SDAG-DAG: %[[BASE0:.*]]:sgpr_32 = COPY $sgpr0 -; SDAG-DAG: %[[BASE1:.*]]:sgpr_32 = COPY $sgpr1 -; SDAG-DAG: %[[BASE2:.*]]:sgpr_32 = COPY $sgpr2 -; SDAG-DAG: %[[BASE3:.*]]:sgpr_32 = COPY $sgpr3 -; SDAG-DAG: %[[INDEX:.*]]:sgpr_32 = COPY $sgpr4 +; SDAG-DAG: %[[BASE0:.*]]:sgpr_32 = PRED_COPY $sgpr0 +; SDAG-DAG: %[[BASE1:.*]]:sgpr_32 = PRED_COPY $sgpr1 +; SDAG-DAG: %[[BASE2:.*]]:sgpr_32 = PRED_COPY $sgpr2 +; SDAG-DAG: %[[BASE3:.*]]:sgpr_32 = PRED_COPY $sgpr3 +; SDAG-DAG: %[[INDEX:.*]]:sgpr_32 = PRED_COPY $sgpr4 ; SDAG-DAG: %[[SHIFT:.*]]:sreg_32 = S_LSHL_B32 %[[INDEX]], ; SDAG-DAG: %[[BASE:.*]]:sgpr_128 = REG_SEQUENCE %[[BASE0]], %subreg.sub0, %[[BASE1]], %subreg.sub1, %[[BASE2]], %subreg.sub2, %[[BASE3]], %subreg.sub3 ; SDAG: S_BUFFER_LOAD_DWORD_SGPR_IMM killed %[[BASE]], killed %[[SHIFT]], 5, -; GISEL-DAG: %[[BASE0:.*]]:sreg_32 = COPY $sgpr0 -; GISEL-DAG: %[[BASE1:.*]]:sreg_32 = COPY $sgpr1 -; GISEL-DAG: %[[BASE2:.*]]:sreg_32 = COPY $sgpr2 -; GISEL-DAG: %[[BASE3:.*]]:sreg_32 = COPY $sgpr3 -; GISEL-DAG: %[[INDEX:.*]]:sreg_32 = COPY $sgpr4 +; GISEL-DAG: %[[BASE0:.*]]:sreg_32 = PRED_COPY $sgpr0 +; GISEL-DAG: %[[BASE1:.*]]:sreg_32 = PRED_COPY $sgpr1 +; GISEL-DAG: %[[BASE2:.*]]:sreg_32 = PRED_COPY $sgpr2 +; GISEL-DAG: %[[BASE3:.*]]:sreg_32 = PRED_COPY $sgpr3 +; GISEL-DAG: %[[INDEX:.*]]:sreg_32 = PRED_COPY $sgpr4 ; GISEL-DAG: %[[SHIFT:.*]]:sreg_32 = S_LSHL_B32 %[[INDEX]], ; GISEL-DAG: %[[BASE:.*]]:sgpr_128 = REG_SEQUENCE %[[BASE0]], %subreg.sub0, %[[BASE1]], %subreg.sub1, %[[BASE2]], %subreg.sub2, %[[BASE3]], %subreg.sub3 ; GISEL: S_BUFFER_LOAD_DWORD_SGPR_IMM %[[BASE]], %[[SHIFT]], 5, diff --git a/llvm/test/CodeGen/AMDGPU/block-should-not-be-in-alive-blocks.mir b/llvm/test/CodeGen/AMDGPU/block-should-not-be-in-alive-blocks.mir --- a/llvm/test/CodeGen/AMDGPU/block-should-not-be-in-alive-blocks.mir +++ b/llvm/test/CodeGen/AMDGPU/block-should-not-be-in-alive-blocks.mir @@ -21,10 +21,10 @@ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY killed $sgpr4_sgpr5 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY killed $vgpr0 ; CHECK-NEXT: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_32 = V_CMP_NE_U32_e64 0, [[COPY1]], implicit $exec - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY killed [[COPY1]] - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $exec_lo, implicit-def $exec_lo - ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY3]], killed [[V_CMP_NE_U32_e64_]], implicit-def dead $scc - ; CHECK-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[S_AND_B32_]], [[COPY3]], implicit-def dead $scc + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY killed [[COPY1]] + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $exec_lo, implicit-def $exec_lo + ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[PRED_COPY1]], killed [[V_CMP_NE_U32_e64_]], implicit-def dead $scc + ; CHECK-NEXT: 
[[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[S_AND_B32_]], [[PRED_COPY1]], implicit-def dead $scc ; CHECK-NEXT: $exec_lo = S_MOV_B32_term killed [[S_AND_B32_]] ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.5, implicit $exec ; CHECK-NEXT: S_BRANCH %bb.2 @@ -34,8 +34,8 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM killed [[COPY]], 0, 0 :: (dereferenceable invariant load (s64), align 16, addrspace 4) ; CHECK-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[S_LOAD_DWORDX2_IMM]].sub0, killed %15, 0, implicit $exec - ; CHECK-NEXT: %7:vgpr_32, dead %8:sreg_32_xm0_xexec = V_ADDC_U32_e64 0, killed [[S_LOAD_DWORDX2_IMM]].sub1, killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[V_ADD_CO_U32_e64_]], %subreg.sub0, killed %7, %subreg.sub1 + ; CHECK-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 0, killed [[S_LOAD_DWORDX2_IMM]].sub1, killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[V_ADD_CO_U32_e64_]], %subreg.sub0, killed [[V_ADDC_U32_e64_]], %subreg.sub1 ; CHECK-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE killed [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (s8), addrspace 1) ; CHECK-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 0, implicit $exec ; CHECK-NEXT: GLOBAL_STORE_BYTE killed [[V_MOV_B]], killed [[GLOBAL_LOAD_UBYTE]], 0, 0, implicit $exec :: (store (s8), addrspace 1) @@ -55,14 +55,14 @@ ; CHECK-NEXT: successors: %bb.6(0x80000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[V_MOV_B1:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 0, implicit $exec - ; CHECK-NEXT: dead %13:vgpr_32 = GLOBAL_LOAD_UBYTE killed [[V_MOV_B1]], 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; CHECK-NEXT: dead [[GLOBAL_LOAD_UBYTE1:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE killed [[V_MOV_B1]], 0, 0, implicit $exec :: (load (s8), addrspace 1) ; CHECK-NEXT: S_BRANCH %bb.6 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.5: ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.7(0x40000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[S_OR_SAVEEXEC_B32_:%[0-9]+]]:sreg_32 = S_OR_SAVEEXEC_B32 killed [[S_XOR_B32_]], implicit-def $exec, implicit-def $scc, implicit $exec - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY killed [[COPY2]] + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY killed [[PRED_COPY]] ; CHECK-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32 = S_AND_B32 $exec_lo, [[S_OR_SAVEEXEC_B32_]], implicit-def $scc ; CHECK-NEXT: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_B32_1]], implicit-def $scc ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.7, implicit $exec diff --git a/llvm/test/CodeGen/AMDGPU/buffer-atomic-fadd.f32-no-rtn.ll b/llvm/test/CodeGen/AMDGPU/buffer-atomic-fadd.f32-no-rtn.ll --- a/llvm/test/CodeGen/AMDGPU/buffer-atomic-fadd.f32-no-rtn.ll +++ b/llvm/test/CodeGen/AMDGPU/buffer-atomic-fadd.f32-no-rtn.ll @@ -9,27 +9,27 @@ ; GFX908_GFX11: bb.0 (%ir-block.0): ; GFX908_GFX11-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4 ; GFX908_GFX11-NEXT: {{ $}} - ; GFX908_GFX11-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 - ; GFX908_GFX11-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr3 - ; GFX908_GFX11-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr2 - ; GFX908_GFX11-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX908_GFX11-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX908_GFX11-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = 
COPY $vgpr0 - ; GFX908_GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY1]], %subreg.sub3 - ; GFX908_GFX11-NEXT: BUFFER_ATOMIC_ADD_F32_OFFSET [[COPY5]], killed [[REG_SEQUENCE]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) + ; GFX908_GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr4 + ; GFX908_GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr3 + ; GFX908_GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr2 + ; GFX908_GFX11-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr1 + ; GFX908_GFX11-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr0 + ; GFX908_GFX11-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX908_GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY4]], %subreg.sub0, [[PRED_COPY3]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY1]], %subreg.sub3 + ; GFX908_GFX11-NEXT: BUFFER_ATOMIC_ADD_F32_OFFSET [[PRED_COPY5]], killed [[REG_SEQUENCE]], [[PRED_COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) ; GFX908_GFX11-NEXT: S_ENDPGM 0 ; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_f32_offset_no_rtn ; GFX90A_GFX940: bb.0 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY1]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_ADD_F32_OFFSET [[COPY5]], killed [[REG_SEQUENCE]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr4 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr3 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY4]], %subreg.sub0, [[PRED_COPY3]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY1]], %subreg.sub3 + ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_ADD_F32_OFFSET [[PRED_COPY5]], killed [[REG_SEQUENCE]], [[PRED_COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) ; GFX90A_GFX940-NEXT: S_ENDPGM 0 %ret = call float @llvm.amdgcn.raw.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 0, i32 %soffset, i32 0) ret void @@ -40,29 +40,29 @@ ; GFX908_GFX11: bb.0 (%ir-block.0): ; GFX908_GFX11-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4 ; GFX908_GFX11-NEXT: {{ $}} - ; GFX908_GFX11-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 - ; GFX908_GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX908_GFX11-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3 - ; GFX908_GFX11-NEXT: 
[[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2 - ; GFX908_GFX11-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX908_GFX11-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX908_GFX11-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX908_GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3 - ; GFX908_GFX11-NEXT: BUFFER_ATOMIC_ADD_F32_OFFEN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) + ; GFX908_GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr4 + ; GFX908_GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX908_GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr3 + ; GFX908_GFX11-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr2 + ; GFX908_GFX11-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr1 + ; GFX908_GFX11-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr0 + ; GFX908_GFX11-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX908_GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY5]], %subreg.sub0, [[PRED_COPY4]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY2]], %subreg.sub3 + ; GFX908_GFX11-NEXT: BUFFER_ATOMIC_ADD_F32_OFFEN [[PRED_COPY6]], [[PRED_COPY1]], killed [[REG_SEQUENCE]], [[PRED_COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) ; GFX908_GFX11-NEXT: S_ENDPGM 0 ; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_f32_offen_no_rtn ; GFX90A_GFX940: bb.0 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_ADD_F32_OFFEN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr4 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr3 + ; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY5]], %subreg.sub0, [[PRED_COPY4]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY2]], %subreg.sub3 + ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_ADD_F32_OFFEN [[PRED_COPY6]], [[PRED_COPY1]], killed [[REG_SEQUENCE]], [[PRED_COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) ; GFX90A_GFX940-NEXT: S_ENDPGM 0 %ret = call float 
@llvm.amdgcn.raw.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) ret void @@ -73,29 +73,29 @@ ; GFX908_GFX11: bb.0 (%ir-block.0): ; GFX908_GFX11-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4 ; GFX908_GFX11-NEXT: {{ $}} - ; GFX908_GFX11-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 - ; GFX908_GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX908_GFX11-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3 - ; GFX908_GFX11-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2 - ; GFX908_GFX11-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX908_GFX11-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX908_GFX11-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX908_GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3 - ; GFX908_GFX11-NEXT: BUFFER_ATOMIC_ADD_F32_IDXEN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) + ; GFX908_GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr4 + ; GFX908_GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX908_GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr3 + ; GFX908_GFX11-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr2 + ; GFX908_GFX11-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr1 + ; GFX908_GFX11-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr0 + ; GFX908_GFX11-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX908_GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY5]], %subreg.sub0, [[PRED_COPY4]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY2]], %subreg.sub3 + ; GFX908_GFX11-NEXT: BUFFER_ATOMIC_ADD_F32_IDXEN [[PRED_COPY6]], [[PRED_COPY1]], killed [[REG_SEQUENCE]], [[PRED_COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) ; GFX908_GFX11-NEXT: S_ENDPGM 0 ; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_f32_idxen_no_rtn ; GFX90A_GFX940: bb.0 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_ADD_F32_IDXEN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr4 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr3 + ; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr0 + ; GFX90A_GFX940-NEXT: 
[[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY5]], %subreg.sub0, [[PRED_COPY4]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY2]], %subreg.sub3 + ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_ADD_F32_IDXEN [[PRED_COPY6]], [[PRED_COPY1]], killed [[REG_SEQUENCE]], [[PRED_COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) ; GFX90A_GFX940-NEXT: S_ENDPGM 0 %ret = call float @llvm.amdgcn.struct.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 %soffset, i32 0) ret void @@ -106,33 +106,33 @@ ; GFX908_GFX11: bb.0 (%ir-block.0): ; GFX908_GFX11-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $vgpr2, $sgpr4 ; GFX908_GFX11-NEXT: {{ $}} - ; GFX908_GFX11-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 - ; GFX908_GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX908_GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX908_GFX11-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr3 - ; GFX908_GFX11-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr2 - ; GFX908_GFX11-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX908_GFX11-NEXT: [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX908_GFX11-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX908_GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX908_GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX908_GFX11-NEXT: BUFFER_ATOMIC_ADD_F32_BOTHEN [[COPY7]], killed [[REG_SEQUENCE1]], killed [[REG_SEQUENCE]], [[COPY]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) + ; GFX908_GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr4 + ; GFX908_GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX908_GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX908_GFX11-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr3 + ; GFX908_GFX11-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr2 + ; GFX908_GFX11-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr1 + ; GFX908_GFX11-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr0 + ; GFX908_GFX11-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX908_GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY6]], %subreg.sub0, [[PRED_COPY5]], %subreg.sub1, [[PRED_COPY4]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX908_GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX908_GFX11-NEXT: BUFFER_ATOMIC_ADD_F32_BOTHEN [[PRED_COPY7]], killed [[REG_SEQUENCE1]], killed [[REG_SEQUENCE]], [[PRED_COPY]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) ; GFX908_GFX11-NEXT: S_ENDPGM 0 ; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_f32_bothen_no_rtn ; GFX90A_GFX940: bb.0 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $vgpr2, $sgpr4 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: 
[[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_ADD_F32_BOTHEN [[COPY7]], killed [[REG_SEQUENCE1]], killed [[REG_SEQUENCE]], [[COPY]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr4 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr3 + ; GFX90A_GFX940-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY6]], %subreg.sub0, [[PRED_COPY5]], %subreg.sub1, [[PRED_COPY4]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_ADD_F32_BOTHEN [[PRED_COPY7]], killed [[REG_SEQUENCE1]], killed [[REG_SEQUENCE]], [[PRED_COPY]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) ; GFX90A_GFX940-NEXT: S_ENDPGM 0 %ret = call float @llvm.amdgcn.struct.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 2) ret void diff --git a/llvm/test/CodeGen/AMDGPU/buffer-atomic-fadd.f32-rtn.ll b/llvm/test/CodeGen/AMDGPU/buffer-atomic-fadd.f32-rtn.ll --- a/llvm/test/CodeGen/AMDGPU/buffer-atomic-fadd.f32-rtn.ll +++ b/llvm/test/CodeGen/AMDGPU/buffer-atomic-fadd.f32-rtn.ll @@ -8,29 +8,29 @@ ; GFX90A_GFX940: bb.0 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY1]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_ADD_F32_OFFSET_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFSET_RTN [[COPY5]], killed [[REG_SEQUENCE]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) - ; GFX90A_GFX940-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_OFFSET_RTN]] + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr4 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr3 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr2 + ; GFX90A_GFX940-NEXT: 
[[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY4]], %subreg.sub0, [[PRED_COPY3]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY1]], %subreg.sub3 + ; GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_ADD_F32_OFFSET_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFSET_RTN [[PRED_COPY5]], killed [[REG_SEQUENCE]], [[PRED_COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) + ; GFX90A_GFX940-NEXT: $vgpr0 = PRED_COPY [[BUFFER_ATOMIC_ADD_F32_OFFSET_RTN]] ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG $vgpr0 ; GFX11-LABEL: name: buffer_atomic_fadd_f32_offset_rtn ; GFX11: bb.0 (%ir-block.0): ; GFX11-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr3 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr2 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX11-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY1]], %subreg.sub3 - ; GFX11-NEXT: [[BUFFER_ATOMIC_ADD_F32_OFFSET_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFSET_RTN [[COPY5]], killed [[REG_SEQUENCE]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) - ; GFX11-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_OFFSET_RTN]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr4 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr3 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr2 + ; GFX11-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr1 + ; GFX11-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr0 + ; GFX11-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY4]], %subreg.sub0, [[PRED_COPY3]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY1]], %subreg.sub3 + ; GFX11-NEXT: [[BUFFER_ATOMIC_ADD_F32_OFFSET_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFSET_RTN [[PRED_COPY5]], killed [[REG_SEQUENCE]], [[PRED_COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[BUFFER_ATOMIC_ADD_F32_OFFSET_RTN]] ; GFX11-NEXT: SI_RETURN_TO_EPILOG $vgpr0 %ret = call float @llvm.amdgcn.raw.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 0, i32 %soffset, i32 0) ret float %ret @@ -41,31 +41,31 @@ ; GFX90A_GFX940: bb.0 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, 
[[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_ADD_F32_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFEN_RTN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) - ; GFX90A_GFX940-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_OFFEN_RTN]] + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr4 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr3 + ; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY5]], %subreg.sub0, [[PRED_COPY4]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY2]], %subreg.sub3 + ; GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_ADD_F32_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFEN_RTN [[PRED_COPY6]], [[PRED_COPY1]], killed [[REG_SEQUENCE]], [[PRED_COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) + ; GFX90A_GFX940-NEXT: $vgpr0 = PRED_COPY [[BUFFER_ATOMIC_ADD_F32_OFFEN_RTN]] ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG $vgpr0 ; GFX11-LABEL: name: buffer_atomic_fadd_f32_offen_rtn ; GFX11: bb.0 (%ir-block.0): ; GFX11-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX11-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX11-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3 - ; GFX11-NEXT: [[BUFFER_ATOMIC_ADD_F32_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFEN_RTN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) - ; GFX11-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_OFFEN_RTN]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr4 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr3 + ; GFX11-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr2 + ; GFX11-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr1 + ; GFX11-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr0 + ; GFX11-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY5]], %subreg.sub0, [[PRED_COPY4]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY2]], %subreg.sub3 + ; GFX11-NEXT: [[BUFFER_ATOMIC_ADD_F32_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFEN_RTN [[PRED_COPY6]], [[PRED_COPY1]], killed [[REG_SEQUENCE]], [[PRED_COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[BUFFER_ATOMIC_ADD_F32_OFFEN_RTN]] ; 
GFX11-NEXT: SI_RETURN_TO_EPILOG $vgpr0 %ret = call float @llvm.amdgcn.raw.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) ret float %ret @@ -76,31 +76,31 @@ ; GFX90A_GFX940: bb.0 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_ADD_F32_IDXEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_IDXEN_RTN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) - ; GFX90A_GFX940-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_IDXEN_RTN]] + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr4 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr3 + ; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY5]], %subreg.sub0, [[PRED_COPY4]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY2]], %subreg.sub3 + ; GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_ADD_F32_IDXEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_IDXEN_RTN [[PRED_COPY6]], [[PRED_COPY1]], killed [[REG_SEQUENCE]], [[PRED_COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) + ; GFX90A_GFX940-NEXT: $vgpr0 = PRED_COPY [[BUFFER_ATOMIC_ADD_F32_IDXEN_RTN]] ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG $vgpr0 ; GFX11-LABEL: name: buffer_atomic_fadd_f32_idxen_rtn ; GFX11: bb.0 (%ir-block.0): ; GFX11-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX11-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX11-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3 - ; GFX11-NEXT: [[BUFFER_ATOMIC_ADD_F32_IDXEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_IDXEN_RTN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) - ; GFX11-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_IDXEN_RTN]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr4 + ; GFX11-NEXT: 
[[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr3 + ; GFX11-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr2 + ; GFX11-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr1 + ; GFX11-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr0 + ; GFX11-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY5]], %subreg.sub0, [[PRED_COPY4]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY2]], %subreg.sub3 + ; GFX11-NEXT: [[BUFFER_ATOMIC_ADD_F32_IDXEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_IDXEN_RTN [[PRED_COPY6]], [[PRED_COPY1]], killed [[REG_SEQUENCE]], [[PRED_COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[BUFFER_ATOMIC_ADD_F32_IDXEN_RTN]] ; GFX11-NEXT: SI_RETURN_TO_EPILOG $vgpr0 %ret = call float @llvm.amdgcn.struct.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 %soffset, i32 0) ret float %ret @@ -111,35 +111,35 @@ ; GFX90A_GFX940: bb.0 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $vgpr2, $sgpr4 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN [[COPY7]], killed [[REG_SEQUENCE1]], killed [[REG_SEQUENCE]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) - ; GFX90A_GFX940-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN]] + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr4 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr3 + ; GFX90A_GFX940-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY6]], %subreg.sub0, [[PRED_COPY5]], %subreg.sub1, [[PRED_COPY4]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN [[PRED_COPY7]], killed [[REG_SEQUENCE1]], killed [[REG_SEQUENCE]], [[PRED_COPY]], 0, 1, 
implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) + ; GFX90A_GFX940-NEXT: $vgpr0 = PRED_COPY [[BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN]] ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG $vgpr0 ; GFX11-LABEL: name: buffer_atomic_fadd_f32_bothen_rtn ; GFX11: bb.0 (%ir-block.0): ; GFX11-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $vgpr2, $sgpr4 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr3 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr2 - ; GFX11-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX11-NEXT: [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX11-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX11-NEXT: [[BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN [[COPY7]], killed [[REG_SEQUENCE1]], killed [[REG_SEQUENCE]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) - ; GFX11-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr4 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr3 + ; GFX11-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr2 + ; GFX11-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr1 + ; GFX11-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr0 + ; GFX11-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY6]], %subreg.sub0, [[PRED_COPY5]], %subreg.sub1, [[PRED_COPY4]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX11-NEXT: [[BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN [[PRED_COPY7]], killed [[REG_SEQUENCE1]], killed [[REG_SEQUENCE]], [[PRED_COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN]] ; GFX11-NEXT: SI_RETURN_TO_EPILOG $vgpr0 %ret = call float @llvm.amdgcn.struct.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret float %ret diff --git a/llvm/test/CodeGen/AMDGPU/buffer-atomic-fadd.f64.ll b/llvm/test/CodeGen/AMDGPU/buffer-atomic-fadd.f64.ll --- a/llvm/test/CodeGen/AMDGPU/buffer-atomic-fadd.f64.ll +++ b/llvm/test/CodeGen/AMDGPU/buffer-atomic-fadd.f64.ll @@ -7,17 +7,17 @@ ; GFX90A_GFX940: bb.0 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY 
$sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY1]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY7:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]] - ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_ADD_F64_OFFSET killed [[COPY7]], killed [[REG_SEQUENCE]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 7) + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr4 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr3 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY4]], %subreg.sub0, [[PRED_COPY3]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY1]], %subreg.sub3 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY6]], %subreg.sub0, [[PRED_COPY5]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY7:%[0-9]+]]:vreg_64_align2 = PRED_COPY [[REG_SEQUENCE1]] + ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_ADD_F64_OFFSET killed [[PRED_COPY7]], killed [[REG_SEQUENCE]], [[PRED_COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 7) ; GFX90A_GFX940-NEXT: S_ENDPGM 0 %ret = call double @llvm.amdgcn.raw.buffer.atomic.fadd.f64(double %val, <4 x i32> %rsrc, i32 0, i32 %soffset, i32 0) ret void @@ -28,18 +28,18 @@ ; GFX90A_GFX940: bb.0 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr2, $sgpr4 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY8:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]] - ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_ADD_F64_OFFEN killed [[COPY8]], [[COPY1]], killed [[REG_SEQUENCE]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 7) + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr4 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr3 + ; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 
= PRED_COPY $sgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY5]], %subreg.sub0, [[PRED_COPY4]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY2]], %subreg.sub3 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY7]], %subreg.sub0, [[PRED_COPY6]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY8:%[0-9]+]]:vreg_64_align2 = PRED_COPY [[REG_SEQUENCE1]] + ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_ADD_F64_OFFEN killed [[PRED_COPY8]], [[PRED_COPY1]], killed [[REG_SEQUENCE]], [[PRED_COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 7) ; GFX90A_GFX940-NEXT: S_ENDPGM 0 %ret = call double @llvm.amdgcn.raw.buffer.atomic.fadd.f64(double %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) ret void @@ -50,18 +50,18 @@ ; GFX90A_GFX940: bb.0 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr2, $sgpr4 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY8:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]] - ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_ADD_F64_IDXEN killed [[COPY8]], [[COPY1]], killed [[REG_SEQUENCE]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 7) + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr4 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr3 + ; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY5]], %subreg.sub0, [[PRED_COPY4]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY2]], %subreg.sub3 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY7]], %subreg.sub0, [[PRED_COPY6]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY8:%[0-9]+]]:vreg_64_align2 = PRED_COPY [[REG_SEQUENCE1]] + ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_ADD_F64_IDXEN killed [[PRED_COPY8]], [[PRED_COPY1]], killed [[REG_SEQUENCE]], [[PRED_COPY]], 0, 0, implicit $exec :: 
(volatile dereferenceable load store (s64), align 1, addrspace 7) ; GFX90A_GFX940-NEXT: S_ENDPGM 0 %ret = call double @llvm.amdgcn.struct.buffer.atomic.fadd.f64(double %val, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 %soffset, i32 0) ret void @@ -72,20 +72,20 @@ ; GFX90A_GFX940: bb.0 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr2, $vgpr3, $sgpr4 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY7]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY9:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]] - ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_ADD_F64_BOTHEN killed [[COPY9]], killed [[REG_SEQUENCE2]], killed [[REG_SEQUENCE]], [[COPY]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 7) + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr4 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr3 + ; GFX90A_GFX940-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY6]], %subreg.sub0, [[PRED_COPY5]], %subreg.sub1, [[PRED_COPY4]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY8]], %subreg.sub0, [[PRED_COPY7]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY9:%[0-9]+]]:vreg_64_align2 = PRED_COPY [[REG_SEQUENCE1]] + ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_ADD_F64_BOTHEN killed [[PRED_COPY9]], killed [[REG_SEQUENCE2]], killed [[REG_SEQUENCE]], [[PRED_COPY]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 7) ; GFX90A_GFX940-NEXT: S_ENDPGM 0 %ret = call double @llvm.amdgcn.struct.buffer.atomic.fadd.f64(double %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 2) ret void @@ -96,21 +96,21 @@ ; GFX90A_GFX940: bb.0 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4 ; GFX90A_GFX940-NEXT: {{ $}} - ; 
GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY1]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY7:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]] - ; GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_ADD_F64_OFFSET_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_OFFSET_RTN [[COPY7]], killed [[REG_SEQUENCE]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 7) - ; GFX90A_GFX940-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_OFFSET_RTN]].sub0 - ; GFX90A_GFX940-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_OFFSET_RTN]].sub1 - ; GFX90A_GFX940-NEXT: $sgpr0 = COPY [[COPY8]] - ; GFX90A_GFX940-NEXT: $sgpr1 = COPY [[COPY9]] + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr4 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr3 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY4]], %subreg.sub0, [[PRED_COPY3]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY1]], %subreg.sub3 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY6]], %subreg.sub0, [[PRED_COPY5]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY7:%[0-9]+]]:vreg_64_align2 = PRED_COPY [[REG_SEQUENCE1]] + ; GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_ADD_F64_OFFSET_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_OFFSET_RTN [[PRED_COPY7]], killed [[REG_SEQUENCE]], [[PRED_COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 7) + ; GFX90A_GFX940-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_ATOMIC_ADD_F64_OFFSET_RTN]].sub0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_ATOMIC_ADD_F64_OFFSET_RTN]].sub1 + ; GFX90A_GFX940-NEXT: $sgpr0 = PRED_COPY [[PRED_COPY8]] + ; GFX90A_GFX940-NEXT: $sgpr1 = PRED_COPY [[PRED_COPY9]] ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG $sgpr0, $sgpr1 %ret = call double @llvm.amdgcn.raw.buffer.atomic.fadd.f64(double %val, <4 x i32> %rsrc, i32 0, i32 %soffset, i32 0) ret double %ret @@ -121,22 +121,22 @@ ; GFX90A_GFX940: bb.0 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr2, $sgpr4 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: 
[[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY8:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]] - ; GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_ADD_F64_OFFEN_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_OFFEN_RTN [[COPY8]], [[COPY1]], killed [[REG_SEQUENCE]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 7) - ; GFX90A_GFX940-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_OFFEN_RTN]].sub0 - ; GFX90A_GFX940-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_OFFEN_RTN]].sub1 - ; GFX90A_GFX940-NEXT: $sgpr0 = COPY [[COPY9]] - ; GFX90A_GFX940-NEXT: $sgpr1 = COPY [[COPY10]] + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr4 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr3 + ; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY5]], %subreg.sub0, [[PRED_COPY4]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY2]], %subreg.sub3 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY7]], %subreg.sub0, [[PRED_COPY6]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY8:%[0-9]+]]:vreg_64_align2 = PRED_COPY [[REG_SEQUENCE1]] + ; GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_ADD_F64_OFFEN_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_OFFEN_RTN [[PRED_COPY8]], [[PRED_COPY1]], killed [[REG_SEQUENCE]], [[PRED_COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 7) + ; GFX90A_GFX940-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_ATOMIC_ADD_F64_OFFEN_RTN]].sub0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_ATOMIC_ADD_F64_OFFEN_RTN]].sub1 + ; GFX90A_GFX940-NEXT: $sgpr0 = PRED_COPY [[PRED_COPY9]] + ; GFX90A_GFX940-NEXT: $sgpr1 = PRED_COPY [[PRED_COPY10]] ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG $sgpr0, $sgpr1 %ret = call double @llvm.amdgcn.raw.buffer.atomic.fadd.f64(double %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) ret double %ret @@ -147,22 +147,22 @@ ; GFX90A_GFX940: bb.0 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr2, $sgpr4 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; 
GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY8:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]] - ; GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_ADD_F64_IDXEN_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_IDXEN_RTN [[COPY8]], [[COPY1]], killed [[REG_SEQUENCE]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 7) - ; GFX90A_GFX940-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_IDXEN_RTN]].sub0 - ; GFX90A_GFX940-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_IDXEN_RTN]].sub1 - ; GFX90A_GFX940-NEXT: $sgpr0 = COPY [[COPY9]] - ; GFX90A_GFX940-NEXT: $sgpr1 = COPY [[COPY10]] + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr4 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr3 + ; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY5]], %subreg.sub0, [[PRED_COPY4]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY2]], %subreg.sub3 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY7]], %subreg.sub0, [[PRED_COPY6]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY8:%[0-9]+]]:vreg_64_align2 = PRED_COPY [[REG_SEQUENCE1]] + ; GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_ADD_F64_IDXEN_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_IDXEN_RTN [[PRED_COPY8]], [[PRED_COPY1]], killed [[REG_SEQUENCE]], [[PRED_COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 7) + ; GFX90A_GFX940-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_ATOMIC_ADD_F64_IDXEN_RTN]].sub0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_ATOMIC_ADD_F64_IDXEN_RTN]].sub1 + ; GFX90A_GFX940-NEXT: $sgpr0 = PRED_COPY [[PRED_COPY9]] + ; GFX90A_GFX940-NEXT: $sgpr1 = PRED_COPY [[PRED_COPY10]] ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG $sgpr0, $sgpr1 %ret = call double @llvm.amdgcn.struct.buffer.atomic.fadd.f64(double %val, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 %soffset, i32 0) ret double %ret @@ -173,24 +173,24 @@ ; GFX90A_GFX940: bb.0 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr2, $vgpr3, $sgpr4 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: 
[[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY7]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY9:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]] - ; GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN [[COPY9]], killed [[REG_SEQUENCE2]], killed [[REG_SEQUENCE]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 7) - ; GFX90A_GFX940-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN]].sub0 - ; GFX90A_GFX940-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN]].sub1 - ; GFX90A_GFX940-NEXT: $sgpr0 = COPY [[COPY10]] - ; GFX90A_GFX940-NEXT: $sgpr1 = COPY [[COPY11]] + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr4 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr3 + ; GFX90A_GFX940-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY6]], %subreg.sub0, [[PRED_COPY5]], %subreg.sub1, [[PRED_COPY4]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY8]], %subreg.sub0, [[PRED_COPY7]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY9:%[0-9]+]]:vreg_64_align2 = PRED_COPY [[REG_SEQUENCE1]] + ; GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN [[PRED_COPY9]], killed [[REG_SEQUENCE2]], killed [[REG_SEQUENCE]], [[PRED_COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 7) + ; GFX90A_GFX940-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN]].sub0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY11:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN]].sub1 + ; GFX90A_GFX940-NEXT: $sgpr0 = PRED_COPY [[PRED_COPY10]] + ; GFX90A_GFX940-NEXT: $sgpr1 = PRED_COPY [[PRED_COPY11]] ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG $sgpr0, $sgpr1 %ret = call double @llvm.amdgcn.struct.buffer.atomic.fadd.f64(double %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret double %ret diff --git a/llvm/test/CodeGen/AMDGPU/buffer-atomic-fadd.v2f16-no-rtn.ll b/llvm/test/CodeGen/AMDGPU/buffer-atomic-fadd.v2f16-no-rtn.ll --- a/llvm/test/CodeGen/AMDGPU/buffer-atomic-fadd.v2f16-no-rtn.ll +++ b/llvm/test/CodeGen/AMDGPU/buffer-atomic-fadd.v2f16-no-rtn.ll @@ -8,27 +8,27 @@ ; GFX908: bb.0 
(%ir-block.0): ; GFX908-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4 ; GFX908-NEXT: {{ $}} - ; GFX908-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 - ; GFX908-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr3 - ; GFX908-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr2 - ; GFX908-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX908-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY1]], %subreg.sub3 - ; GFX908-NEXT: BUFFER_ATOMIC_PK_ADD_F16_OFFSET [[COPY5]], killed [[REG_SEQUENCE]], [[COPY]], 4095, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) + ; GFX908-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr4 + ; GFX908-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr3 + ; GFX908-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr2 + ; GFX908-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr1 + ; GFX908-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr0 + ; GFX908-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY4]], %subreg.sub0, [[PRED_COPY3]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY1]], %subreg.sub3 + ; GFX908-NEXT: BUFFER_ATOMIC_PK_ADD_F16_OFFSET [[PRED_COPY5]], killed [[REG_SEQUENCE]], [[PRED_COPY]], 4095, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) ; GFX908-NEXT: S_ENDPGM 0 ; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_v2f16_offset_no_rtn ; GFX90A_GFX940: bb.0 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY1]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_PK_ADD_F16_OFFSET [[COPY5]], killed [[REG_SEQUENCE]], [[COPY]], 4095, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr4 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr3 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY4]], %subreg.sub0, [[PRED_COPY3]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY1]], %subreg.sub3 + ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_PK_ADD_F16_OFFSET [[PRED_COPY5]], killed [[REG_SEQUENCE]], [[PRED_COPY]], 4095, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) ; GFX90A_GFX940-NEXT: S_ENDPGM 0 %ret = call <2 x half> @llvm.amdgcn.raw.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 
4095, i32 %soffset, i32 0) ret void @@ -39,29 +39,29 @@ ; GFX908: bb.0 (%ir-block.0): ; GFX908-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4 ; GFX908-NEXT: {{ $}} - ; GFX908-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 - ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX908-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3 - ; GFX908-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2 - ; GFX908-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX908-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX908-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3 - ; GFX908-NEXT: BUFFER_ATOMIC_PK_ADD_F16_OFFEN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) + ; GFX908-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr4 + ; GFX908-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX908-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr3 + ; GFX908-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr2 + ; GFX908-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr1 + ; GFX908-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr0 + ; GFX908-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY5]], %subreg.sub0, [[PRED_COPY4]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY2]], %subreg.sub3 + ; GFX908-NEXT: BUFFER_ATOMIC_PK_ADD_F16_OFFEN [[PRED_COPY6]], [[PRED_COPY1]], killed [[REG_SEQUENCE]], [[PRED_COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) ; GFX908-NEXT: S_ENDPGM 0 ; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_v2f16_offen_no_rtn ; GFX90A_GFX940: bb.0 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_PK_ADD_F16_OFFEN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr4 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr3 + ; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY5]], %subreg.sub0, [[PRED_COPY4]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, 
[[PRED_COPY2]], %subreg.sub3 + ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_PK_ADD_F16_OFFEN [[PRED_COPY6]], [[PRED_COPY1]], killed [[REG_SEQUENCE]], [[PRED_COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) ; GFX90A_GFX940-NEXT: S_ENDPGM 0 %ret = call <2 x half> @llvm.amdgcn.raw.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) ret void @@ -72,29 +72,29 @@ ; GFX908: bb.0 (%ir-block.0): ; GFX908-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4 ; GFX908-NEXT: {{ $}} - ; GFX908-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 - ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX908-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3 - ; GFX908-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2 - ; GFX908-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX908-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX908-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3 - ; GFX908-NEXT: BUFFER_ATOMIC_PK_ADD_F16_IDXEN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) + ; GFX908-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr4 + ; GFX908-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX908-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr3 + ; GFX908-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr2 + ; GFX908-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr1 + ; GFX908-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr0 + ; GFX908-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY5]], %subreg.sub0, [[PRED_COPY4]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY2]], %subreg.sub3 + ; GFX908-NEXT: BUFFER_ATOMIC_PK_ADD_F16_IDXEN [[PRED_COPY6]], [[PRED_COPY1]], killed [[REG_SEQUENCE]], [[PRED_COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) ; GFX908-NEXT: S_ENDPGM 0 ; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_v2f16_idxen_no_rtn ; GFX90A_GFX940: bb.0 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_PK_ADD_F16_IDXEN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr4 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr3 + ; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr2 
+ ; GFX90A_GFX940-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY5]], %subreg.sub0, [[PRED_COPY4]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY2]], %subreg.sub3 + ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_PK_ADD_F16_IDXEN [[PRED_COPY6]], [[PRED_COPY1]], killed [[REG_SEQUENCE]], [[PRED_COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) ; GFX90A_GFX940-NEXT: S_ENDPGM 0 %ret = call <2 x half> @llvm.amdgcn.struct.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 %soffset, i32 0) ret void @@ -105,33 +105,33 @@ ; GFX908: bb.0 (%ir-block.0): ; GFX908-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $vgpr2, $sgpr4 ; GFX908-NEXT: {{ $}} - ; GFX908-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 - ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX908-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr3 - ; GFX908-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr2 - ; GFX908-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX908-NEXT: [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX908-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX908-NEXT: BUFFER_ATOMIC_PK_ADD_F16_BOTHEN [[COPY7]], killed [[REG_SEQUENCE1]], killed [[REG_SEQUENCE]], [[COPY]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) + ; GFX908-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr4 + ; GFX908-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX908-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX908-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr3 + ; GFX908-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr2 + ; GFX908-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr1 + ; GFX908-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr0 + ; GFX908-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY6]], %subreg.sub0, [[PRED_COPY5]], %subreg.sub1, [[PRED_COPY4]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX908-NEXT: BUFFER_ATOMIC_PK_ADD_F16_BOTHEN [[PRED_COPY7]], killed [[REG_SEQUENCE1]], killed [[REG_SEQUENCE]], [[PRED_COPY]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) ; GFX908-NEXT: S_ENDPGM 0 ; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_v2f16_bothen_no_rtn ; GFX90A_GFX940: bb.0 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $vgpr2, $sgpr4 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY 
$sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_PK_ADD_F16_BOTHEN [[COPY7]], killed [[REG_SEQUENCE1]], killed [[REG_SEQUENCE]], [[COPY]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr4 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr3 + ; GFX90A_GFX940-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY6]], %subreg.sub0, [[PRED_COPY5]], %subreg.sub1, [[PRED_COPY4]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_PK_ADD_F16_BOTHEN [[PRED_COPY7]], killed [[REG_SEQUENCE1]], killed [[REG_SEQUENCE]], [[PRED_COPY]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) ; GFX90A_GFX940-NEXT: S_ENDPGM 0 %ret = call <2 x half> @llvm.amdgcn.struct.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 2) ret void diff --git a/llvm/test/CodeGen/AMDGPU/buffer-atomic-fadd.v2f16-rtn.ll b/llvm/test/CodeGen/AMDGPU/buffer-atomic-fadd.v2f16-rtn.ll --- a/llvm/test/CodeGen/AMDGPU/buffer-atomic-fadd.v2f16-rtn.ll +++ b/llvm/test/CodeGen/AMDGPU/buffer-atomic-fadd.v2f16-rtn.ll @@ -7,15 +7,15 @@ ; GFX90A_GFX940: bb.0 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY1]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_PK_ADD_F16_OFFSET_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_OFFSET_RTN [[COPY5]], killed [[REG_SEQUENCE]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) - ; GFX90A_GFX940-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_PK_ADD_F16_OFFSET_RTN]] + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr4 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr3 + ; GFX90A_GFX940-NEXT: 
[[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY4]], %subreg.sub0, [[PRED_COPY3]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY1]], %subreg.sub3 + ; GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_PK_ADD_F16_OFFSET_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_OFFSET_RTN [[PRED_COPY5]], killed [[REG_SEQUENCE]], [[PRED_COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) + ; GFX90A_GFX940-NEXT: $vgpr0 = PRED_COPY [[BUFFER_ATOMIC_PK_ADD_F16_OFFSET_RTN]] ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG $vgpr0 %ret = call <2 x half> @llvm.amdgcn.raw.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 0, i32 %soffset, i32 0) ret <2 x half> %ret @@ -26,16 +26,16 @@ ; GFX90A_GFX940: bb.0 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_PK_ADD_F16_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_OFFEN_RTN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) - ; GFX90A_GFX940-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_PK_ADD_F16_OFFEN_RTN]] + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr4 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr3 + ; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY5]], %subreg.sub0, [[PRED_COPY4]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY2]], %subreg.sub3 + ; GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_PK_ADD_F16_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_OFFEN_RTN [[PRED_COPY6]], [[PRED_COPY1]], killed [[REG_SEQUENCE]], [[PRED_COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) + ; GFX90A_GFX940-NEXT: $vgpr0 = PRED_COPY [[BUFFER_ATOMIC_PK_ADD_F16_OFFEN_RTN]] ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG $vgpr0 %ret = call <2 x half> @llvm.amdgcn.raw.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) ret <2 x half> %ret @@ -46,16 +46,16 @@ ; GFX90A_GFX940: bb.0 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4 ; 
GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_PK_ADD_F16_IDXEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_IDXEN_RTN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) - ; GFX90A_GFX940-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_PK_ADD_F16_IDXEN_RTN]] + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr4 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr3 + ; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY5]], %subreg.sub0, [[PRED_COPY4]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY2]], %subreg.sub3 + ; GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_PK_ADD_F16_IDXEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_IDXEN_RTN [[PRED_COPY6]], [[PRED_COPY1]], killed [[REG_SEQUENCE]], [[PRED_COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) + ; GFX90A_GFX940-NEXT: $vgpr0 = PRED_COPY [[BUFFER_ATOMIC_PK_ADD_F16_IDXEN_RTN]] ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG $vgpr0 %ret = call <2 x half> @llvm.amdgcn.struct.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 %soffset, i32 0) ret <2 x half> %ret @@ -66,18 +66,18 @@ ; GFX90A_GFX940: bb.0 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $vgpr2, $sgpr4 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_PK_ADD_F16_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_BOTHEN_RTN [[COPY7]], killed [[REG_SEQUENCE1]], killed [[REG_SEQUENCE]], [[COPY]], 0, 3, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) - ; 
GFX90A_GFX940-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_PK_ADD_F16_BOTHEN_RTN]] + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr4 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr3 + ; GFX90A_GFX940-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY6]], %subreg.sub0, [[PRED_COPY5]], %subreg.sub1, [[PRED_COPY4]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_PK_ADD_F16_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_BOTHEN_RTN [[PRED_COPY7]], killed [[REG_SEQUENCE1]], killed [[REG_SEQUENCE]], [[PRED_COPY]], 0, 3, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) + ; GFX90A_GFX940-NEXT: $vgpr0 = PRED_COPY [[BUFFER_ATOMIC_PK_ADD_F16_BOTHEN_RTN]] ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG $vgpr0 %ret = call <2 x half> @llvm.amdgcn.struct.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 2) ret <2 x half> %ret diff --git a/llvm/test/CodeGen/AMDGPU/buffer-intrinsics-mmo-offsets.ll b/llvm/test/CodeGen/AMDGPU/buffer-intrinsics-mmo-offsets.ll --- a/llvm/test/CodeGen/AMDGPU/buffer-intrinsics-mmo-offsets.ll +++ b/llvm/test/CodeGen/AMDGPU/buffer-intrinsics-mmo-offsets.ll @@ -6,191 +6,191 @@ ; GCN: bb.0.bb.0: ; GCN-NEXT: liveins: $sgpr0, $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr0 ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[S_MOV_B32_]], %subreg.sub1 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[S_MOV_B32_]], %subreg.sub1 ; GCN-NEXT: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM killed [[REG_SEQUENCE]], 0, 0 :: (dereferenceable invariant load (s128) from %ir.arg0, addrspace 6) ; GCN-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 16, 0, 0, implicit $exec :: (dereferenceable load (s128) from unknown-address + 16, align 1, addrspace 7) ; GCN-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec ; GCN-NEXT: [[BUFFER_LOAD_DWORDX4_IDXEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 16, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 7) - ; GCN-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 7) - ; GCN-NEXT: [[BUFFER_LOAD_DWORDX4_IDXEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 16, 0, 0, 
implicit $exec :: (dereferenceable load (s128), align 1, addrspace 7) + ; GCN-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 7) + ; GCN-NEXT: [[BUFFER_LOAD_DWORDX4_IDXEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[PRED_COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 16, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 7) ; GCN-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */ ; GCN-NEXT: BUFFER_STORE_DWORDX4_OFFSET_exact killed [[BUFFER_LOAD_DWORDX4_OFFSET]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 32, 0, 0, implicit $exec :: (dereferenceable store (s128) into unknown-address + 32, align 1, addrspace 7) - ; GCN-NEXT: BUFFER_STORE_DWORDX4_OFFEN_exact killed [[BUFFER_LOAD_DWORDX4_OFFEN]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 7) + ; GCN-NEXT: BUFFER_STORE_DWORDX4_OFFEN_exact killed [[BUFFER_LOAD_DWORDX4_OFFEN]], [[PRED_COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 7) ; GCN-NEXT: BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 32, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 7) - ; GCN-NEXT: BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 32, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 7) + ; GCN-NEXT: BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN1]], [[PRED_COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 32, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 7) ; GCN-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */ ; GCN-NEXT: [[BUFFER_LOAD_FORMAT_XYZW_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFSET [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 48, 0, 0, implicit $exec :: (dereferenceable load (s128) from unknown-address + 48, align 1, addrspace 7) ; GCN-NEXT: [[BUFFER_LOAD_FORMAT_XYZW_IDXEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 48, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 7) - ; GCN-NEXT: [[BUFFER_LOAD_FORMAT_XYZW_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFEN [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 7) - ; GCN-NEXT: [[BUFFER_LOAD_FORMAT_XYZW_IDXEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 48, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 7) + ; GCN-NEXT: [[BUFFER_LOAD_FORMAT_XYZW_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFEN [[PRED_COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 7) + ; GCN-NEXT: [[BUFFER_LOAD_FORMAT_XYZW_IDXEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[PRED_COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 48, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 7) ; GCN-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */ ; GCN-NEXT: BUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed [[BUFFER_LOAD_FORMAT_XYZW_OFFSET]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 64, 0, 
0, implicit $exec :: (dereferenceable store (s128) into unknown-address + 64, align 1, addrspace 7) - ; GCN-NEXT: BUFFER_STORE_FORMAT_XYZW_OFFEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_OFFEN]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 7) + ; GCN-NEXT: BUFFER_STORE_FORMAT_XYZW_OFFEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_OFFEN]], [[PRED_COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 7) ; GCN-NEXT: BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 64, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 7) - ; GCN-NEXT: BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 64, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 7) + ; GCN-NEXT: BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN1]], [[PRED_COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 64, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 7) ; GCN-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */ - ; GCN-NEXT: BUFFER_ATOMIC_ADD_OFFSET [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 80, 0, implicit $exec :: (volatile dereferenceable load store (s32) on unknown-address + 80, align 1, addrspace 7) - ; GCN-NEXT: BUFFER_ATOMIC_ADD_OFFEN [[COPY]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) - ; GCN-NEXT: BUFFER_ATOMIC_ADD_IDXEN [[COPY]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 80, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) - ; GCN-NEXT: BUFFER_ATOMIC_ADD_IDXEN [[COPY]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 80, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) + ; GCN-NEXT: BUFFER_ATOMIC_ADD_OFFSET [[PRED_COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 80, 0, implicit $exec :: (volatile dereferenceable load store (s32) on unknown-address + 80, align 1, addrspace 7) + ; GCN-NEXT: BUFFER_ATOMIC_ADD_OFFEN [[PRED_COPY]], [[PRED_COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) + ; GCN-NEXT: BUFFER_ATOMIC_ADD_IDXEN [[PRED_COPY]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 80, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) + ; GCN-NEXT: BUFFER_ATOMIC_ADD_IDXEN [[PRED_COPY]], [[PRED_COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 80, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) ; GCN-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */ - ; GCN-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY]], %subreg.sub1 + ; GCN-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY]], %subreg.sub1 ; GCN-NEXT: BUFFER_ATOMIC_CMPSWAP_OFFSET [[REG_SEQUENCE1]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 96, 0, implicit $exec :: (volatile dereferenceable load store (s32) on unknown-address + 96, align 1, addrspace 7) - ; GCN-NEXT: BUFFER_ATOMIC_CMPSWAP_OFFEN [[REG_SEQUENCE1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 0, 0, implicit $exec :: (volatile dereferenceable load 
store (s32), align 1, addrspace 7) + ; GCN-NEXT: BUFFER_ATOMIC_CMPSWAP_OFFEN [[REG_SEQUENCE1]], [[PRED_COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) ; GCN-NEXT: BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE1]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 96, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) - ; GCN-NEXT: BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 96, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) + ; GCN-NEXT: BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE1]], [[PRED_COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 96, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) ; GCN-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */ ; GCN-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1065353216, implicit $exec ; GCN-NEXT: BUFFER_ATOMIC_ADD_F32_OFFSET [[V_MOV_B32_e32_1]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 112, 0, implicit $exec :: (volatile dereferenceable load store (s32) on unknown-address + 112, align 1, addrspace 7) - ; GCN-NEXT: BUFFER_ATOMIC_ADD_F32_OFFEN [[V_MOV_B32_e32_1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) + ; GCN-NEXT: BUFFER_ATOMIC_ADD_F32_OFFEN [[V_MOV_B32_e32_1]], [[PRED_COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) ; GCN-NEXT: BUFFER_ATOMIC_ADD_F32_IDXEN [[V_MOV_B32_e32_1]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 112, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) - ; GCN-NEXT: BUFFER_ATOMIC_ADD_F32_IDXEN [[V_MOV_B32_e32_1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 112, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) + ; GCN-NEXT: BUFFER_ATOMIC_ADD_F32_IDXEN [[V_MOV_B32_e32_1]], [[PRED_COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 112, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) ; GCN-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */ ; GCN-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 128, 0, 0, implicit $exec :: (dereferenceable load (s128) from unknown-address + 128, align 1, addrspace 7) ; GCN-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 64 ; GCN-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_1]], 64, 0, 0, implicit $exec :: (dereferenceable load (s128) from unknown-address + 128, align 1, addrspace 7) ; GCN-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 128 ; GCN-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_2]], 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from unknown-address + 128, align 1, addrspace 7) - ; GCN-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_2]], 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 7) - ; GCN-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]] - ; GCN-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET4:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[S_LOAD_DWORDX4_IMM]], 
[[COPY2]], 128, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 7) + ; GCN-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_2]], 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 7) + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]] + ; GCN-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET4:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[S_LOAD_DWORDX4_IMM]], [[PRED_COPY2]], 128, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 7) ; GCN-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */ ; GCN-NEXT: [[BUFFER_LOAD_FORMAT_XYZW_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFSET [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 144, 0, 0, implicit $exec :: (dereferenceable load (s128) from unknown-address + 144, align 1, addrspace 7) ; GCN-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 72 ; GCN-NEXT: [[BUFFER_LOAD_FORMAT_XYZW_OFFSET2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFSET [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_3]], 72, 0, 0, implicit $exec :: (dereferenceable load (s128) from unknown-address + 144, align 1, addrspace 7) ; GCN-NEXT: [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 144 ; GCN-NEXT: [[BUFFER_LOAD_FORMAT_XYZW_OFFSET3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFSET [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_4]], 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from unknown-address + 144, align 1, addrspace 7) - ; GCN-NEXT: [[BUFFER_LOAD_FORMAT_XYZW_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFEN [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_4]], 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 7) - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]] - ; GCN-NEXT: [[BUFFER_LOAD_FORMAT_XYZW_OFFSET4:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFSET [[S_LOAD_DWORDX4_IMM]], [[COPY3]], 144, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 7) + ; GCN-NEXT: [[BUFFER_LOAD_FORMAT_XYZW_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFEN [[PRED_COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_4]], 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 7) + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]] + ; GCN-NEXT: [[BUFFER_LOAD_FORMAT_XYZW_OFFSET4:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFSET [[S_LOAD_DWORDX4_IMM]], [[PRED_COPY3]], 144, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 7) ; GCN-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */ - ; GCN-NEXT: BUFFER_ATOMIC_ADD_OFFSET [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 160, 0, implicit $exec :: (volatile dereferenceable load store (s32) on unknown-address + 160, align 1, addrspace 7) + ; GCN-NEXT: BUFFER_ATOMIC_ADD_OFFSET [[PRED_COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 160, 0, implicit $exec :: (volatile dereferenceable load store (s32) on unknown-address + 160, align 1, addrspace 7) ; GCN-NEXT: [[S_MOV_B32_5:%[0-9]+]]:sreg_32 = S_MOV_B32 80 - ; GCN-NEXT: BUFFER_ATOMIC_ADD_OFFSET [[COPY]], [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_5]], 80, 0, implicit $exec :: (volatile dereferenceable load store (s32) on unknown-address + 160, align 1, addrspace 7) + ; GCN-NEXT: BUFFER_ATOMIC_ADD_OFFSET [[PRED_COPY]], [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_5]], 80, 0, implicit $exec :: (volatile dereferenceable load store (s32) on unknown-address + 160, align 1, addrspace 7) ; GCN-NEXT: [[S_MOV_B32_6:%[0-9]+]]:sreg_32 
= S_MOV_B32 160 - ; GCN-NEXT: BUFFER_ATOMIC_ADD_OFFSET [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on unknown-address + 160, align 1, addrspace 7) - ; GCN-NEXT: BUFFER_ATOMIC_ADD_OFFEN [[COPY]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[COPY]] - ; GCN-NEXT: BUFFER_ATOMIC_ADD_OFFSET [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[COPY4]], 160, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) + ; GCN-NEXT: BUFFER_ATOMIC_ADD_OFFSET [[PRED_COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on unknown-address + 160, align 1, addrspace 7) + ; GCN-NEXT: BUFFER_ATOMIC_ADD_OFFEN [[PRED_COPY]], [[PRED_COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]] + ; GCN-NEXT: BUFFER_ATOMIC_ADD_OFFSET [[PRED_COPY]], [[S_LOAD_DWORDX4_IMM]], [[PRED_COPY4]], 160, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) ; GCN-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */ ; GCN-NEXT: BUFFER_ATOMIC_CMPSWAP_OFFSET [[REG_SEQUENCE1]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 176, 0, implicit $exec :: (volatile dereferenceable load store (s32) on unknown-address + 176, align 1, addrspace 7) ; GCN-NEXT: [[S_MOV_B32_7:%[0-9]+]]:sreg_32 = S_MOV_B32 88 ; GCN-NEXT: BUFFER_ATOMIC_CMPSWAP_OFFSET [[REG_SEQUENCE1]], [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_7]], 88, 0, implicit $exec :: (volatile dereferenceable load store (s32) on unknown-address + 176, align 1, addrspace 7) ; GCN-NEXT: [[S_MOV_B32_8:%[0-9]+]]:sreg_32 = S_MOV_B32 176 ; GCN-NEXT: BUFFER_ATOMIC_CMPSWAP_OFFSET [[REG_SEQUENCE1]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_8]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on unknown-address + 176, align 1, addrspace 7) - ; GCN-NEXT: BUFFER_ATOMIC_CMPSWAP_OFFEN [[REG_SEQUENCE1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_8]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[COPY]] - ; GCN-NEXT: BUFFER_ATOMIC_CMPSWAP_OFFSET [[REG_SEQUENCE1]], [[S_LOAD_DWORDX4_IMM]], [[COPY5]], 176, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) + ; GCN-NEXT: BUFFER_ATOMIC_CMPSWAP_OFFEN [[REG_SEQUENCE1]], [[PRED_COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_8]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]] + ; GCN-NEXT: BUFFER_ATOMIC_CMPSWAP_OFFSET [[REG_SEQUENCE1]], [[S_LOAD_DWORDX4_IMM]], [[PRED_COPY5]], 176, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) ; GCN-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */ ; GCN-NEXT: BUFFER_STORE_DWORDX4_OFFSET_exact killed [[BUFFER_LOAD_DWORDX4_OFFSET1]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 192, 0, 0, implicit $exec :: (dereferenceable store (s128) into unknown-address + 192, align 1, addrspace 7) ; GCN-NEXT: [[S_MOV_B32_9:%[0-9]+]]:sreg_32 = S_MOV_B32 96 ; GCN-NEXT: BUFFER_STORE_DWORDX4_OFFSET_exact killed [[BUFFER_LOAD_DWORDX4_OFFSET2]], [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_9]], 96, 0, 0, 
implicit $exec :: (dereferenceable store (s128) into unknown-address + 192, align 1, addrspace 7) ; GCN-NEXT: [[S_MOV_B32_10:%[0-9]+]]:sreg_32 = S_MOV_B32 192 ; GCN-NEXT: BUFFER_STORE_DWORDX4_OFFSET_exact killed [[BUFFER_LOAD_DWORDX4_OFFSET3]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_10]], 0, 0, 0, implicit $exec :: (dereferenceable store (s128) into unknown-address + 192, align 1, addrspace 7) - ; GCN-NEXT: BUFFER_STORE_DWORDX4_OFFEN_exact killed [[BUFFER_LOAD_DWORDX4_OFFEN1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_10]], 0, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 7) - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[COPY]] - ; GCN-NEXT: BUFFER_STORE_DWORDX4_OFFSET_exact killed [[BUFFER_LOAD_DWORDX4_OFFSET4]], [[S_LOAD_DWORDX4_IMM]], [[COPY6]], 192, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 7) + ; GCN-NEXT: BUFFER_STORE_DWORDX4_OFFEN_exact killed [[BUFFER_LOAD_DWORDX4_OFFEN1]], [[PRED_COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_10]], 0, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 7) + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]] + ; GCN-NEXT: BUFFER_STORE_DWORDX4_OFFSET_exact killed [[BUFFER_LOAD_DWORDX4_OFFSET4]], [[S_LOAD_DWORDX4_IMM]], [[PRED_COPY6]], 192, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 7) ; GCN-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */ ; GCN-NEXT: BUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed [[BUFFER_LOAD_FORMAT_XYZW_OFFSET1]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 208, 0, 0, implicit $exec :: (dereferenceable store (s128) into unknown-address + 208, align 1, addrspace 7) ; GCN-NEXT: [[S_MOV_B32_11:%[0-9]+]]:sreg_32 = S_MOV_B32 104 ; GCN-NEXT: BUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed [[BUFFER_LOAD_FORMAT_XYZW_OFFSET2]], [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_11]], 104, 0, 0, implicit $exec :: (dereferenceable store (s128) into unknown-address + 208, align 1, addrspace 7) ; GCN-NEXT: [[S_MOV_B32_12:%[0-9]+]]:sreg_32 = S_MOV_B32 208 ; GCN-NEXT: BUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed [[BUFFER_LOAD_FORMAT_XYZW_OFFSET3]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_12]], 0, 0, 0, implicit $exec :: (dereferenceable store (s128) into unknown-address + 208, align 1, addrspace 7) - ; GCN-NEXT: BUFFER_STORE_FORMAT_XYZW_OFFEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_OFFEN1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_12]], 0, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 7) - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[COPY]] - ; GCN-NEXT: BUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed [[BUFFER_LOAD_FORMAT_XYZW_OFFSET4]], [[S_LOAD_DWORDX4_IMM]], [[COPY7]], 208, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 7) + ; GCN-NEXT: BUFFER_STORE_FORMAT_XYZW_OFFEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_OFFEN1]], [[PRED_COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_12]], 0, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 7) + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]] + ; GCN-NEXT: BUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed [[BUFFER_LOAD_FORMAT_XYZW_OFFSET4]], [[S_LOAD_DWORDX4_IMM]], [[PRED_COPY7]], 208, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 7) ; GCN-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */ - ; GCN-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GCN-NEXT: [[BUFFER_LOAD_DWORDX4_IDXEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[COPY8]], [[S_LOAD_DWORDX4_IMM]], 
[[S_MOV_B32_]], 224, 0, 0, implicit $exec :: (dereferenceable load (s128) from unknown-address + 224, align 1, addrspace 7) + ; GCN-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; GCN-NEXT: [[BUFFER_LOAD_DWORDX4_IDXEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[PRED_COPY8]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 224, 0, 0, implicit $exec :: (dereferenceable load (s128) from unknown-address + 224, align 1, addrspace 7) ; GCN-NEXT: [[S_MOV_B32_13:%[0-9]+]]:sreg_32 = S_MOV_B32 112 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GCN-NEXT: [[BUFFER_LOAD_DWORDX4_IDXEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[COPY9]], [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_13]], 112, 0, 0, implicit $exec :: (dereferenceable load (s128) from unknown-address + 224, align 1, addrspace 7) + ; GCN-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; GCN-NEXT: [[BUFFER_LOAD_DWORDX4_IDXEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[PRED_COPY9]], [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_13]], 112, 0, 0, implicit $exec :: (dereferenceable load (s128) from unknown-address + 224, align 1, addrspace 7) ; GCN-NEXT: [[S_MOV_B32_14:%[0-9]+]]:sreg_32 = S_MOV_B32 224 - ; GCN-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GCN-NEXT: [[BUFFER_LOAD_DWORDX4_IDXEN4:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[COPY10]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_14]], 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from unknown-address + 224, align 1, addrspace 7) - ; GCN-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[COPY]], %subreg.sub1 + ; GCN-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; GCN-NEXT: [[BUFFER_LOAD_DWORDX4_IDXEN4:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[PRED_COPY10]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_14]], 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from unknown-address + 224, align 1, addrspace 7) + ; GCN-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[PRED_COPY]], %subreg.sub1 ; GCN-NEXT: [[BUFFER_LOAD_DWORDX4_BOTHEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_BOTHEN [[REG_SEQUENCE2]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_14]], 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 7) - ; GCN-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:sreg_32 = COPY [[COPY]] - ; GCN-NEXT: [[BUFFER_LOAD_DWORDX4_IDXEN5:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[COPY11]], [[S_LOAD_DWORDX4_IMM]], [[COPY12]], 224, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 7) + ; GCN-NEXT: [[PRED_COPY11:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; GCN-NEXT: [[PRED_COPY12:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]] + ; GCN-NEXT: [[BUFFER_LOAD_DWORDX4_IDXEN5:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[PRED_COPY11]], [[S_LOAD_DWORDX4_IMM]], [[PRED_COPY12]], 224, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 7) ; GCN-NEXT: [[BUFFER_LOAD_DWORDX4_IDXEN6:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 224, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 7) - ; GCN-NEXT: [[BUFFER_LOAD_DWORDX4_IDXEN7:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 224, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 7) + ; GCN-NEXT: 
[[BUFFER_LOAD_DWORDX4_IDXEN7:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[PRED_COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 224, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 7) ; GCN-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */ - ; GCN-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GCN-NEXT: [[BUFFER_LOAD_FORMAT_XYZW_IDXEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[COPY13]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 240, 0, 0, implicit $exec :: (dereferenceable load (s128) from unknown-address + 240, align 1, addrspace 7) + ; GCN-NEXT: [[PRED_COPY13:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; GCN-NEXT: [[BUFFER_LOAD_FORMAT_XYZW_IDXEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[PRED_COPY13]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 240, 0, 0, implicit $exec :: (dereferenceable load (s128) from unknown-address + 240, align 1, addrspace 7) ; GCN-NEXT: [[S_MOV_B32_15:%[0-9]+]]:sreg_32 = S_MOV_B32 120 - ; GCN-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GCN-NEXT: [[BUFFER_LOAD_FORMAT_XYZW_IDXEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[COPY14]], [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_15]], 120, 0, 0, implicit $exec :: (dereferenceable load (s128) from unknown-address + 240, align 1, addrspace 7) + ; GCN-NEXT: [[PRED_COPY14:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; GCN-NEXT: [[BUFFER_LOAD_FORMAT_XYZW_IDXEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[PRED_COPY14]], [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_15]], 120, 0, 0, implicit $exec :: (dereferenceable load (s128) from unknown-address + 240, align 1, addrspace 7) ; GCN-NEXT: [[S_MOV_B32_16:%[0-9]+]]:sreg_32 = S_MOV_B32 240 - ; GCN-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GCN-NEXT: [[BUFFER_LOAD_FORMAT_XYZW_IDXEN4:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[COPY15]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_16]], 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from unknown-address + 240, align 1, addrspace 7) + ; GCN-NEXT: [[PRED_COPY15:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; GCN-NEXT: [[BUFFER_LOAD_FORMAT_XYZW_IDXEN4:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[PRED_COPY15]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_16]], 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from unknown-address + 240, align 1, addrspace 7) ; GCN-NEXT: [[BUFFER_LOAD_FORMAT_XYZW_BOTHEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_BOTHEN [[REG_SEQUENCE2]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_16]], 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 7) - ; GCN-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GCN-NEXT: [[COPY17:%[0-9]+]]:sreg_32 = COPY [[COPY]] - ; GCN-NEXT: [[BUFFER_LOAD_FORMAT_XYZW_IDXEN5:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[COPY16]], [[S_LOAD_DWORDX4_IMM]], [[COPY17]], 240, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 7) + ; GCN-NEXT: [[PRED_COPY16:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; GCN-NEXT: [[PRED_COPY17:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]] + ; GCN-NEXT: [[BUFFER_LOAD_FORMAT_XYZW_IDXEN5:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[PRED_COPY16]], [[S_LOAD_DWORDX4_IMM]], [[PRED_COPY17]], 240, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 7) ; GCN-NEXT: [[BUFFER_LOAD_FORMAT_XYZW_IDXEN6:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 240, 0, 0, implicit $exec :: (dereferenceable load 
(s128), align 1, addrspace 7) - ; GCN-NEXT: [[BUFFER_LOAD_FORMAT_XYZW_IDXEN7:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 240, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 7) + ; GCN-NEXT: [[BUFFER_LOAD_FORMAT_XYZW_IDXEN7:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[PRED_COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 240, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 7) ; GCN-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */ - ; GCN-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GCN-NEXT: BUFFER_ATOMIC_ADD_IDXEN [[COPY]], [[COPY18]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 256, 0, implicit $exec :: (volatile dereferenceable load store (s32) on unknown-address + 256, align 1, addrspace 7) - ; GCN-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GCN-NEXT: BUFFER_ATOMIC_ADD_IDXEN [[COPY]], [[COPY19]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_2]], 128, 0, implicit $exec :: (volatile dereferenceable load store (s32) on unknown-address + 256, align 1, addrspace 7) + ; GCN-NEXT: [[PRED_COPY18:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; GCN-NEXT: BUFFER_ATOMIC_ADD_IDXEN [[PRED_COPY]], [[PRED_COPY18]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 256, 0, implicit $exec :: (volatile dereferenceable load store (s32) on unknown-address + 256, align 1, addrspace 7) + ; GCN-NEXT: [[PRED_COPY19:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; GCN-NEXT: BUFFER_ATOMIC_ADD_IDXEN [[PRED_COPY]], [[PRED_COPY19]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_2]], 128, 0, implicit $exec :: (volatile dereferenceable load store (s32) on unknown-address + 256, align 1, addrspace 7) ; GCN-NEXT: [[S_MOV_B32_17:%[0-9]+]]:sreg_32 = S_MOV_B32 256 - ; GCN-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GCN-NEXT: BUFFER_ATOMIC_ADD_IDXEN [[COPY]], [[COPY20]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_17]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on unknown-address + 256, align 1, addrspace 7) - ; GCN-NEXT: BUFFER_ATOMIC_ADD_BOTHEN [[COPY]], [[REG_SEQUENCE2]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_17]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) - ; GCN-NEXT: [[COPY21:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GCN-NEXT: [[COPY22:%[0-9]+]]:sreg_32 = COPY [[COPY]] - ; GCN-NEXT: BUFFER_ATOMIC_ADD_IDXEN [[COPY]], [[COPY21]], [[S_LOAD_DWORDX4_IMM]], [[COPY22]], 256, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) - ; GCN-NEXT: BUFFER_ATOMIC_ADD_IDXEN [[COPY]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 256, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) - ; GCN-NEXT: BUFFER_ATOMIC_ADD_IDXEN [[COPY]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 256, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) + ; GCN-NEXT: [[PRED_COPY20:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; GCN-NEXT: BUFFER_ATOMIC_ADD_IDXEN [[PRED_COPY]], [[PRED_COPY20]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_17]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on unknown-address + 256, align 1, addrspace 7) + ; GCN-NEXT: BUFFER_ATOMIC_ADD_BOTHEN [[PRED_COPY]], [[REG_SEQUENCE2]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_17]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) + ; GCN-NEXT: [[PRED_COPY21:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; GCN-NEXT: 
[[PRED_COPY22:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]] + ; GCN-NEXT: BUFFER_ATOMIC_ADD_IDXEN [[PRED_COPY]], [[PRED_COPY21]], [[S_LOAD_DWORDX4_IMM]], [[PRED_COPY22]], 256, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) + ; GCN-NEXT: BUFFER_ATOMIC_ADD_IDXEN [[PRED_COPY]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 256, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) + ; GCN-NEXT: BUFFER_ATOMIC_ADD_IDXEN [[PRED_COPY]], [[PRED_COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 256, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) ; GCN-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */ - ; GCN-NEXT: [[COPY23:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GCN-NEXT: BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE1]], [[COPY23]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 272, 0, implicit $exec :: (volatile dereferenceable load store (s32) on unknown-address + 272, align 1, addrspace 7) + ; GCN-NEXT: [[PRED_COPY23:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; GCN-NEXT: BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE1]], [[PRED_COPY23]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 272, 0, implicit $exec :: (volatile dereferenceable load store (s32) on unknown-address + 272, align 1, addrspace 7) ; GCN-NEXT: [[S_MOV_B32_18:%[0-9]+]]:sreg_32 = S_MOV_B32 136 - ; GCN-NEXT: [[COPY24:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GCN-NEXT: BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE1]], [[COPY24]], [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_18]], 136, 0, implicit $exec :: (volatile dereferenceable load store (s32) on unknown-address + 272, align 1, addrspace 7) + ; GCN-NEXT: [[PRED_COPY24:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; GCN-NEXT: BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE1]], [[PRED_COPY24]], [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_18]], 136, 0, implicit $exec :: (volatile dereferenceable load store (s32) on unknown-address + 272, align 1, addrspace 7) ; GCN-NEXT: [[S_MOV_B32_19:%[0-9]+]]:sreg_32 = S_MOV_B32 272 - ; GCN-NEXT: [[COPY25:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GCN-NEXT: BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE1]], [[COPY25]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_19]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on unknown-address + 272, align 1, addrspace 7) + ; GCN-NEXT: [[PRED_COPY25:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; GCN-NEXT: BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE1]], [[PRED_COPY25]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_19]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on unknown-address + 272, align 1, addrspace 7) ; GCN-NEXT: BUFFER_ATOMIC_CMPSWAP_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE2]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_19]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) - ; GCN-NEXT: [[COPY26:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GCN-NEXT: [[COPY27:%[0-9]+]]:sreg_32 = COPY [[COPY]] - ; GCN-NEXT: BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE1]], [[COPY26]], [[S_LOAD_DWORDX4_IMM]], [[COPY27]], 272, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) + ; GCN-NEXT: [[PRED_COPY26:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; GCN-NEXT: [[PRED_COPY27:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]] + ; GCN-NEXT: BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE1]], [[PRED_COPY26]], [[S_LOAD_DWORDX4_IMM]], [[PRED_COPY27]], 272, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) 
; GCN-NEXT: BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE1]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 272, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) - ; GCN-NEXT: BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 272, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) + ; GCN-NEXT: BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE1]], [[PRED_COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 272, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) ; GCN-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */ - ; GCN-NEXT: [[COPY28:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GCN-NEXT: BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN2]], [[COPY28]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 288, 0, 0, implicit $exec :: (dereferenceable store (s128) into unknown-address + 288, align 1, addrspace 7) - ; GCN-NEXT: [[COPY29:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GCN-NEXT: BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN3]], [[COPY29]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_4]], 144, 0, 0, implicit $exec :: (dereferenceable store (s128) into unknown-address + 288, align 1, addrspace 7) + ; GCN-NEXT: [[PRED_COPY28:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; GCN-NEXT: BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN2]], [[PRED_COPY28]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 288, 0, 0, implicit $exec :: (dereferenceable store (s128) into unknown-address + 288, align 1, addrspace 7) + ; GCN-NEXT: [[PRED_COPY29:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; GCN-NEXT: BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN3]], [[PRED_COPY29]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_4]], 144, 0, 0, implicit $exec :: (dereferenceable store (s128) into unknown-address + 288, align 1, addrspace 7) ; GCN-NEXT: [[S_MOV_B32_20:%[0-9]+]]:sreg_32 = S_MOV_B32 288 - ; GCN-NEXT: [[COPY30:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GCN-NEXT: BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN4]], [[COPY30]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_20]], 0, 0, 0, implicit $exec :: (dereferenceable store (s128) into unknown-address + 288, align 1, addrspace 7) + ; GCN-NEXT: [[PRED_COPY30:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; GCN-NEXT: BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN4]], [[PRED_COPY30]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_20]], 0, 0, 0, implicit $exec :: (dereferenceable store (s128) into unknown-address + 288, align 1, addrspace 7) ; GCN-NEXT: BUFFER_STORE_DWORDX4_BOTHEN_exact killed [[BUFFER_LOAD_DWORDX4_BOTHEN]], [[REG_SEQUENCE2]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_20]], 0, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 7) - ; GCN-NEXT: [[COPY31:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GCN-NEXT: [[COPY32:%[0-9]+]]:sreg_32 = COPY [[COPY]] - ; GCN-NEXT: BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN5]], [[COPY31]], [[S_LOAD_DWORDX4_IMM]], [[COPY32]], 288, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 7) + ; GCN-NEXT: [[PRED_COPY31:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; GCN-NEXT: [[PRED_COPY32:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]] + ; GCN-NEXT: BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN5]], [[PRED_COPY31]], [[S_LOAD_DWORDX4_IMM]], [[PRED_COPY32]], 288, 0, 0, implicit $exec :: (dereferenceable store 
(s128), align 1, addrspace 7) ; GCN-NEXT: BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN6]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 288, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 7) - ; GCN-NEXT: BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN7]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 288, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 7) + ; GCN-NEXT: BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN7]], [[PRED_COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 288, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 7) ; GCN-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */ - ; GCN-NEXT: [[COPY33:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GCN-NEXT: BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN2]], [[COPY33]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 304, 0, 0, implicit $exec :: (dereferenceable store (s128) into unknown-address + 304, align 1, addrspace 7) + ; GCN-NEXT: [[PRED_COPY33:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; GCN-NEXT: BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN2]], [[PRED_COPY33]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 304, 0, 0, implicit $exec :: (dereferenceable store (s128) into unknown-address + 304, align 1, addrspace 7) ; GCN-NEXT: [[S_MOV_B32_21:%[0-9]+]]:sreg_32 = S_MOV_B32 152 - ; GCN-NEXT: [[COPY34:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GCN-NEXT: BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN3]], [[COPY34]], [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_21]], 152, 0, 0, implicit $exec :: (dereferenceable store (s128) into unknown-address + 304, align 1, addrspace 7) + ; GCN-NEXT: [[PRED_COPY34:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; GCN-NEXT: BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN3]], [[PRED_COPY34]], [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_21]], 152, 0, 0, implicit $exec :: (dereferenceable store (s128) into unknown-address + 304, align 1, addrspace 7) ; GCN-NEXT: [[S_MOV_B32_22:%[0-9]+]]:sreg_32 = S_MOV_B32 304 - ; GCN-NEXT: [[COPY35:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GCN-NEXT: BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN4]], [[COPY35]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_22]], 0, 0, 0, implicit $exec :: (dereferenceable store (s128) into unknown-address + 304, align 1, addrspace 7) + ; GCN-NEXT: [[PRED_COPY35:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; GCN-NEXT: BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN4]], [[PRED_COPY35]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_22]], 0, 0, 0, implicit $exec :: (dereferenceable store (s128) into unknown-address + 304, align 1, addrspace 7) ; GCN-NEXT: BUFFER_STORE_FORMAT_XYZW_BOTHEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_BOTHEN]], [[REG_SEQUENCE2]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_22]], 0, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 7) - ; GCN-NEXT: [[COPY36:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GCN-NEXT: [[COPY37:%[0-9]+]]:sreg_32 = COPY [[COPY]] - ; GCN-NEXT: BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN5]], [[COPY36]], [[S_LOAD_DWORDX4_IMM]], [[COPY37]], 304, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 7) + ; GCN-NEXT: [[PRED_COPY36:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; GCN-NEXT: [[PRED_COPY37:%[0-9]+]]:sreg_32 = 
PRED_COPY [[PRED_COPY]] + ; GCN-NEXT: BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN5]], [[PRED_COPY36]], [[S_LOAD_DWORDX4_IMM]], [[PRED_COPY37]], 304, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 7) ; GCN-NEXT: BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN6]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 304, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 7) - ; GCN-NEXT: BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN7]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 304, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 7) + ; GCN-NEXT: BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN7]], [[PRED_COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 304, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 7) ; GCN-NEXT: S_ENDPGM 0 bb.0: %tmp0 = load <4 x i32>, ptr addrspace(6) %arg0, align 16, !invariant.load !0 diff --git a/llvm/test/CodeGen/AMDGPU/bug-sdag-emitcopyfromreg.ll b/llvm/test/CodeGen/AMDGPU/bug-sdag-emitcopyfromreg.ll --- a/llvm/test/CodeGen/AMDGPU/bug-sdag-emitcopyfromreg.ll +++ b/llvm/test/CodeGen/AMDGPU/bug-sdag-emitcopyfromreg.ll @@ -55,59 +55,59 @@ ; MIR-NEXT: successors: %bb.1(0x80000000) ; MIR-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; MIR-NEXT: {{ $}} - ; MIR-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; MIR-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; MIR-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; MIR-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; MIR-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; MIR-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; MIR-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; MIR-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM killed [[S_MOV_B64_]], 0, 0 :: (invariant load (s64) from `ptr addrspace(4) null`, align 4294967296, addrspace 4) - ; MIR-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[S_LOAD_DWORDX2_IMM]].sub1 - ; MIR-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[S_LOAD_DWORDX2_IMM]].sub0 + ; MIR-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY [[S_LOAD_DWORDX2_IMM]].sub1 + ; MIR-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY [[S_LOAD_DWORDX2_IMM]].sub0 ; MIR-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; MIR-NEXT: S_CMP_LG_U32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc - ; MIR-NEXT: [[COPY5:%[0-9]+]]:sreg_32_xm0_xexec = COPY $scc - ; MIR-NEXT: $scc = COPY [[COPY5]] - ; MIR-NEXT: [[S_CSELECT_B32_:%[0-9]+]]:sreg_32 = S_CSELECT_B32 [[COPY3]], [[S_MOV_B32_]], implicit $scc + ; MIR-NEXT: S_CMP_LG_U32 [[PRED_COPY4]], [[S_MOV_B32_]], implicit-def $scc + ; MIR-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32_xm0_xexec = PRED_COPY $scc + ; MIR-NEXT: $scc = PRED_COPY [[PRED_COPY5]] + ; MIR-NEXT: [[S_CSELECT_B32_:%[0-9]+]]:sreg_32 = S_CSELECT_B32 [[PRED_COPY3]], [[S_MOV_B32_]], implicit $scc ; MIR-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 1 - ; MIR-NEXT: [[S_LSHR_B32_:%[0-9]+]]:sreg_32 = S_LSHR_B32 [[S_MOV_B32_1]], [[COPY4]], implicit-def dead $scc - ; MIR-NEXT: S_CMP_LG_U32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc - ; MIR-NEXT: [[COPY6:%[0-9]+]]:sreg_32_xm0_xexec = COPY $scc - ; MIR-NEXT: $scc = COPY [[COPY6]] + ; MIR-NEXT: [[S_LSHR_B32_:%[0-9]+]]:sreg_32 = S_LSHR_B32 [[S_MOV_B32_1]], [[PRED_COPY4]], implicit-def dead $scc + ; MIR-NEXT: S_CMP_LG_U32 [[PRED_COPY4]], [[S_MOV_B32_]], implicit-def $scc + ; MIR-NEXT: 
[[PRED_COPY6:%[0-9]+]]:sreg_32_xm0_xexec = PRED_COPY $scc + ; MIR-NEXT: $scc = PRED_COPY [[PRED_COPY6]] ; MIR-NEXT: [[S_CSELECT_B32_1:%[0-9]+]]:sreg_32 = S_CSELECT_B32 killed [[S_LSHR_B32_]], [[S_MOV_B32_]], implicit $scc - ; MIR-NEXT: [[S_LSHR_B32_1:%[0-9]+]]:sreg_32 = S_LSHR_B32 [[COPY3]], [[S_MOV_B32_1]], implicit-def dead $scc - ; MIR-NEXT: S_CMP_LG_U32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc - ; MIR-NEXT: [[COPY7:%[0-9]+]]:sreg_32_xm0_xexec = COPY $scc - ; MIR-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY]], %subreg.sub1 - ; MIR-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]] - ; MIR-NEXT: $scc = COPY [[COPY7]] + ; MIR-NEXT: [[S_LSHR_B32_1:%[0-9]+]]:sreg_32 = S_LSHR_B32 [[PRED_COPY3]], [[S_MOV_B32_1]], implicit-def dead $scc + ; MIR-NEXT: S_CMP_LG_U32 [[PRED_COPY4]], [[S_MOV_B32_]], implicit-def $scc + ; MIR-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_32_xm0_xexec = PRED_COPY $scc + ; MIR-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY]], %subreg.sub1 + ; MIR-NEXT: [[PRED_COPY8:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]] + ; MIR-NEXT: $scc = PRED_COPY [[PRED_COPY7]] ; MIR-NEXT: [[S_CSELECT_B32_2:%[0-9]+]]:sreg_32 = S_CSELECT_B32 killed [[S_LSHR_B32_1]], [[S_MOV_B32_]], implicit $scc ; MIR-NEXT: [[V_CVT_F32_I32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_F32_I32_e64 killed [[S_CSELECT_B32_2]], 0, 0, implicit $mode, implicit $exec ; MIR-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483647 - ; MIR-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[V_CVT_F32_I32_e64_]] - ; MIR-NEXT: [[S_AND_B32_:%[0-9]+]]:sgpr_32 = S_AND_B32 killed [[COPY9]], killed [[S_MOV_B32_2]], implicit-def dead $scc + ; MIR-NEXT: [[PRED_COPY9:%[0-9]+]]:sreg_32 = PRED_COPY [[V_CVT_F32_I32_e64_]] + ; MIR-NEXT: [[S_AND_B32_:%[0-9]+]]:sgpr_32 = S_AND_B32 killed [[PRED_COPY9]], killed [[S_MOV_B32_2]], implicit-def dead $scc ; MIR-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sgpr_32 = S_MOV_B32 1065353216 ; MIR-NEXT: [[S_MOV_B32_4:%[0-9]+]]:sgpr_32 = S_MOV_B32 0 - ; MIR-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY killed [[S_MOV_B32_3]] - ; MIR-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[S_MOV_B32_4]], 0, [[COPY10]], [[COPY7]], implicit $exec - ; MIR-NEXT: [[COPY11:%[0-9]+]]:sgpr_32 = COPY [[V_CNDMASK_B32_e64_]] + ; MIR-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY killed [[S_MOV_B32_3]] + ; MIR-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[S_MOV_B32_4]], 0, [[PRED_COPY10]], [[PRED_COPY7]], implicit $exec + ; MIR-NEXT: [[PRED_COPY11:%[0-9]+]]:sgpr_32 = PRED_COPY [[V_CNDMASK_B32_e64_]] ; MIR-NEXT: [[V_CVT_F32_UBYTE0_e64_:%[0-9]+]]:vgpr_32 = V_CVT_F32_UBYTE0_e64 killed [[S_CSELECT_B32_1]], 0, 0, implicit $exec - ; MIR-NEXT: [[COPY12:%[0-9]+]]:sgpr_32 = COPY [[V_CVT_F32_UBYTE0_e64_]] + ; MIR-NEXT: [[PRED_COPY12:%[0-9]+]]:sgpr_32 = PRED_COPY [[V_CVT_F32_UBYTE0_e64_]] ; MIR-NEXT: [[V_CVT_F32_I32_e64_1:%[0-9]+]]:vgpr_32 = V_CVT_F32_I32_e64 killed [[S_CSELECT_B32_]], 0, 0, implicit $mode, implicit $exec - ; MIR-NEXT: [[COPY13:%[0-9]+]]:sgpr_32 = COPY [[V_CVT_F32_I32_e64_1]] - ; MIR-NEXT: [[V_CMP_LT_I32_e64_:%[0-9]+]]:sreg_32 = V_CMP_LT_I32_e64 [[COPY2]], [[S_MOV_B32_1]], implicit $exec - ; MIR-NEXT: [[COPY14:%[0-9]+]]:vreg_1 = COPY [[V_CMP_LT_I32_e64_]] + ; MIR-NEXT: [[PRED_COPY13:%[0-9]+]]:sgpr_32 = PRED_COPY [[V_CVT_F32_I32_e64_1]] + ; MIR-NEXT: [[V_CMP_LT_I32_e64_:%[0-9]+]]:sreg_32 = V_CMP_LT_I32_e64 [[PRED_COPY2]], [[S_MOV_B32_1]], implicit $exec + ; MIR-NEXT: [[PRED_COPY14:%[0-9]+]]:vreg_1 = PRED_COPY 
[[V_CMP_LT_I32_e64_]] ; MIR-NEXT: {{ $}} ; MIR-NEXT: bb.1.bb14: ; MIR-NEXT: successors: %bb.2(0x04000000), %bb.1(0x7c000000) ; MIR-NEXT: {{ $}} ; MIR-NEXT: [[PHI:%[0-9]+]]:sreg_32 = PHI [[S_MOV_B32_]], %bb.0, %7, %bb.1 ; MIR-NEXT: [[PHI1:%[0-9]+]]:sgpr_32 = PHI [[S_MOV_B32_4]], %bb.0, %8, %bb.1 - ; MIR-NEXT: [[COPY15:%[0-9]+]]:sreg_32 = COPY [[COPY14]] - ; MIR-NEXT: [[SI_IF_BREAK:%[0-9]+]]:sreg_32 = SI_IF_BREAK [[COPY15]], [[PHI]], implicit-def dead $scc - ; MIR-NEXT: [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[PHI1]], 0, [[COPY11]], 0, 0, implicit $mode, implicit $exec + ; MIR-NEXT: [[PRED_COPY15:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY14]] + ; MIR-NEXT: [[SI_IF_BREAK:%[0-9]+]]:sreg_32 = SI_IF_BREAK [[PRED_COPY15]], [[PHI]], implicit-def dead $scc + ; MIR-NEXT: [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[PHI1]], 0, [[PRED_COPY11]], 0, 0, implicit $mode, implicit $exec ; MIR-NEXT: [[V_ADD_F32_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, killed [[V_ADD_F32_e64_]], 0, [[S_AND_B32_]], 0, 0, implicit $mode, implicit $exec - ; MIR-NEXT: [[V_ADD_F32_e64_2:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, killed [[V_ADD_F32_e64_1]], 0, [[COPY12]], 0, 0, implicit $mode, implicit $exec - ; MIR-NEXT: [[V_ADD_F32_e64_3:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, killed [[V_ADD_F32_e64_2]], 0, [[COPY13]], 0, 0, implicit $mode, implicit $exec - ; MIR-NEXT: [[COPY16:%[0-9]+]]:sgpr_32 = COPY [[V_ADD_F32_e64_3]] + ; MIR-NEXT: [[V_ADD_F32_e64_2:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, killed [[V_ADD_F32_e64_1]], 0, [[PRED_COPY12]], 0, 0, implicit $mode, implicit $exec + ; MIR-NEXT: [[V_ADD_F32_e64_3:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, killed [[V_ADD_F32_e64_2]], 0, [[PRED_COPY13]], 0, 0, implicit $mode, implicit $exec + ; MIR-NEXT: [[PRED_COPY16:%[0-9]+]]:sgpr_32 = PRED_COPY [[V_ADD_F32_e64_3]] ; MIR-NEXT: SI_LOOP [[SI_IF_BREAK]], %bb.1, implicit-def dead $exec, implicit-def dead $scc, implicit $exec ; MIR-NEXT: S_BRANCH %bb.2 ; MIR-NEXT: {{ $}} @@ -115,7 +115,7 @@ ; MIR-NEXT: [[PHI2:%[0-9]+]]:vgpr_32 = PHI [[PHI1]], %bb.1 ; MIR-NEXT: [[PHI3:%[0-9]+]]:sreg_32 = PHI [[SI_IF_BREAK]], %bb.1 ; MIR-NEXT: SI_END_CF [[PHI3]], implicit-def dead $exec, implicit-def dead $scc, implicit $exec - ; MIR-NEXT: FLAT_STORE_DWORD [[COPY8]], [[PHI2]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %ir.ptr) + ; MIR-NEXT: FLAT_STORE_DWORD [[PRED_COPY8]], [[PHI2]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %ir.ptr) ; MIR-NEXT: SI_RETURN bb: %i = load <2 x i32>, ptr addrspace(4) null, align 4294967296 diff --git a/llvm/test/CodeGen/AMDGPU/change-scc-to-vcc.mir b/llvm/test/CodeGen/AMDGPU/change-scc-to-vcc.mir --- a/llvm/test/CodeGen/AMDGPU/change-scc-to-vcc.mir +++ b/llvm/test/CodeGen/AMDGPU/change-scc-to-vcc.mir @@ -17,13 +17,13 @@ ; GCN-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 1 ; GCN-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GCN-NEXT: [[V_MUL_HI_U32_U24_e64_:%[0-9]+]]:vgpr_32 = V_MUL_HI_U32_U24_e64 [[S_MOV_B32_1]], [[S_MOV_B32_]], implicit $exec - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY killed [[DEF1]] - ; GCN-NEXT: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 killed [[DEF]], [[COPY]], implicit-def $vcc_lo, implicit $exec - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY killed [[DEF3]] - ; GCN-NEXT: [[V_ADDC_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADDC_U32_e32 killed [[DEF2]], [[COPY1]], implicit-def $vcc_lo, implicit $vcc_lo, implicit $exec + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = 
PRED_COPY killed [[DEF1]] + ; GCN-NEXT: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 killed [[DEF]], [[PRED_COPY]], implicit-def $vcc_lo, implicit $exec + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY killed [[DEF3]] + ; GCN-NEXT: [[V_ADDC_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADDC_U32_e32 killed [[DEF2]], [[PRED_COPY1]], implicit-def $vcc_lo, implicit $vcc_lo, implicit $exec ; GCN-NEXT: [[DEF4:%[0-9]+]]:sreg_32 = IMPLICIT_DEF - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_2]] - ; GCN-NEXT: [[V_ADDC_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADDC_U32_e32 [[V_MUL_HI_U32_U24_e64_]], [[COPY2]], implicit-def $vcc_lo, implicit $vcc_lo, implicit $exec + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_2]] + ; GCN-NEXT: [[V_ADDC_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADDC_U32_e32 [[V_MUL_HI_U32_U24_e64_]], [[PRED_COPY2]], implicit-def $vcc_lo, implicit $vcc_lo, implicit $exec %0:sreg_32 = S_MOV_B32 681 %1:sreg_32 = IMPLICIT_DEF %2:sreg_32 = IMPLICIT_DEF @@ -54,11 +54,11 @@ ; GCN-NEXT: [[DEF5:%[0-9]+]]:sreg_32 = IMPLICIT_DEF ; GCN-NEXT: [[DEF6:%[0-9]+]]:sreg_32 = IMPLICIT_DEF ; GCN-NEXT: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 killed [[DEF2]], [[DEF]], implicit-def $vcc_lo, implicit $exec - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY killed [[DEF4]] - ; GCN-NEXT: [[V_ADDC_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADDC_U32_e32 killed [[DEF3]], [[COPY]], implicit-def $vcc_lo, implicit $vcc_lo, implicit $exec + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY killed [[DEF4]] + ; GCN-NEXT: [[V_ADDC_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADDC_U32_e32 killed [[DEF3]], [[PRED_COPY]], implicit-def $vcc_lo, implicit $vcc_lo, implicit $exec ; GCN-NEXT: [[DEF7:%[0-9]+]]:sreg_32 = IMPLICIT_DEF - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[DEF5]] - ; GCN-NEXT: [[V_ADDC_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADDC_U32_e32 killed [[V_ADDC_U32_e32_1]], [[COPY1]], implicit-def $vcc_lo, implicit $vcc_lo, implicit $exec + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[DEF5]] + ; GCN-NEXT: [[V_ADDC_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADDC_U32_e32 killed [[V_ADDC_U32_e32_1]], [[PRED_COPY1]], implicit-def $vcc_lo, implicit $vcc_lo, implicit $exec %0:vgpr_32 = IMPLICIT_DEF %1:vgpr_32 = IMPLICIT_DEF %2:sreg_32 = IMPLICIT_DEF diff --git a/llvm/test/CodeGen/AMDGPU/coalescer-removepartial-extend-undef-subrange.mir b/llvm/test/CodeGen/AMDGPU/coalescer-removepartial-extend-undef-subrange.mir --- a/llvm/test/CodeGen/AMDGPU/coalescer-removepartial-extend-undef-subrange.mir +++ b/llvm/test/CodeGen/AMDGPU/coalescer-removepartial-extend-undef-subrange.mir @@ -44,7 +44,7 @@ ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.1(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[COPY1]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY [[COPY1]] ; CHECK-NEXT: S_BRANCH %bb.1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.4: diff --git a/llvm/test/CodeGen/AMDGPU/collapse-endcf.mir b/llvm/test/CodeGen/AMDGPU/collapse-endcf.mir --- a/llvm/test/CodeGen/AMDGPU/collapse-endcf.mir +++ b/llvm/test/CodeGen/AMDGPU/collapse-endcf.mir @@ -12,16 +12,16 @@ ; GCN: bb.0: ; GCN-NEXT: successors: %bb.1(0x40000000), %bb.4(0x40000000) ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec - ; GCN-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY]], undef %1:sreg_64, implicit-def dead $scc + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $exec, implicit-def $exec + ; GCN-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[PRED_COPY]], undef 
%1:sreg_64, implicit-def dead $scc ; GCN-NEXT: $exec = S_MOV_B64_term killed [[S_AND_B64_]] ; GCN-NEXT: S_CBRANCH_EXECZ %bb.4, implicit $exec ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.1: ; GCN-NEXT: successors: %bb.2(0x40000000), %bb.4(0x40000000) ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec - ; GCN-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY1]], undef %3:sreg_64, implicit-def dead $scc + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64 = PRED_COPY $exec, implicit-def $exec + ; GCN-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 [[PRED_COPY1]], undef %3:sreg_64, implicit-def dead $scc ; GCN-NEXT: $exec = S_MOV_B64_term killed [[S_AND_B64_1]] ; GCN-NEXT: S_CBRANCH_EXECZ %bb.4, implicit $exec ; GCN-NEXT: {{ $}} @@ -30,7 +30,7 @@ ; GCN-NEXT: {{ $}} ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.4: - ; GCN-NEXT: $exec = S_OR_B64 $exec, [[COPY]], implicit-def $scc + ; GCN-NEXT: $exec = S_OR_B64 $exec, [[PRED_COPY]], implicit-def $scc ; GCN-NEXT: DBG_VALUE ; GCN-NEXT: S_ENDPGM 0 bb.0: @@ -67,16 +67,16 @@ ; GCN: bb.0: ; GCN-NEXT: successors: %bb.5(0x40000000), %bb.1(0x40000000) ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec - ; GCN-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY]], undef %1:sreg_64, implicit-def dead $scc + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $exec, implicit-def $exec + ; GCN-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[PRED_COPY]], undef %1:sreg_64, implicit-def dead $scc ; GCN-NEXT: $exec = S_MOV_B64_term killed [[S_AND_B64_]] ; GCN-NEXT: S_CBRANCH_EXECZ %bb.5, implicit $exec ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.1: ; GCN-NEXT: successors: %bb.2(0x40000000), %bb.4(0x40000000) ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec - ; GCN-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY1]], undef %3:sreg_64, implicit-def dead $scc + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64 = PRED_COPY $exec, implicit-def $exec + ; GCN-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 [[PRED_COPY1]], undef %3:sreg_64, implicit-def dead $scc ; GCN-NEXT: $exec = S_MOV_B64_term killed [[S_AND_B64_1]] ; GCN-NEXT: S_CBRANCH_EXECZ %bb.4, implicit $exec ; GCN-NEXT: {{ $}} @@ -89,7 +89,7 @@ ; GCN-NEXT: {{ $}} ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.5: - ; GCN-NEXT: $exec = S_OR_B64 $exec, [[COPY]], implicit-def $scc + ; GCN-NEXT: $exec = S_OR_B64 $exec, [[PRED_COPY]], implicit-def $scc ; GCN-NEXT: S_ENDPGM 0 bb.0: %0:sreg_64 = SI_IF undef %1:sreg_64, %bb.4, implicit-def dead $exec, implicit-def dead $scc, implicit $exec @@ -123,16 +123,16 @@ ; GCN: bb.0: ; GCN-NEXT: successors: %bb.1(0x40000000), %bb.5(0x40000000) ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec - ; GCN-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY]], undef %1:sreg_64, implicit-def dead $scc + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $exec, implicit-def $exec + ; GCN-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[PRED_COPY]], undef %1:sreg_64, implicit-def dead $scc ; GCN-NEXT: $exec = S_MOV_B64_term killed [[S_AND_B64_]] ; GCN-NEXT: S_CBRANCH_EXECZ %bb.5, implicit $exec ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.1: ; GCN-NEXT: successors: %bb.2(0x40000000), %bb.4(0x40000000) ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec - ; GCN-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY1]], undef %3:sreg_64, implicit-def dead $scc + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64 = PRED_COPY 
$exec, implicit-def $exec + ; GCN-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 [[PRED_COPY1]], undef %3:sreg_64, implicit-def dead $scc ; GCN-NEXT: $exec = S_MOV_B64_term killed [[S_AND_B64_1]] ; GCN-NEXT: S_CBRANCH_EXECZ %bb.4, implicit $exec ; GCN-NEXT: {{ $}} @@ -146,7 +146,7 @@ ; GCN-NEXT: DBG_VALUE ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.5: - ; GCN-NEXT: $exec = S_OR_B64 $exec, [[COPY]], implicit-def $scc + ; GCN-NEXT: $exec = S_OR_B64 $exec, [[PRED_COPY]], implicit-def $scc ; GCN-NEXT: S_ENDPGM 0 bb.0: successors: %bb.1, %bb.4 @@ -183,16 +183,16 @@ ; GCN-NEXT: successors: %bb.1(0x40000000), %bb.4(0x40000000) ; GCN-NEXT: liveins: $vgpr0, $sgpr0_sgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec - ; GCN-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY]], undef %1:sreg_64, implicit-def dead $scc + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $exec, implicit-def $exec + ; GCN-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[PRED_COPY]], undef %1:sreg_64, implicit-def dead $scc ; GCN-NEXT: $exec = S_MOV_B64_term killed [[S_AND_B64_]] ; GCN-NEXT: S_CBRANCH_EXECZ %bb.4, implicit $exec ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.1: ; GCN-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000) ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec - ; GCN-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY1]], undef %3:sreg_64, implicit-def dead $scc + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64 = PRED_COPY $exec, implicit-def $exec + ; GCN-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 [[PRED_COPY1]], undef %3:sreg_64, implicit-def dead $scc ; GCN-NEXT: $exec = S_MOV_B64_term killed [[S_AND_B64_1]] ; GCN-NEXT: S_CBRANCH_EXECZ %bb.3, implicit $exec ; GCN-NEXT: {{ $}} @@ -208,7 +208,7 @@ ; GCN-NEXT: KILL [[DEF]] ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.4: - ; GCN-NEXT: $exec = S_OR_B64 $exec, [[COPY]], implicit-def $scc + ; GCN-NEXT: $exec = S_OR_B64 $exec, [[PRED_COPY]], implicit-def $scc ; GCN-NEXT: S_ENDPGM 0 bb.0: successors: %bb.1, %bb.4 @@ -247,16 +247,16 @@ ; GCN: bb.0: ; GCN-NEXT: successors: %bb.1(0x40000000), %bb.4(0x40000000) ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec - ; GCN-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY]], undef %1:sreg_64, implicit-def dead $scc + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $exec, implicit-def $exec + ; GCN-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[PRED_COPY]], undef %1:sreg_64, implicit-def dead $scc ; GCN-NEXT: $exec = S_MOV_B64_term killed [[S_AND_B64_]] ; GCN-NEXT: S_CBRANCH_EXECZ %bb.4, implicit $exec ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.1: ; GCN-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000) ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec - ; GCN-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY1]], undef %3:sreg_64, implicit-def dead $scc + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64 = PRED_COPY $exec, implicit-def $exec + ; GCN-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 [[PRED_COPY1]], undef %3:sreg_64, implicit-def dead $scc ; GCN-NEXT: $exec = S_MOV_B64_term killed [[S_AND_B64_1]] ; GCN-NEXT: S_CBRANCH_EXECZ %bb.3, implicit $exec ; GCN-NEXT: {{ $}} @@ -270,10 +270,10 @@ ; GCN-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF ; GCN-NEXT: [[S_BREV_B32_:%[0-9]+]]:sgpr_32 = S_BREV_B32 [[DEF]] ; GCN-NEXT: KILL [[DEF]] - ; GCN-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY [[S_BREV_B32_]] + ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = 
COPY [[S_BREV_B32_]] ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.4: - ; GCN-NEXT: $exec = S_OR_B64 $exec, [[COPY]], implicit-def $scc + ; GCN-NEXT: $exec = S_OR_B64 $exec, [[PRED_COPY]], implicit-def $scc ; GCN-NEXT: S_ENDPGM 0 bb.0: successors: %bb.1, %bb.4 @@ -311,16 +311,16 @@ ; GCN: bb.0: ; GCN-NEXT: successors: %bb.1(0x40000000), %bb.4(0x40000000) ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec - ; GCN-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY]], undef %1:sreg_64, implicit-def dead $scc + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $exec, implicit-def $exec + ; GCN-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[PRED_COPY]], undef %1:sreg_64, implicit-def dead $scc ; GCN-NEXT: $exec = S_MOV_B64_term killed [[S_AND_B64_]] ; GCN-NEXT: S_CBRANCH_EXECZ %bb.4, implicit $exec ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.1: ; GCN-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000) ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec - ; GCN-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY1]], undef %3:sreg_64, implicit-def dead $scc + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64 = PRED_COPY $exec, implicit-def $exec + ; GCN-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 [[PRED_COPY1]], undef %3:sreg_64, implicit-def dead $scc ; GCN-NEXT: $exec = S_MOV_B64_term killed [[S_AND_B64_1]] ; GCN-NEXT: S_CBRANCH_EXECZ %bb.3, implicit $exec ; GCN-NEXT: {{ $}} @@ -331,11 +331,11 @@ ; GCN-NEXT: bb.3: ; GCN-NEXT: successors: %bb.4(0x80000000) ; GCN-NEXT: {{ $}} - ; GCN-NEXT: $exec = S_OR_B64 $exec, [[COPY1]], implicit-def $scc + ; GCN-NEXT: $exec = S_OR_B64 $exec, [[PRED_COPY1]], implicit-def $scc ; GCN-NEXT: [[S_BREV_B64_:%[0-9]+]]:sreg_64 = S_BREV_B64 $exec ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.4: - ; GCN-NEXT: $exec = S_OR_B64 $exec, [[COPY]], implicit-def $scc + ; GCN-NEXT: $exec = S_OR_B64 $exec, [[PRED_COPY]], implicit-def $scc ; GCN-NEXT: S_ENDPGM 0 bb.0: successors: %bb.1, %bb.4 @@ -370,8 +370,8 @@ ; GCN: bb.0: ; GCN-NEXT: successors: %bb.1(0x40000000), %bb.4(0x40000000) ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec - ; GCN-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY]], undef %1:sreg_64, implicit-def dead $scc + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $exec, implicit-def $exec + ; GCN-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[PRED_COPY]], undef %1:sreg_64, implicit-def dead $scc ; GCN-NEXT: $exec = S_MOV_B64_term killed [[S_AND_B64_]] ; GCN-NEXT: S_CBRANCH_EXECZ %bb.4, implicit $exec ; GCN-NEXT: {{ $}} @@ -379,8 +379,8 @@ ; GCN-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000) ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[DEF:%[0-9]+]]:vreg_128 = IMPLICIT_DEF - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec - ; GCN-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY1]], undef %4:sreg_64, implicit-def dead $scc + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64 = PRED_COPY $exec, implicit-def $exec + ; GCN-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 [[PRED_COPY1]], undef %4:sreg_64, implicit-def dead $scc ; GCN-NEXT: $exec = S_MOV_B64_term killed [[S_AND_B64_1]] ; GCN-NEXT: S_CBRANCH_EXECZ %bb.3, implicit $exec ; GCN-NEXT: {{ $}} @@ -391,11 +391,11 @@ ; GCN-NEXT: bb.3: ; GCN-NEXT: successors: %bb.4(0x80000000) ; GCN-NEXT: {{ $}} - ; GCN-NEXT: $exec = S_OR_B64 $exec, [[COPY1]], implicit-def $scc - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[DEF]].sub2 + ; GCN-NEXT: $exec = S_OR_B64 $exec, 
[[PRED_COPY1]], implicit-def $scc + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[DEF]].sub2 ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.4: - ; GCN-NEXT: $exec = S_OR_B64 $exec, [[COPY]], implicit-def $scc + ; GCN-NEXT: $exec = S_OR_B64 $exec, [[PRED_COPY]], implicit-def $scc ; GCN-NEXT: S_ENDPGM 0 bb.0: successors: %bb.1, %bb.4 @@ -430,16 +430,16 @@ ; GCN: bb.0: ; GCN-NEXT: successors: %bb.1(0x40000000), %bb.4(0x40000000) ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec - ; GCN-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY]], undef %1:sreg_64, implicit-def dead $scc + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $exec, implicit-def $exec + ; GCN-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[PRED_COPY]], undef %1:sreg_64, implicit-def dead $scc ; GCN-NEXT: $exec = S_MOV_B64_term killed [[S_AND_B64_]] ; GCN-NEXT: S_CBRANCH_EXECZ %bb.4, implicit $exec ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.1: ; GCN-NEXT: successors: %bb.2(0x40000000), %bb.5(0x40000000) ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec - ; GCN-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY1]], undef %3:sreg_64, implicit-def dead $scc + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64 = PRED_COPY $exec, implicit-def $exec + ; GCN-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 [[PRED_COPY1]], undef %3:sreg_64, implicit-def dead $scc ; GCN-NEXT: $exec = S_MOV_B64_term killed [[S_AND_B64_1]] ; GCN-NEXT: S_CBRANCH_EXECZ %bb.5, implicit $exec ; GCN-NEXT: {{ $}} @@ -453,7 +453,7 @@ ; GCN-NEXT: S_BRANCH %bb.4 ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.4: - ; GCN-NEXT: $exec = S_OR_B64 $exec, [[COPY]], implicit-def $scc + ; GCN-NEXT: $exec = S_OR_B64 $exec, [[PRED_COPY]], implicit-def $scc ; GCN-NEXT: S_ENDPGM 0 bb.0: successors: %bb.1, %bb.4 @@ -530,9 +530,9 @@ ; GCN: bb.0: ; GCN-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec - ; GCN-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY]], undef %1:sreg_64, implicit-def dead $scc - ; GCN-NEXT: [[S_XOR_B64_:%[0-9]+]]:sreg_64 = S_XOR_B64 [[S_AND_B64_]], [[COPY]], implicit-def dead $scc + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $exec, implicit-def $exec + ; GCN-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[PRED_COPY]], undef %1:sreg_64, implicit-def dead $scc + ; GCN-NEXT: [[S_XOR_B64_:%[0-9]+]]:sreg_64 = S_XOR_B64 [[S_AND_B64_]], [[PRED_COPY]], implicit-def dead $scc ; GCN-NEXT: $exec = S_MOV_B64_term killed [[S_AND_B64_]] ; GCN-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec ; GCN-NEXT: {{ $}} @@ -552,8 +552,8 @@ ; GCN-NEXT: bb.3: ; GCN-NEXT: successors: %bb.3(0x40000000), %bb.4(0x40000000) ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec - ; GCN-NEXT: [[S_AND_B64_2:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY1]], undef %4:sreg_64, implicit-def dead $scc + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64 = PRED_COPY $exec, implicit-def $exec + ; GCN-NEXT: [[S_AND_B64_2:%[0-9]+]]:sreg_64 = S_AND_B64 [[PRED_COPY1]], undef %4:sreg_64, implicit-def dead $scc ; GCN-NEXT: $exec = S_MOV_B64_term killed [[S_AND_B64_2]] ; GCN-NEXT: S_CBRANCH_EXECZ %bb.3, implicit $exec ; GCN-NEXT: {{ $}} @@ -564,7 +564,7 @@ ; GCN-NEXT: bb.5: ; GCN-NEXT: successors: %bb.6(0x80000000) ; GCN-NEXT: {{ $}} - ; GCN-NEXT: $exec = S_OR_B64 $exec, [[COPY1]], implicit-def $scc + ; GCN-NEXT: $exec = S_OR_B64 $exec, [[PRED_COPY1]], implicit-def $scc ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.6: ; 
GCN-NEXT: $exec = S_OR_B64 $exec, [[S_AND_B64_1]], implicit-def $scc @@ -616,8 +616,8 @@ ; GCN-NEXT: bb.1: ; GCN-NEXT: successors: %bb.3(0x40000000), %bb.2(0x40000000) ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec - ; GCN-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY]], undef %1:sreg_64, implicit-def dead $scc + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $exec, implicit-def $exec + ; GCN-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[PRED_COPY]], undef %1:sreg_64, implicit-def dead $scc ; GCN-NEXT: $exec = S_MOV_B64_term killed [[S_AND_B64_]] ; GCN-NEXT: S_CBRANCH_EXECZ %bb.3, implicit $exec ; GCN-NEXT: {{ $}} @@ -629,7 +629,7 @@ ; GCN-NEXT: bb.3: ; GCN-NEXT: successors: %bb.4(0x80000000) ; GCN-NEXT: {{ $}} - ; GCN-NEXT: $exec = S_OR_B64 $exec, [[COPY]], implicit-def $scc + ; GCN-NEXT: $exec = S_OR_B64 $exec, [[PRED_COPY]], implicit-def $scc ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.4: ; GCN-NEXT: successors: %bb.5(0x80000000) @@ -643,9 +643,9 @@ ; GCN-NEXT: bb.6: ; GCN-NEXT: successors: %bb.4(0x40000000), %bb.0(0x40000000) ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec - ; GCN-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY1]], undef %3:sreg_64, implicit-def dead $scc - ; GCN-NEXT: [[S_XOR_B64_:%[0-9]+]]:sreg_64 = S_XOR_B64 [[S_AND_B64_1]], [[COPY1]], implicit-def dead $scc + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64 = PRED_COPY $exec, implicit-def $exec + ; GCN-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 [[PRED_COPY1]], undef %3:sreg_64, implicit-def dead $scc + ; GCN-NEXT: [[S_XOR_B64_:%[0-9]+]]:sreg_64 = S_XOR_B64 [[S_AND_B64_1]], [[PRED_COPY1]], implicit-def dead $scc ; GCN-NEXT: $exec = S_MOV_B64_term killed [[S_AND_B64_1]] ; GCN-NEXT: S_CBRANCH_EXECZ %bb.4, implicit $exec ; GCN-NEXT: S_BRANCH %bb.0 @@ -687,16 +687,16 @@ ; GCN: bb.0: ; GCN-NEXT: successors: %bb.1(0x40000000), %bb.4(0x40000000) ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec - ; GCN-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY]], undef %1:sreg_64, implicit-def dead $scc + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $exec, implicit-def $exec + ; GCN-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[PRED_COPY]], undef %1:sreg_64, implicit-def dead $scc ; GCN-NEXT: $exec = S_MOV_B64_term killed [[S_AND_B64_]] ; GCN-NEXT: S_CBRANCH_EXECZ %bb.4, implicit $exec ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.1: ; GCN-NEXT: successors: %bb.2(0x40000000), %bb.4(0x40000000) ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec - ; GCN-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY1]], undef %3:sreg_64, implicit-def dead $scc + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64 = PRED_COPY $exec, implicit-def $exec + ; GCN-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 [[PRED_COPY1]], undef %3:sreg_64, implicit-def dead $scc ; GCN-NEXT: $exec = S_MOV_B64_term killed [[S_AND_B64_1]] ; GCN-NEXT: S_CBRANCH_EXECZ %bb.4, implicit $exec ; GCN-NEXT: {{ $}} @@ -707,7 +707,7 @@ ; GCN-NEXT: bb.4: ; GCN-NEXT: successors: %bb.5(0x80000000) ; GCN-NEXT: {{ $}} - ; GCN-NEXT: $exec = S_OR_B64 $exec, [[COPY]], implicit-def $scc + ; GCN-NEXT: $exec = S_OR_B64 $exec, [[PRED_COPY]], implicit-def $scc ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.5: ; GCN-NEXT: S_ENDPGM 0 @@ -750,16 +750,16 @@ ; GCN: bb.0: ; GCN-NEXT: successors: %bb.1(0x40000000), %bb.4(0x40000000) ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $exec, 
implicit-def $exec - ; GCN-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY]], undef %1:sreg_64, implicit-def dead $scc + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $exec, implicit-def $exec + ; GCN-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[PRED_COPY]], undef %1:sreg_64, implicit-def dead $scc ; GCN-NEXT: $exec = S_MOV_B64_term killed [[S_AND_B64_]] ; GCN-NEXT: S_CBRANCH_EXECZ %bb.4, implicit $exec ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.1: ; GCN-NEXT: successors: %bb.2(0x40000000), %bb.5(0x40000000) ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec - ; GCN-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY1]], undef %3:sreg_64, implicit-def dead $scc + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64 = PRED_COPY $exec, implicit-def $exec + ; GCN-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 [[PRED_COPY1]], undef %3:sreg_64, implicit-def dead $scc ; GCN-NEXT: $exec = S_MOV_B64_term killed [[S_AND_B64_1]] ; GCN-NEXT: S_CBRANCH_EXECZ %bb.5, implicit $exec ; GCN-NEXT: {{ $}} @@ -774,7 +774,7 @@ ; GCN-NEXT: bb.5: ; GCN-NEXT: successors: %bb.6(0x80000000) ; GCN-NEXT: {{ $}} - ; GCN-NEXT: $exec = S_OR_B64 $exec, [[COPY]], implicit-def $scc + ; GCN-NEXT: $exec = S_OR_B64 $exec, [[PRED_COPY]], implicit-def $scc ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.6: ; GCN-NEXT: successors: %bb.4(0x80000000) @@ -825,8 +825,8 @@ ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; GCN-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 0, killed [[DEF]], implicit $exec - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec - ; GCN-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $exec, implicit-def $exec + ; GCN-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[PRED_COPY]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; GCN-NEXT: $exec = S_MOV_B64_term killed [[S_AND_B64_]] ; GCN-NEXT: S_CBRANCH_EXECZ %bb.14, implicit $exec ; GCN-NEXT: S_BRANCH %bb.1 @@ -836,8 +836,8 @@ ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; GCN-NEXT: [[V_CMP_EQ_U32_e64_1:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 0, killed [[DEF1]], implicit $exec - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec - ; GCN-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY1]], killed [[V_CMP_EQ_U32_e64_1]], implicit-def dead $scc + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64 = PRED_COPY $exec, implicit-def $exec + ; GCN-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 [[PRED_COPY1]], killed [[V_CMP_EQ_U32_e64_1]], implicit-def dead $scc ; GCN-NEXT: $exec = S_MOV_B64_term killed [[S_AND_B64_1]] ; GCN-NEXT: S_CBRANCH_EXECZ %bb.14, implicit $exec ; GCN-NEXT: S_BRANCH %bb.2 @@ -847,8 +847,8 @@ ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; GCN-NEXT: [[V_CMP_EQ_U32_e64_2:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 0, killed [[DEF2]], implicit $exec - ; GCN-NEXT: [[COPY2:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec - ; GCN-NEXT: [[S_AND_B64_2:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY2]], killed [[V_CMP_EQ_U32_e64_2]], implicit-def dead $scc + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_64 = PRED_COPY $exec, implicit-def $exec + ; GCN-NEXT: [[S_AND_B64_2:%[0-9]+]]:sreg_64 = S_AND_B64 [[PRED_COPY2]], killed [[V_CMP_EQ_U32_e64_2]], implicit-def dead $scc ; GCN-NEXT: $exec = S_MOV_B64_term killed [[S_AND_B64_2]] ; GCN-NEXT: S_CBRANCH_EXECZ %bb.7, implicit $exec ; GCN-NEXT: S_BRANCH 
%bb.3 @@ -858,8 +858,8 @@ ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[DEF3:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; GCN-NEXT: [[V_CMP_EQ_U32_e64_3:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 0, killed [[DEF3]], implicit $exec - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec - ; GCN-NEXT: [[S_AND_B64_3:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY3]], killed [[V_CMP_EQ_U32_e64_3]], implicit-def dead $scc + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_64 = PRED_COPY $exec, implicit-def $exec + ; GCN-NEXT: [[S_AND_B64_3:%[0-9]+]]:sreg_64 = S_AND_B64 [[PRED_COPY3]], killed [[V_CMP_EQ_U32_e64_3]], implicit-def dead $scc ; GCN-NEXT: $exec = S_MOV_B64_term killed [[S_AND_B64_3]] ; GCN-NEXT: S_CBRANCH_EXECZ %bb.7, implicit $exec ; GCN-NEXT: S_BRANCH %bb.4 @@ -872,7 +872,7 @@ ; GCN-NEXT: bb.7: ; GCN-NEXT: successors: %bb.8(0x80000000) ; GCN-NEXT: {{ $}} - ; GCN-NEXT: $exec = S_OR_B64 $exec, [[COPY2]], implicit-def $scc + ; GCN-NEXT: $exec = S_OR_B64 $exec, [[PRED_COPY2]], implicit-def $scc ; GCN-NEXT: S_BRANCH %bb.8 ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.8: @@ -885,9 +885,9 @@ ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[DEF4:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; GCN-NEXT: [[V_CMP_EQ_U32_e64_4:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 0, killed [[DEF4]], implicit $exec - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec - ; GCN-NEXT: [[S_AND_B64_4:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY4]], killed [[V_CMP_EQ_U32_e64_4]], implicit-def dead $scc - ; GCN-NEXT: [[S_XOR_B64_:%[0-9]+]]:sreg_64 = S_XOR_B64 [[S_AND_B64_4]], [[COPY4]], implicit-def dead $scc + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_64 = PRED_COPY $exec, implicit-def $exec + ; GCN-NEXT: [[S_AND_B64_4:%[0-9]+]]:sreg_64 = S_AND_B64 [[PRED_COPY4]], killed [[V_CMP_EQ_U32_e64_4]], implicit-def dead $scc + ; GCN-NEXT: [[S_XOR_B64_:%[0-9]+]]:sreg_64 = S_XOR_B64 [[S_AND_B64_4]], [[PRED_COPY4]], implicit-def dead $scc ; GCN-NEXT: $exec = S_MOV_B64_term killed [[S_AND_B64_4]] ; GCN-NEXT: S_CBRANCH_EXECZ %bb.12, implicit $exec ; GCN-NEXT: S_BRANCH %bb.11 @@ -912,7 +912,7 @@ ; GCN-NEXT: S_BRANCH %bb.10 ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.14: - ; GCN-NEXT: $exec = S_OR_B64 $exec, [[COPY]], implicit-def $scc + ; GCN-NEXT: $exec = S_OR_B64 $exec, [[PRED_COPY]], implicit-def $scc ; GCN-NEXT: S_ENDPGM 0 bb.0: successors: %bb.1, %bb.14 diff --git a/llvm/test/CodeGen/AMDGPU/dagcombine-fma-crash.ll b/llvm/test/CodeGen/AMDGPU/dagcombine-fma-crash.ll --- a/llvm/test/CodeGen/AMDGPU/dagcombine-fma-crash.ll +++ b/llvm/test/CodeGen/AMDGPU/dagcombine-fma-crash.ll @@ -7,12 +7,12 @@ ; CHECK-NEXT: successors: %bb.1(0x50000000), %bb.2(0x30000000) ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 0 ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 ; CHECK-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 $exec_lo, [[S_MOV_B32_1]], implicit-def dead $scc - ; CHECK-NEXT: $vcc_lo = COPY [[S_AND_B32_]] + ; CHECK-NEXT: $vcc_lo = PRED_COPY [[S_AND_B32_]] ; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.2, implicit $vcc ; CHECK-NEXT: S_BRANCH %bb.1 ; CHECK-NEXT: {{ $}} @@ -20,24 +20,24 @@ ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1065353216, implicit $exec - ; CHECK-NEXT: %20:vgpr_32 = contract reassoc nofpexcept V_FMAC_F32_e64 0, [[S_MOV_B32_]], 0, [[S_MOV_B32_]], 0, [[V_MOV_B32_e32_]], 0, 0, 
implicit $mode, implicit $exec - ; CHECK-NEXT: %1:vgpr_32 = contract reassoc nofpexcept V_FMAC_F32_e64 0, [[COPY]], 0, [[COPY]], 0, %20, 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: %2:vgpr_32 = contract reassoc nofpexcept V_ADD_F32_e64 0, %1, 0, [[V_MOV_B32_e32_]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_FMAC_F32_e64_:%[0-9]+]]:vgpr_32 = contract reassoc nofpexcept V_FMAC_F32_e64 0, [[S_MOV_B32_]], 0, [[S_MOV_B32_]], 0, [[V_MOV_B32_e32_]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_FMAC_F32_e64_1:%[0-9]+]]:vgpr_32 = contract reassoc nofpexcept V_FMAC_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY]], 0, [[V_FMAC_F32_e64_]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = contract reassoc nofpexcept V_ADD_F32_e64 0, [[V_FMAC_F32_e64_1]], 0, [[V_MOV_B32_e32_]], 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2.bb11: ; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.4(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[PHI:%[0-9]+]]:vgpr_32 = PHI [[DEF]], %bb.0, %1, %bb.1 - ; CHECK-NEXT: [[PHI1:%[0-9]+]]:vgpr_32 = PHI [[DEF]], %bb.0, %2, %bb.1 + ; CHECK-NEXT: [[PHI:%[0-9]+]]:vgpr_32 = PHI [[DEF]], %bb.0, [[V_FMAC_F32_e64_1]], %bb.1 + ; CHECK-NEXT: [[PHI1:%[0-9]+]]:vgpr_32 = PHI [[DEF]], %bb.0, [[V_ADD_F32_e64_]], %bb.1 ; CHECK-NEXT: [[PHI2:%[0-9]+]]:sreg_32_xm0_xexec = PHI [[S_MOV_B32_1]], %bb.0, [[S_MOV_B32_2]], %bb.1 ; CHECK-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, [[PHI2]], implicit $exec ; CHECK-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[V_CNDMASK_B32_e64_]] - ; CHECK-NEXT: S_CMP_LG_U32 killed [[COPY1]], killed [[S_MOV_B32_3]], implicit-def $scc - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $scc - ; CHECK-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32 = S_AND_B32 $exec_lo, killed [[COPY2]], implicit-def dead $scc - ; CHECK-NEXT: $vcc_lo = COPY [[S_AND_B32_1]] + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY [[V_CNDMASK_B32_e64_]] + ; CHECK-NEXT: S_CMP_LG_U32 killed [[PRED_COPY1]], killed [[S_MOV_B32_3]], implicit-def $scc + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $scc + ; CHECK-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32 = S_AND_B32 $exec_lo, killed [[PRED_COPY2]], implicit-def dead $scc + ; CHECK-NEXT: $vcc_lo = PRED_COPY [[S_AND_B32_1]] ; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.4, implicit $vcc ; CHECK-NEXT: S_BRANCH %bb.3 ; CHECK-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/dagcombine-lshr-and-cmp.ll b/llvm/test/CodeGen/AMDGPU/dagcombine-lshr-and-cmp.ll --- a/llvm/test/CodeGen/AMDGPU/dagcombine-lshr-and-cmp.ll +++ b/llvm/test/CodeGen/AMDGPU/dagcombine-lshr-and-cmp.ll @@ -7,8 +7,8 @@ ; GCN-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; GCN-NEXT: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 2, [[COPY]], implicit $exec + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 2, [[PRED_COPY]], implicit $exec ; GCN-NEXT: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_NE_U32_e64 killed [[V_AND_B32_e64_]], 0, implicit $exec ; GCN-NEXT: [[SI_IF:%[0-9]+]]:sreg_64 = SI_IF killed [[V_CMP_NE_U32_e64_]], %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec ; GCN-NEXT: S_BRANCH %bb.1 @@ -17,13 +17,13 @@ ; GCN-NEXT: successors: 
%bb.2(0x80000000) ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2 - ; GCN-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 killed [[S_MOV_B32_]], [[COPY]], implicit $exec + ; GCN-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 killed [[S_MOV_B32_]], [[PRED_COPY]], implicit $exec ; GCN-NEXT: S_BRANCH %bb.2 ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.2.UnifiedReturnBlock: - ; GCN-NEXT: [[PHI:%[0-9]+]]:vgpr_32 = PHI [[COPY]], %bb.0, [[V_LSHLREV_B32_e64_]], %bb.1 + ; GCN-NEXT: [[PHI:%[0-9]+]]:vgpr_32 = PHI [[PRED_COPY]], %bb.0, [[V_LSHLREV_B32_e64_]], %bb.1 ; GCN-NEXT: SI_END_CF [[SI_IF]], implicit-def dead $exec, implicit-def dead $scc, implicit $exec - ; GCN-NEXT: $vgpr0 = COPY [[PHI]] + ; GCN-NEXT: $vgpr0 = PRED_COPY [[PHI]] ; GCN-NEXT: SI_RETURN implicit $vgpr0 entry: %0 = and i32 %x, 2 @@ -45,16 +45,16 @@ ; GCN-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; GCN-NEXT: liveins: $sgpr0_sgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr0_sgpr1 - ; GCN-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 9, 0 :: (dereferenceable invariant load (s64) from %ir.out.kernarg.offset, align 4, addrspace 4) - ; GCN-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]](p4), 11, 0 :: (dereferenceable invariant load (s32) from %ir.x.kernarg.offset, addrspace 4) - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY [[S_LOAD_DWORDX2_IMM]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_64(p4) = PRED_COPY $sgpr0_sgpr1 + ; GCN-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[PRED_COPY]](p4), 9, 0 :: (dereferenceable invariant load (s64) from %ir.out.kernarg.offset, align 4, addrspace 4) + ; GCN-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[PRED_COPY]](p4), 11, 0 :: (dereferenceable invariant load (s32) from %ir.x.kernarg.offset, addrspace 4) + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64 = PRED_COPY [[S_LOAD_DWORDX2_IMM]] ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2 ; GCN-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[S_LOAD_DWORD_IMM]], killed [[S_MOV_B32_]], implicit-def dead $scc ; GCN-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32 = S_AND_B32 2, [[S_LOAD_DWORD_IMM]], implicit-def dead $scc ; GCN-NEXT: S_CMP_LG_U32 killed [[S_AND_B32_1]], 0, implicit-def $scc - ; GCN-NEXT: [[COPY2:%[0-9]+]]:sreg_64 = COPY $scc - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sreg_64_xexec = COPY [[COPY2]] + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_64 = PRED_COPY $scc + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_64_xexec = PRED_COPY [[PRED_COPY2]] ; GCN-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GCN-NEXT: S_CMP_EQ_U32 killed [[S_AND_B32_]], killed [[S_MOV_B32_1]], implicit-def $scc ; GCN-NEXT: S_CBRANCH_SCC1 %bb.2, implicit $scc @@ -62,23 +62,23 @@ ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.1.out.true: ; GCN-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 -1 - ; GCN-NEXT: [[S_XOR_B64_:%[0-9]+]]:sreg_64_xexec = S_XOR_B64 [[COPY3]], killed [[S_MOV_B64_]], implicit-def dead $scc - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[COPY1]].sub1 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[COPY1]].sub0 + ; GCN-NEXT: [[S_XOR_B64_:%[0-9]+]]:sreg_64_xexec = S_XOR_B64 [[PRED_COPY3]], killed [[S_MOV_B64_]], implicit-def dead $scc + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY1]].sub1 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY1]].sub0 ; GCN-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GCN-NEXT: 
[[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY5]], %subreg.sub0, killed [[COPY4]], %subreg.sub1, killed [[S_MOV_B32_3]], %subreg.sub2, killed [[S_MOV_B32_2]], %subreg.sub3 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[PRED_COPY5]], %subreg.sub0, killed [[PRED_COPY4]], %subreg.sub1, killed [[S_MOV_B32_3]], %subreg.sub2, killed [[S_MOV_B32_2]], %subreg.sub3 ; GCN-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed [[S_XOR_B64_]], implicit $exec ; GCN-NEXT: BUFFER_STORE_BYTE_OFFSET killed [[V_CNDMASK_B32_e64_]], killed [[REG_SEQUENCE]], 0, 0, 0, 0, implicit $exec :: (store (s8) into %ir.out.load, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.2.out.else: - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[COPY1]].sub1 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[COPY1]].sub0 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY1]].sub1 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY1]].sub0 ; GCN-NEXT: [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GCN-NEXT: [[S_MOV_B32_5:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 - ; GCN-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY7]], %subreg.sub0, killed [[COPY6]], %subreg.sub1, killed [[S_MOV_B32_5]], %subreg.sub2, killed [[S_MOV_B32_4]], %subreg.sub3 - ; GCN-NEXT: [[V_CNDMASK_B32_e64_1:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, [[COPY3]], implicit $exec + ; GCN-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[PRED_COPY7]], %subreg.sub0, killed [[PRED_COPY6]], %subreg.sub1, killed [[S_MOV_B32_5]], %subreg.sub2, killed [[S_MOV_B32_4]], %subreg.sub3 + ; GCN-NEXT: [[V_CNDMASK_B32_e64_1:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, [[PRED_COPY3]], implicit $exec ; GCN-NEXT: BUFFER_STORE_BYTE_OFFSET killed [[V_CNDMASK_B32_e64_1]], killed [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (store (s8) into %ir.out.load, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 entry: diff --git a/llvm/test/CodeGen/AMDGPU/divergence-driven-ctpop.ll b/llvm/test/CodeGen/AMDGPU/divergence-driven-ctpop.ll --- a/llvm/test/CodeGen/AMDGPU/divergence-driven-ctpop.ll +++ b/llvm/test/CodeGen/AMDGPU/divergence-driven-ctpop.ll @@ -10,7 +10,7 @@ ; GCN-LABEL: name: s_ctpop_i64 ; GCN: %[[BCNT:[0-9]+]]:sreg_32 = S_BCNT1_I32_B64 -; GCN: %[[SREG1:[0-9]+]]:sreg_32 = COPY %[[BCNT]] +; GCN: %[[SREG1:[0-9]+]]:sreg_32 = PRED_COPY %[[BCNT]] ; GCN: %[[SREG2:[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GCN: REG_SEQUENCE killed %[[SREG1]], %subreg.sub0, killed %[[SREG2]], %subreg.sub1 define amdgpu_kernel void @s_ctpop_i64(ptr addrspace(1) noalias %out, i64 %val) nounwind { diff --git a/llvm/test/CodeGen/AMDGPU/divergence-driven-trunc-to-i1.ll b/llvm/test/CodeGen/AMDGPU/divergence-driven-trunc-to-i1.ll --- a/llvm/test/CodeGen/AMDGPU/divergence-driven-trunc-to-i1.ll +++ b/llvm/test/CodeGen/AMDGPU/divergence-driven-trunc-to-i1.ll @@ -6,22 +6,22 @@ ; GCN: bb.0 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr0_sgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr0_sgpr1 - ; GCN-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 9, 0 :: (dereferenceable invariant load (s64) from %ir.out.kernarg.offset, align 4, addrspace 4) - ; GCN-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]](p4), 11, 0 :: (dereferenceable invariant load (s32) from %ir.z.kernarg.offset.align.down, addrspace 4) - ; GCN-NEXT: 
[[COPY1:%[0-9]+]]:sreg_32 = COPY [[S_LOAD_DWORDX2_IMM]].sub1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[S_LOAD_DWORDX2_IMM]].sub0 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_64(p4) = PRED_COPY $sgpr0_sgpr1 + ; GCN-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[PRED_COPY]](p4), 9, 0 :: (dereferenceable invariant load (s64) from %ir.out.kernarg.offset, align 4, addrspace 4) + ; GCN-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[PRED_COPY]](p4), 11, 0 :: (dereferenceable invariant load (s32) from %ir.z.kernarg.offset.align.down, addrspace 4) + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY [[S_LOAD_DWORDX2_IMM]].sub1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY [[S_LOAD_DWORDX2_IMM]].sub0 ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GCN-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY2]], %subreg.sub0, killed [[COPY1]], %subreg.sub1, killed [[S_MOV_B32_1]], %subreg.sub2, killed [[S_MOV_B32_]], %subreg.sub3 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[PRED_COPY2]], %subreg.sub0, killed [[PRED_COPY1]], %subreg.sub1, killed [[S_MOV_B32_1]], %subreg.sub2, killed [[S_MOV_B32_]], %subreg.sub3 ; GCN-NEXT: [[S_SEXT_I32_I16_:%[0-9]+]]:sreg_32 = S_SEXT_I32_I16 [[S_LOAD_DWORD_IMM]] ; GCN-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 65536, [[S_LOAD_DWORD_IMM]], implicit-def dead $scc ; GCN-NEXT: S_CMP_LG_U32 killed [[S_AND_B32_]], 0, implicit-def $scc - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sreg_64 = COPY $scc + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_64 = PRED_COPY $scc ; GCN-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GCN-NEXT: S_CMP_LT_I32 killed [[S_SEXT_I32_I16_]], killed [[S_MOV_B32_2]], implicit-def $scc - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sreg_64 = COPY $scc - ; GCN-NEXT: [[S_OR_B64_:%[0-9]+]]:sreg_64_xexec = S_OR_B64 killed [[COPY4]], killed [[COPY3]], implicit-def dead $scc + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_64 = PRED_COPY $scc + ; GCN-NEXT: [[S_OR_B64_:%[0-9]+]]:sreg_64_xexec = S_OR_B64 killed [[PRED_COPY4]], killed [[PRED_COPY3]], implicit-def dead $scc ; GCN-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed [[S_OR_B64_]], implicit $exec ; GCN-NEXT: BUFFER_STORE_BYTE_OFFSET killed [[V_CNDMASK_B32_e64_]], killed [[REG_SEQUENCE]], 0, 0, 0, 0, implicit $exec :: (store (s8) into %ir.out.load, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 @@ -36,16 +36,16 @@ ; GCN: bb.0 (%ir-block.0): ; GCN-NEXT: liveins: $vgpr2, $vgpr3 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GCN-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 1, [[COPY]], implicit $exec + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GCN-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 1, [[PRED_COPY]], implicit $exec ; GCN-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 killed [[V_AND_B32_e64_]], 1, implicit $exec - ; GCN-NEXT: [[V_BFE_I32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_I32_e64 [[COPY1]], 0, 16, implicit $exec + ; GCN-NEXT: [[V_BFE_I32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_I32_e64 [[PRED_COPY1]], 0, 16, implicit $exec ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GCN-NEXT: [[V_CMP_LT_I32_e64_:%[0-9]+]]:sreg_64 = V_CMP_LT_I32_e64 killed [[V_BFE_I32_e64_]], killed [[S_MOV_B32_]], implicit $exec ; GCN-NEXT: 
[[S_OR_B64_:%[0-9]+]]:sreg_64_xexec = S_OR_B64 killed [[V_CMP_LT_I32_e64_]], killed [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; GCN-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed [[S_OR_B64_]], implicit $exec - ; GCN-NEXT: $vgpr0 = COPY [[V_CNDMASK_B32_e64_]] + ; GCN-NEXT: $vgpr0 = PRED_COPY [[V_CNDMASK_B32_e64_]] ; GCN-NEXT: SI_RETURN implicit $vgpr0 %setcc = icmp slt i16 %x, 0 %select = select i1 %setcc, i1 true, i1 %z @@ -57,23 +57,23 @@ ; GCN: bb.0 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr0_sgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr0_sgpr1 - ; GCN-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 9, 0 :: (dereferenceable invariant load (s64) from %ir.out.kernarg.offset, align 4, addrspace 4) - ; GCN-NEXT: [[S_LOAD_DWORDX2_IMM1:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 11, 0 :: (dereferenceable invariant load (s64) from %ir.x.kernarg.offset, align 4, addrspace 4) - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[S_LOAD_DWORDX2_IMM]].sub1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[S_LOAD_DWORDX2_IMM]].sub0 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_64(p4) = PRED_COPY $sgpr0_sgpr1 + ; GCN-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[PRED_COPY]](p4), 9, 0 :: (dereferenceable invariant load (s64) from %ir.out.kernarg.offset, align 4, addrspace 4) + ; GCN-NEXT: [[S_LOAD_DWORDX2_IMM1:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[PRED_COPY]](p4), 11, 0 :: (dereferenceable invariant load (s64) from %ir.x.kernarg.offset, align 4, addrspace 4) + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY [[S_LOAD_DWORDX2_IMM]].sub1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY [[S_LOAD_DWORDX2_IMM]].sub0 ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GCN-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY2]], %subreg.sub0, killed [[COPY1]], %subreg.sub1, killed [[S_MOV_B32_1]], %subreg.sub2, killed [[S_MOV_B32_]], %subreg.sub3 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[S_LOAD_DWORDX2_IMM1]].sub0 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[S_LOAD_DWORDX2_IMM1]].sub1 - ; GCN-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, killed [[COPY4]], implicit-def dead $scc + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[PRED_COPY2]], %subreg.sub0, killed [[PRED_COPY1]], %subreg.sub1, killed [[S_MOV_B32_1]], %subreg.sub2, killed [[S_MOV_B32_]], %subreg.sub3 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY [[S_LOAD_DWORDX2_IMM1]].sub0 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY [[S_LOAD_DWORDX2_IMM1]].sub1 + ; GCN-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, killed [[PRED_COPY4]], implicit-def dead $scc ; GCN-NEXT: S_CMP_EQ_U32 killed [[S_AND_B32_]], 1, implicit-def $scc - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sreg_64 = COPY $scc + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_64 = PRED_COPY $scc ; GCN-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GCN-NEXT: S_CMP_LT_I32 killed [[COPY3]], killed [[S_MOV_B32_2]], implicit-def $scc - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sreg_64 = COPY $scc - ; GCN-NEXT: [[S_OR_B64_:%[0-9]+]]:sreg_64_xexec = S_OR_B64 killed [[COPY6]], killed [[COPY5]], implicit-def dead $scc + ; GCN-NEXT: S_CMP_LT_I32 killed [[PRED_COPY3]], killed [[S_MOV_B32_2]], implicit-def $scc + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_64 = PRED_COPY $scc + ; GCN-NEXT: 
[[S_OR_B64_:%[0-9]+]]:sreg_64_xexec = S_OR_B64 killed [[PRED_COPY6]], killed [[PRED_COPY5]], implicit-def dead $scc ; GCN-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed [[S_OR_B64_]], implicit $exec ; GCN-NEXT: BUFFER_STORE_BYTE_OFFSET killed [[V_CNDMASK_B32_e64_]], killed [[REG_SEQUENCE]], 0, 0, 0, 0, implicit $exec :: (store (s8) into %ir.out.load, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 @@ -88,15 +88,15 @@ ; GCN: bb.0 (%ir-block.0): ; GCN-NEXT: liveins: $vgpr2, $vgpr3 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GCN-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 1, [[COPY]], implicit $exec + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GCN-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 1, [[PRED_COPY]], implicit $exec ; GCN-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 killed [[V_AND_B32_e64_]], 1, implicit $exec ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GCN-NEXT: [[V_CMP_LT_I32_e64_:%[0-9]+]]:sreg_64 = V_CMP_LT_I32_e64 [[COPY1]], killed [[S_MOV_B32_]], implicit $exec + ; GCN-NEXT: [[V_CMP_LT_I32_e64_:%[0-9]+]]:sreg_64 = V_CMP_LT_I32_e64 [[PRED_COPY1]], killed [[S_MOV_B32_]], implicit $exec ; GCN-NEXT: [[S_OR_B64_:%[0-9]+]]:sreg_64_xexec = S_OR_B64 killed [[V_CMP_LT_I32_e64_]], killed [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; GCN-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed [[S_OR_B64_]], implicit $exec - ; GCN-NEXT: $vgpr0 = COPY [[V_CNDMASK_B32_e64_]] + ; GCN-NEXT: $vgpr0 = PRED_COPY [[V_CNDMASK_B32_e64_]] ; GCN-NEXT: SI_RETURN implicit $vgpr0 %setcc = icmp slt i32 %x, 0 %select = select i1 %setcc, i1 true, i1 %z @@ -108,27 +108,27 @@ ; GCN: bb.0 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr0_sgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr0_sgpr1 - ; GCN-NEXT: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]](p4), 9, 0 :: (dereferenceable invariant load (s128) from %ir.out.kernarg.offset, align 4, addrspace 4) - ; GCN-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]](p4), 13, 0 :: (dereferenceable invariant load (s32) from %ir.z.kernarg.offset.align.down, addrspace 4) - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[S_LOAD_DWORDX4_IMM]].sub1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[S_LOAD_DWORDX4_IMM]].sub0 - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE killed [[COPY2]], %subreg.sub0, killed [[COPY1]], %subreg.sub1 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_64(p4) = PRED_COPY $sgpr0_sgpr1 + ; GCN-NEXT: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[PRED_COPY]](p4), 9, 0 :: (dereferenceable invariant load (s128) from %ir.out.kernarg.offset, align 4, addrspace 4) + ; GCN-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[PRED_COPY]](p4), 13, 0 :: (dereferenceable invariant load (s32) from %ir.z.kernarg.offset.align.down, addrspace 4) + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY [[S_LOAD_DWORDX4_IMM]].sub1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY [[S_LOAD_DWORDX4_IMM]].sub0 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE killed [[PRED_COPY2]], %subreg.sub0, killed 
[[PRED_COPY1]], %subreg.sub1 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY [[REG_SEQUENCE]].sub0 ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GCN-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 - ; GCN-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY4]], %subreg.sub0, killed [[COPY3]], %subreg.sub1, killed [[S_MOV_B32_1]], %subreg.sub2, killed [[S_MOV_B32_]], %subreg.sub3 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_LOAD_DWORDX4_IMM]].sub3 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_LOAD_DWORDX4_IMM]].sub2 - ; GCN-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE killed [[COPY6]], %subreg.sub0, killed [[COPY5]], %subreg.sub1 + ; GCN-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[PRED_COPY4]], %subreg.sub0, killed [[PRED_COPY3]], %subreg.sub1, killed [[S_MOV_B32_1]], %subreg.sub2, killed [[S_MOV_B32_]], %subreg.sub3 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY [[S_LOAD_DWORDX4_IMM]].sub3 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY [[S_LOAD_DWORDX4_IMM]].sub2 + ; GCN-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE killed [[PRED_COPY6]], %subreg.sub0, killed [[PRED_COPY5]], %subreg.sub1 ; GCN-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, killed [[S_LOAD_DWORD_IMM]], implicit-def dead $scc ; GCN-NEXT: S_CMP_EQ_U32 killed [[S_AND_B32_]], 1, implicit-def $scc - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sreg_64 = COPY $scc + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_64 = PRED_COPY $scc ; GCN-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY killed [[S_MOV_B64_]] - ; GCN-NEXT: [[V_CMP_LT_I64_e64_:%[0-9]+]]:sreg_64 = V_CMP_LT_I64_e64 killed [[REG_SEQUENCE2]], [[COPY8]], implicit $exec - ; GCN-NEXT: [[S_OR_B64_:%[0-9]+]]:sreg_64_xexec = S_OR_B64 killed [[V_CMP_LT_I64_e64_]], killed [[COPY7]], implicit-def dead $scc + ; GCN-NEXT: [[PRED_COPY8:%[0-9]+]]:vreg_64 = PRED_COPY killed [[S_MOV_B64_]] + ; GCN-NEXT: [[V_CMP_LT_I64_e64_:%[0-9]+]]:sreg_64 = V_CMP_LT_I64_e64 killed [[REG_SEQUENCE2]], [[PRED_COPY8]], implicit $exec + ; GCN-NEXT: [[S_OR_B64_:%[0-9]+]]:sreg_64_xexec = S_OR_B64 killed [[V_CMP_LT_I64_e64_]], killed [[PRED_COPY7]], implicit-def dead $scc ; GCN-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed [[S_OR_B64_]], implicit $exec ; GCN-NEXT: BUFFER_STORE_BYTE_OFFSET killed [[V_CNDMASK_B32_e64_]], killed [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (store (s8) into %ir.2, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 @@ -143,18 +143,18 @@ ; GCN: bb.0 (%ir-block.0): ; GCN-NEXT: liveins: $vgpr2, $vgpr3, $vgpr4 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GCN-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 1, [[COPY]], implicit $exec + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GCN-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 1, [[PRED_COPY]], implicit $exec ; GCN-NEXT: 
[[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 killed [[V_AND_B32_e64_]], 1, implicit $exec ; GCN-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY killed [[S_MOV_B64_]] - ; GCN-NEXT: [[V_CMP_LT_I64_e64_:%[0-9]+]]:sreg_64 = V_CMP_LT_I64_e64 killed [[REG_SEQUENCE]], [[COPY3]], implicit $exec + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:vreg_64 = PRED_COPY killed [[S_MOV_B64_]] + ; GCN-NEXT: [[V_CMP_LT_I64_e64_:%[0-9]+]]:sreg_64 = V_CMP_LT_I64_e64 killed [[REG_SEQUENCE]], [[PRED_COPY3]], implicit $exec ; GCN-NEXT: [[S_OR_B64_:%[0-9]+]]:sreg_64_xexec = S_OR_B64 killed [[V_CMP_LT_I64_e64_]], killed [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; GCN-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed [[S_OR_B64_]], implicit $exec - ; GCN-NEXT: $vgpr0 = COPY [[V_CNDMASK_B32_e64_]] + ; GCN-NEXT: $vgpr0 = PRED_COPY [[V_CNDMASK_B32_e64_]] ; GCN-NEXT: SI_RETURN implicit $vgpr0 %setcc = icmp slt i64 %x, 0 %select = select i1 %setcc, i1 true, i1 %z diff --git a/llvm/test/CodeGen/AMDGPU/extend-phi-subrange-not-in-parent.mir b/llvm/test/CodeGen/AMDGPU/extend-phi-subrange-not-in-parent.mir --- a/llvm/test/CodeGen/AMDGPU/extend-phi-subrange-not-in-parent.mir +++ b/llvm/test/CodeGen/AMDGPU/extend-phi-subrange-not-in-parent.mir @@ -35,9 +35,9 @@ ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: undef %6.sub1_sub2_sub3_sub4_sub5_sub6_sub7_sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15_sub16:av_1024_align2 = COPY [[COPY]].sub1_sub2_sub3_sub4_sub5_sub6_sub7_sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15_sub16 { - ; CHECK-NEXT: internal %6.sub17_lo16_sub17_hi16_sub18_lo16_sub18_hi16_sub19_lo16_sub19_hi16_sub20_lo16_sub20_hi16_sub21_lo16_sub21_hi16_sub22_lo16_sub22_hi16_sub23_lo16_sub23_hi16_sub24_lo16_sub24_hi16_sub25_lo16_sub25_hi16_sub26_lo16_sub26_hi16_sub27_lo16_sub27_hi16_sub28_lo16_sub28_hi16:av_1024_align2 = COPY [[COPY]].sub17_lo16_sub17_hi16_sub18_lo16_sub18_hi16_sub19_lo16_sub19_hi16_sub20_lo16_sub20_hi16_sub21_lo16_sub21_hi16_sub22_lo16_sub22_hi16_sub23_lo16_sub23_hi16_sub24_lo16_sub24_hi16_sub25_lo16_sub25_hi16_sub26_lo16_sub26_hi16_sub27_lo16_sub27_hi16_sub28_lo16_sub28_hi16 - ; CHECK-NEXT: internal %6.sub29_sub30_sub31:av_1024_align2 = COPY [[COPY]].sub29_sub30_sub31 + ; CHECK-NEXT: undef %6.sub1_sub2_sub3_sub4_sub5_sub6_sub7_sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15_sub16:av_1024_align2 = PRED_COPY [[COPY]].sub1_sub2_sub3_sub4_sub5_sub6_sub7_sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15_sub16 { + ; CHECK-NEXT: internal %6.sub17_lo16_sub17_hi16_sub18_lo16_sub18_hi16_sub19_lo16_sub19_hi16_sub20_lo16_sub20_hi16_sub21_lo16_sub21_hi16_sub22_lo16_sub22_hi16_sub23_lo16_sub23_hi16_sub24_lo16_sub24_hi16_sub25_lo16_sub25_hi16_sub26_lo16_sub26_hi16_sub27_lo16_sub27_hi16_sub28_lo16_sub28_hi16:av_1024_align2 = PRED_COPY [[COPY]].sub17_lo16_sub17_hi16_sub18_lo16_sub18_hi16_sub19_lo16_sub19_hi16_sub20_lo16_sub20_hi16_sub21_lo16_sub21_hi16_sub22_lo16_sub22_hi16_sub23_lo16_sub23_hi16_sub24_lo16_sub24_hi16_sub25_lo16_sub25_hi16_sub26_lo16_sub26_hi16_sub27_lo16_sub27_hi16_sub28_lo16_sub28_hi16 + ; CHECK-NEXT: internal %6.sub29_sub30_sub31:av_1024_align2 = PRED_COPY [[COPY]].sub29_sub30_sub31 ; CHECK-NEXT: } ; CHECK-NEXT: %6.sub0:av_1024_align2 = IMPLICIT_DEF ; CHECK-NEXT: S_NOP 0, implicit %6.sub0 diff --git a/llvm/test/CodeGen/AMDGPU/extract_subvector_vec4_vec3.ll b/llvm/test/CodeGen/AMDGPU/extract_subvector_vec4_vec3.ll --- 
a/llvm/test/CodeGen/AMDGPU/extract_subvector_vec4_vec3.ll +++ b/llvm/test/CodeGen/AMDGPU/extract_subvector_vec4_vec3.ll @@ -10,18 +10,18 @@ ; GCN: bb.0.main_body: ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GCN-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[DEF]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY [[DEF]] ; GCN-NEXT: [[DEF1:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF - ; GCN-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY]], [[DEF1]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 7) - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub2 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub1 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub0 - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_96 = REG_SEQUENCE killed [[COPY3]], %subreg.sub0, killed [[COPY2]], %subreg.sub1, killed [[COPY1]], %subreg.sub2 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:vreg_96 = COPY [[REG_SEQUENCE]] + ; GCN-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY]], [[DEF1]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 7) + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_32 = PRED_COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub2 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub1 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub0 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_96 = REG_SEQUENCE killed [[PRED_COPY3]], %subreg.sub0, killed [[PRED_COPY2]], %subreg.sub1, killed [[PRED_COPY1]], %subreg.sub2 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:vreg_96 = PRED_COPY [[REG_SEQUENCE]] ; GCN-NEXT: [[DEF2:%[0-9]+]]:sreg_32 = IMPLICIT_DEF - ; GCN-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[DEF2]] + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[DEF2]] ; GCN-NEXT: [[DEF3:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF - ; GCN-NEXT: BUFFER_STORE_DWORDX3_OFFEN_exact killed [[COPY4]], [[COPY5]], [[DEF3]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable store (s96), align 1, addrspace 7) + ; GCN-NEXT: BUFFER_STORE_DWORDX3_OFFEN_exact killed [[PRED_COPY4]], [[PRED_COPY5]], [[DEF3]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable store (s96), align 1, addrspace 7) ; GCN-NEXT: S_ENDPGM 0 main_body: %tmp25 = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> undef, i32 undef, i32 0, i32 0) diff --git a/llvm/test/CodeGen/AMDGPU/fdiv-nofpexcept.ll b/llvm/test/CodeGen/AMDGPU/fdiv-nofpexcept.ll --- a/llvm/test/CodeGen/AMDGPU/fdiv-nofpexcept.ll +++ b/llvm/test/CodeGen/AMDGPU/fdiv-nofpexcept.ll @@ -9,26 +9,26 @@ ; GCN: bb.0.entry: ; GCN-NEXT: liveins: $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: %4:vgpr_32, %5:sreg_64 = nofpexcept V_DIV_SCALE_F32_e64 0, [[COPY1]], 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: %6:vgpr_32, %7:sreg_64 = nofpexcept V_DIV_SCALE_F32_e64 0, [[COPY]], 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: %8:vgpr_32 = nofpexcept V_RCP_F32_e64 0, %6, 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: 
[[V_DIV_SCALE_F32_e64_:%[0-9]+]]:vgpr_32, [[V_DIV_SCALE_F32_e64_1:%[0-9]+]]:sreg_64 = nofpexcept V_DIV_SCALE_F32_e64 0, [[PRED_COPY1]], 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[V_DIV_SCALE_F32_e64_2:%[0-9]+]]:vgpr_32, [[V_DIV_SCALE_F32_e64_3:%[0-9]+]]:sreg_64 = nofpexcept V_DIV_SCALE_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[V_RCP_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_RCP_F32_e64 0, [[V_DIV_SCALE_F32_e64_2]], 0, 0, implicit $mode, implicit $exec ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 3 ; GCN-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 1065353216 ; GCN-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GCN-NEXT: S_SETREG_B32_mode killed [[S_MOV_B32_]], 2305, implicit-def $mode, implicit $mode - ; GCN-NEXT: %12:vgpr_32 = nofpexcept V_FMA_F32_e64 1, %6, 0, %8, 0, killed [[S_MOV_B32_1]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: %13:vgpr_32 = nofpexcept V_FMA_F32_e64 0, killed %12, 0, %8, 0, %8, 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: %14:vgpr_32 = nofpexcept V_MUL_F32_e64 0, %4, 0, %13, 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: %15:vgpr_32 = nofpexcept V_FMA_F32_e64 1, %6, 0, %14, 0, %4, 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: %16:vgpr_32 = nofpexcept V_FMA_F32_e64 0, killed %15, 0, %13, 0, %14, 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: %17:vgpr_32 = nofpexcept V_FMA_F32_e64 1, %6, 0, %16, 0, %4, 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[V_FMA_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_FMA_F32_e64 1, [[V_DIV_SCALE_F32_e64_2]], 0, [[V_RCP_F32_e64_]], 0, killed [[S_MOV_B32_1]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[V_FMA_F32_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_FMA_F32_e64 0, killed [[V_FMA_F32_e64_]], 0, [[V_RCP_F32_e64_]], 0, [[V_RCP_F32_e64_]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[V_MUL_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MUL_F32_e64 0, [[V_DIV_SCALE_F32_e64_]], 0, [[V_FMA_F32_e64_1]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[V_FMA_F32_e64_2:%[0-9]+]]:vgpr_32 = nofpexcept V_FMA_F32_e64 1, [[V_DIV_SCALE_F32_e64_2]], 0, [[V_MUL_F32_e64_]], 0, [[V_DIV_SCALE_F32_e64_]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[V_FMA_F32_e64_3:%[0-9]+]]:vgpr_32 = nofpexcept V_FMA_F32_e64 0, killed [[V_FMA_F32_e64_2]], 0, [[V_FMA_F32_e64_1]], 0, [[V_MUL_F32_e64_]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[V_FMA_F32_e64_4:%[0-9]+]]:vgpr_32 = nofpexcept V_FMA_F32_e64 1, [[V_DIV_SCALE_F32_e64_2]], 0, [[V_FMA_F32_e64_3]], 0, [[V_DIV_SCALE_F32_e64_]], 0, 0, implicit $mode, implicit $exec ; GCN-NEXT: S_SETREG_B32_mode killed [[S_MOV_B32_2]], 2305, implicit-def dead $mode, implicit $mode - ; GCN-NEXT: $vcc = COPY %5 - ; GCN-NEXT: %18:vgpr_32 = nofpexcept V_DIV_FMAS_F32_e64 0, killed %17, 0, %13, 0, %16, 0, 0, implicit $mode, implicit $vcc, implicit $exec - ; GCN-NEXT: %19:vgpr_32 = nofpexcept V_DIV_FIXUP_F32_e64 0, killed %18, 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: $vgpr0 = COPY %19 + ; GCN-NEXT: $vcc = PRED_COPY [[V_DIV_SCALE_F32_e64_1]] + ; GCN-NEXT: [[V_DIV_FMAS_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_DIV_FMAS_F32_e64 0, killed [[V_FMA_F32_e64_4]], 0, [[V_FMA_F32_e64_1]], 0, [[V_FMA_F32_e64_3]], 0, 0, implicit $mode, implicit $vcc, implicit $exec + ; GCN-NEXT: [[V_DIV_FIXUP_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_DIV_FIXUP_F32_e64 0, killed [[V_DIV_FMAS_F32_e64_]], 0, [[PRED_COPY]], 0, 
[[PRED_COPY1]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr0 = PRED_COPY [[V_DIV_FIXUP_F32_e64_]] ; GCN-NEXT: SI_RETURN implicit $vgpr0 entry: %fdiv = fdiv float %a, %b @@ -40,26 +40,26 @@ ; GCN: bb.0.entry: ; GCN-NEXT: liveins: $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: %4:vgpr_32, %5:sreg_64 = nnan nofpexcept V_DIV_SCALE_F32_e64 0, [[COPY1]], 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: %6:vgpr_32, %7:sreg_64 = nnan nofpexcept V_DIV_SCALE_F32_e64 0, [[COPY]], 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: %8:vgpr_32 = nnan nofpexcept V_RCP_F32_e64 0, %6, 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[V_DIV_SCALE_F32_e64_:%[0-9]+]]:vgpr_32, [[V_DIV_SCALE_F32_e64_1:%[0-9]+]]:sreg_64 = nnan nofpexcept V_DIV_SCALE_F32_e64 0, [[PRED_COPY1]], 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[V_DIV_SCALE_F32_e64_2:%[0-9]+]]:vgpr_32, [[V_DIV_SCALE_F32_e64_3:%[0-9]+]]:sreg_64 = nnan nofpexcept V_DIV_SCALE_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[V_RCP_F32_e64_:%[0-9]+]]:vgpr_32 = nnan nofpexcept V_RCP_F32_e64 0, [[V_DIV_SCALE_F32_e64_2]], 0, 0, implicit $mode, implicit $exec ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 3 ; GCN-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 1065353216 ; GCN-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GCN-NEXT: S_SETREG_B32_mode killed [[S_MOV_B32_]], 2305, implicit-def $mode, implicit $mode - ; GCN-NEXT: %12:vgpr_32 = nnan nofpexcept V_FMA_F32_e64 1, %6, 0, %8, 0, killed [[S_MOV_B32_1]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: %13:vgpr_32 = nnan nofpexcept V_FMA_F32_e64 0, killed %12, 0, %8, 0, %8, 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: %14:vgpr_32 = nnan nofpexcept V_MUL_F32_e64 0, %4, 0, %13, 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: %15:vgpr_32 = nnan nofpexcept V_FMA_F32_e64 1, %6, 0, %14, 0, %4, 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: %16:vgpr_32 = nnan nofpexcept V_FMA_F32_e64 0, killed %15, 0, %13, 0, %14, 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: %17:vgpr_32 = nnan nofpexcept V_FMA_F32_e64 1, %6, 0, %16, 0, %4, 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[V_FMA_F32_e64_:%[0-9]+]]:vgpr_32 = nnan nofpexcept V_FMA_F32_e64 1, [[V_DIV_SCALE_F32_e64_2]], 0, [[V_RCP_F32_e64_]], 0, killed [[S_MOV_B32_1]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[V_FMA_F32_e64_1:%[0-9]+]]:vgpr_32 = nnan nofpexcept V_FMA_F32_e64 0, killed [[V_FMA_F32_e64_]], 0, [[V_RCP_F32_e64_]], 0, [[V_RCP_F32_e64_]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[V_MUL_F32_e64_:%[0-9]+]]:vgpr_32 = nnan nofpexcept V_MUL_F32_e64 0, [[V_DIV_SCALE_F32_e64_]], 0, [[V_FMA_F32_e64_1]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[V_FMA_F32_e64_2:%[0-9]+]]:vgpr_32 = nnan nofpexcept V_FMA_F32_e64 1, [[V_DIV_SCALE_F32_e64_2]], 0, [[V_MUL_F32_e64_]], 0, [[V_DIV_SCALE_F32_e64_]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[V_FMA_F32_e64_3:%[0-9]+]]:vgpr_32 = nnan nofpexcept V_FMA_F32_e64 0, killed [[V_FMA_F32_e64_2]], 0, [[V_FMA_F32_e64_1]], 0, [[V_MUL_F32_e64_]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[V_FMA_F32_e64_4:%[0-9]+]]:vgpr_32 = nnan 
nofpexcept V_FMA_F32_e64 1, [[V_DIV_SCALE_F32_e64_2]], 0, [[V_FMA_F32_e64_3]], 0, [[V_DIV_SCALE_F32_e64_]], 0, 0, implicit $mode, implicit $exec ; GCN-NEXT: S_SETREG_B32_mode killed [[S_MOV_B32_2]], 2305, implicit-def dead $mode, implicit $mode - ; GCN-NEXT: $vcc = COPY %5 - ; GCN-NEXT: %18:vgpr_32 = nnan nofpexcept V_DIV_FMAS_F32_e64 0, killed %17, 0, %13, 0, %16, 0, 0, implicit $mode, implicit $vcc, implicit $exec - ; GCN-NEXT: %19:vgpr_32 = nnan nofpexcept V_DIV_FIXUP_F32_e64 0, killed %18, 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: $vgpr0 = COPY %19 + ; GCN-NEXT: $vcc = PRED_COPY [[V_DIV_SCALE_F32_e64_1]] + ; GCN-NEXT: [[V_DIV_FMAS_F32_e64_:%[0-9]+]]:vgpr_32 = nnan nofpexcept V_DIV_FMAS_F32_e64 0, killed [[V_FMA_F32_e64_4]], 0, [[V_FMA_F32_e64_1]], 0, [[V_FMA_F32_e64_3]], 0, 0, implicit $mode, implicit $vcc, implicit $exec + ; GCN-NEXT: [[V_DIV_FIXUP_F32_e64_:%[0-9]+]]:vgpr_32 = nnan nofpexcept V_DIV_FIXUP_F32_e64 0, killed [[V_DIV_FMAS_F32_e64_]], 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr0 = PRED_COPY [[V_DIV_FIXUP_F32_e64_]] ; GCN-NEXT: SI_RETURN implicit $vgpr0 entry: %fdiv = fdiv nnan float %a, %b diff --git a/llvm/test/CodeGen/AMDGPU/fix-vgpr-copies.mir b/llvm/test/CodeGen/AMDGPU/fix-vgpr-copies.mir --- a/llvm/test/CodeGen/AMDGPU/fix-vgpr-copies.mir +++ b/llvm/test/CodeGen/AMDGPU/fix-vgpr-copies.mir @@ -1,8 +1,8 @@ # RUN: llc -march=amdgcn -start-after=greedy -disable-copyprop -stop-after=si-optimize-exec-masking -o - %s | FileCheck %s # Check that we first do all vector instructions and only then change exec -# CHECK-DAG: COPY $vgpr10_vgpr11 -# CHECK-DAG: COPY $vgpr12_vgpr13 -# CHECK: $exec = COPY +# CHECK-DAG: PRED_COPY $vgpr10_vgpr11 +# CHECK-DAG: PRED_COPY $vgpr12_vgpr13 +# CHECK: $exec = PRED_COPY --- name: main @@ -35,10 +35,10 @@ liveins: $vgpr3, $vgpr10_vgpr11, $vgpr12_vgpr13 $vcc = V_CMP_NE_U32_e64 0, killed $vgpr3, implicit $exec - $sgpr4_sgpr5 = COPY $exec, implicit-def $exec + $sgpr4_sgpr5 = PRED_COPY $exec, implicit-def $exec $sgpr6_sgpr7 = S_AND_B64 $sgpr4_sgpr5, killed $vcc, implicit-def dead $scc $sgpr4_sgpr5 = S_XOR_B64 $sgpr6_sgpr7, killed $sgpr4_sgpr5, implicit-def dead $scc - $vgpr61_vgpr62 = COPY $vgpr10_vgpr11 - $vgpr155_vgpr156 = COPY $vgpr12_vgpr13 + $vgpr61_vgpr62 = PRED_COPY $vgpr10_vgpr11 + $vgpr155_vgpr156 = PRED_COPY $vgpr12_vgpr13 $exec = S_MOV_B64_term killed $sgpr6_sgpr7 ... 
diff --git a/llvm/test/CodeGen/AMDGPU/flat-atomic-fadd.f32.ll b/llvm/test/CodeGen/AMDGPU/flat-atomic-fadd.f32.ll --- a/llvm/test/CodeGen/AMDGPU/flat-atomic-fadd.f32.ll +++ b/llvm/test/CodeGen/AMDGPU/flat-atomic-fadd.f32.ll @@ -7,23 +7,23 @@ ; GFX940: bb.0 (%ir-block.0): ; GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX940-NEXT: {{ $}} - ; GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX940-NEXT: [[COPY3:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]] - ; GFX940-NEXT: FLAT_ATOMIC_ADD_F32 killed [[COPY3]], [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (volatile dereferenceable load store (s32) on %ir.ptr) + ; GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:vreg_64_align2 = PRED_COPY [[REG_SEQUENCE]] + ; GFX940-NEXT: FLAT_ATOMIC_ADD_F32 killed [[PRED_COPY3]], [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (volatile dereferenceable load store (s32) on %ir.ptr) ; GFX940-NEXT: S_ENDPGM 0 ; GFX11-LABEL: name: flat_atomic_fadd_f32_no_rtn_intrinsic ; GFX11: bb.0 (%ir-block.0): ; GFX11-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]] - ; GFX11-NEXT: FLAT_ATOMIC_ADD_F32 killed [[COPY3]], [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (volatile dereferenceable load store (s32) on %ir.ptr) + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX11-NEXT: [[PRED_COPY3:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]] + ; GFX11-NEXT: FLAT_ATOMIC_ADD_F32 killed [[PRED_COPY3]], [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (volatile dereferenceable load store (s32) on %ir.ptr) ; GFX11-NEXT: S_ENDPGM 0 %ret = call float @llvm.amdgcn.flat.atomic.fadd.f32.p1.f32(ptr %ptr, float %data) ret void @@ -34,25 +34,25 @@ ; GFX940: bb.0 (%ir-block.0): ; GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX940-NEXT: {{ $}} - ; GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX940-NEXT: [[COPY3:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]] - ; GFX940-NEXT: [[FLAT_ATOMIC_ADD_F32_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_F32_RTN killed [[COPY3]], [[COPY]], 0, 1, implicit $exec, implicit $flat_scr :: (volatile dereferenceable load store (s32) on %ir.ptr) - ; GFX940-NEXT: $vgpr0 = COPY 
[[FLAT_ATOMIC_ADD_F32_RTN]] + ; GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:vreg_64_align2 = PRED_COPY [[REG_SEQUENCE]] + ; GFX940-NEXT: [[FLAT_ATOMIC_ADD_F32_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_F32_RTN killed [[PRED_COPY3]], [[PRED_COPY]], 0, 1, implicit $exec, implicit $flat_scr :: (volatile dereferenceable load store (s32) on %ir.ptr) + ; GFX940-NEXT: $vgpr0 = PRED_COPY [[FLAT_ATOMIC_ADD_F32_RTN]] ; GFX940-NEXT: SI_RETURN_TO_EPILOG $vgpr0 ; GFX11-LABEL: name: flat_atomic_fadd_f32_rtn_intrinsic ; GFX11: bb.0 (%ir-block.0): ; GFX11-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]] - ; GFX11-NEXT: [[FLAT_ATOMIC_ADD_F32_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_F32_RTN killed [[COPY3]], [[COPY]], 0, 1, implicit $exec, implicit $flat_scr :: (volatile dereferenceable load store (s32) on %ir.ptr) - ; GFX11-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_F32_RTN]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX11-NEXT: [[PRED_COPY3:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]] + ; GFX11-NEXT: [[FLAT_ATOMIC_ADD_F32_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_F32_RTN killed [[PRED_COPY3]], [[PRED_COPY]], 0, 1, implicit $exec, implicit $flat_scr :: (volatile dereferenceable load store (s32) on %ir.ptr) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[FLAT_ATOMIC_ADD_F32_RTN]] ; GFX11-NEXT: SI_RETURN_TO_EPILOG $vgpr0 %ret = call float @llvm.amdgcn.flat.atomic.fadd.f32.p1.f32(ptr %ptr, float %data) ret float %ret @@ -63,23 +63,23 @@ ; GFX940: bb.0 (%ir-block.0): ; GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX940-NEXT: {{ $}} - ; GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX940-NEXT: [[COPY3:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]] - ; GFX940-NEXT: FLAT_ATOMIC_ADD_F32 killed [[COPY3]], [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr) + ; GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:vreg_64_align2 = PRED_COPY [[REG_SEQUENCE]] + ; GFX940-NEXT: FLAT_ATOMIC_ADD_F32 killed [[PRED_COPY3]], [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load store 
syncscope("wavefront") monotonic (s32) on %ir.ptr) ; GFX940-NEXT: S_ENDPGM 0 ; GFX11-LABEL: name: flat_atomic_fadd_f32_no_rtn_atomicrmw ; GFX11: bb.0 (%ir-block.0): ; GFX11-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]] - ; GFX11-NEXT: FLAT_ATOMIC_ADD_F32 killed [[COPY3]], [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr) + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX11-NEXT: [[PRED_COPY3:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]] + ; GFX11-NEXT: FLAT_ATOMIC_ADD_F32 killed [[PRED_COPY3]], [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr) ; GFX11-NEXT: S_ENDPGM 0 %ret = atomicrmw fadd ptr %ptr, float %data syncscope("wavefront") monotonic ret void @@ -90,25 +90,25 @@ ; GFX940: bb.0 (%ir-block.0): ; GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX940-NEXT: {{ $}} - ; GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX940-NEXT: [[COPY3:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]] - ; GFX940-NEXT: [[FLAT_ATOMIC_ADD_F32_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_F32_RTN killed [[COPY3]], [[COPY]], 0, 1, implicit $exec, implicit $flat_scr :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr) - ; GFX940-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_F32_RTN]] + ; GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:vreg_64_align2 = PRED_COPY [[REG_SEQUENCE]] + ; GFX940-NEXT: [[FLAT_ATOMIC_ADD_F32_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_F32_RTN killed [[PRED_COPY3]], [[PRED_COPY]], 0, 1, implicit $exec, implicit $flat_scr :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr) + ; GFX940-NEXT: $vgpr0 = PRED_COPY [[FLAT_ATOMIC_ADD_F32_RTN]] ; GFX940-NEXT: SI_RETURN_TO_EPILOG $vgpr0 ; GFX11-LABEL: name: flat_atomic_fadd_f32_rtn_atomicrmw ; GFX11: bb.0 (%ir-block.0): ; GFX11-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]] - ; GFX11-NEXT: [[FLAT_ATOMIC_ADD_F32_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_F32_RTN killed [[COPY3]], [[COPY]], 
0, 1, implicit $exec, implicit $flat_scr :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr) - ; GFX11-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_F32_RTN]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX11-NEXT: [[PRED_COPY3:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]] + ; GFX11-NEXT: [[FLAT_ATOMIC_ADD_F32_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_F32_RTN killed [[PRED_COPY3]], [[PRED_COPY]], 0, 1, implicit $exec, implicit $flat_scr :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[FLAT_ATOMIC_ADD_F32_RTN]] ; GFX11-NEXT: SI_RETURN_TO_EPILOG $vgpr0 %ret = atomicrmw fadd ptr %ptr, float %data syncscope("wavefront") monotonic ret float %ret diff --git a/llvm/test/CodeGen/AMDGPU/flat-atomic-fadd.f64.ll b/llvm/test/CodeGen/AMDGPU/flat-atomic-fadd.f64.ll --- a/llvm/test/CodeGen/AMDGPU/flat-atomic-fadd.f64.ll +++ b/llvm/test/CodeGen/AMDGPU/flat-atomic-fadd.f64.ll @@ -7,15 +7,15 @@ ; GFX90A_GFX940: bb.0 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]] - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]] - ; GFX90A_GFX940-NEXT: FLAT_ATOMIC_ADD_F64 killed [[COPY4]], killed [[COPY5]], 0, 0, implicit $exec, implicit $flat_scr :: (volatile dereferenceable load store (s64) on %ir.ptr) + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY3]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY4:%[0-9]+]]:vreg_64_align2 = PRED_COPY [[REG_SEQUENCE1]] + ; GFX90A_GFX940-NEXT: [[PRED_COPY5:%[0-9]+]]:vreg_64_align2 = PRED_COPY [[REG_SEQUENCE]] + ; GFX90A_GFX940-NEXT: FLAT_ATOMIC_ADD_F64 killed [[PRED_COPY4]], killed [[PRED_COPY5]], 0, 0, implicit $exec, implicit $flat_scr :: (volatile dereferenceable load store (s64) on %ir.ptr) ; GFX90A_GFX940-NEXT: S_ENDPGM 0 %ret = call double @llvm.amdgcn.flat.atomic.fadd.f64.p1.f64(ptr %ptr, double %data) ret void @@ -26,19 +26,19 @@ ; GFX90A_GFX940: bb.0 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: 
[[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]] - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]] - ; GFX90A_GFX940-NEXT: [[FLAT_ATOMIC_ADD_F64_RTN:%[0-9]+]]:vreg_64_align2 = FLAT_ATOMIC_ADD_F64_RTN killed [[COPY4]], killed [[COPY5]], 0, 1, implicit $exec, implicit $flat_scr :: (volatile dereferenceable load store (s64) on %ir.ptr) - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[FLAT_ATOMIC_ADD_F64_RTN]].sub0 - ; GFX90A_GFX940-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[FLAT_ATOMIC_ADD_F64_RTN]].sub1 - ; GFX90A_GFX940-NEXT: $sgpr0 = COPY [[COPY6]] - ; GFX90A_GFX940-NEXT: $sgpr1 = COPY [[COPY7]] + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY3]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY4:%[0-9]+]]:vreg_64_align2 = PRED_COPY [[REG_SEQUENCE1]] + ; GFX90A_GFX940-NEXT: [[PRED_COPY5:%[0-9]+]]:vreg_64_align2 = PRED_COPY [[REG_SEQUENCE]] + ; GFX90A_GFX940-NEXT: [[FLAT_ATOMIC_ADD_F64_RTN:%[0-9]+]]:vreg_64_align2 = FLAT_ATOMIC_ADD_F64_RTN killed [[PRED_COPY4]], killed [[PRED_COPY5]], 0, 1, implicit $exec, implicit $flat_scr :: (volatile dereferenceable load store (s64) on %ir.ptr) + ; GFX90A_GFX940-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[FLAT_ATOMIC_ADD_F64_RTN]].sub0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[FLAT_ATOMIC_ADD_F64_RTN]].sub1 + ; GFX90A_GFX940-NEXT: $sgpr0 = PRED_COPY [[PRED_COPY6]] + ; GFX90A_GFX940-NEXT: $sgpr1 = PRED_COPY [[PRED_COPY7]] ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG $sgpr0, $sgpr1 %ret = call double @llvm.amdgcn.flat.atomic.fadd.f64.p1.f64(ptr %ptr, double %data) ret double %ret @@ -49,15 +49,15 @@ ; GFX90A_GFX940: bb.0 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]] - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]] - ; GFX90A_GFX940-NEXT: FLAT_ATOMIC_ADD_F64 killed [[COPY4]], killed [[COPY5]], 0, 0, implicit $exec, implicit $flat_scr :: (load store syncscope("wavefront") monotonic (s64) on %ir.ptr) + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; 
GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY3]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY4:%[0-9]+]]:vreg_64_align2 = PRED_COPY [[REG_SEQUENCE1]] + ; GFX90A_GFX940-NEXT: [[PRED_COPY5:%[0-9]+]]:vreg_64_align2 = PRED_COPY [[REG_SEQUENCE]] + ; GFX90A_GFX940-NEXT: FLAT_ATOMIC_ADD_F64 killed [[PRED_COPY4]], killed [[PRED_COPY5]], 0, 0, implicit $exec, implicit $flat_scr :: (load store syncscope("wavefront") monotonic (s64) on %ir.ptr) ; GFX90A_GFX940-NEXT: S_ENDPGM 0 %ret = atomicrmw fadd ptr %ptr, double %data syncscope("wavefront") monotonic ret void @@ -68,19 +68,19 @@ ; GFX90A_GFX940: bb.0 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]] - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]] - ; GFX90A_GFX940-NEXT: [[FLAT_ATOMIC_ADD_F64_RTN:%[0-9]+]]:vreg_64_align2 = FLAT_ATOMIC_ADD_F64_RTN killed [[COPY4]], killed [[COPY5]], 0, 1, implicit $exec, implicit $flat_scr :: (load store syncscope("wavefront") monotonic (s64) on %ir.ptr) - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[FLAT_ATOMIC_ADD_F64_RTN]].sub0 - ; GFX90A_GFX940-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[FLAT_ATOMIC_ADD_F64_RTN]].sub1 - ; GFX90A_GFX940-NEXT: $sgpr0 = COPY [[COPY6]] - ; GFX90A_GFX940-NEXT: $sgpr1 = COPY [[COPY7]] + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY3]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY4:%[0-9]+]]:vreg_64_align2 = PRED_COPY [[REG_SEQUENCE1]] + ; GFX90A_GFX940-NEXT: [[PRED_COPY5:%[0-9]+]]:vreg_64_align2 = PRED_COPY [[REG_SEQUENCE]] + ; GFX90A_GFX940-NEXT: [[FLAT_ATOMIC_ADD_F64_RTN:%[0-9]+]]:vreg_64_align2 = FLAT_ATOMIC_ADD_F64_RTN killed [[PRED_COPY4]], killed [[PRED_COPY5]], 0, 1, implicit $exec, implicit $flat_scr :: (load store syncscope("wavefront") monotonic (s64) on %ir.ptr) + ; GFX90A_GFX940-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[FLAT_ATOMIC_ADD_F64_RTN]].sub0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[FLAT_ATOMIC_ADD_F64_RTN]].sub1 + ; GFX90A_GFX940-NEXT: $sgpr0 = PRED_COPY 
[[PRED_COPY6]] + ; GFX90A_GFX940-NEXT: $sgpr1 = PRED_COPY [[PRED_COPY7]] ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG $sgpr0, $sgpr1 %ret = atomicrmw fadd ptr %ptr, double %data syncscope("wavefront") monotonic ret double %ret diff --git a/llvm/test/CodeGen/AMDGPU/flat-atomic-fadd.v2f16.ll b/llvm/test/CodeGen/AMDGPU/flat-atomic-fadd.v2f16.ll --- a/llvm/test/CodeGen/AMDGPU/flat-atomic-fadd.v2f16.ll +++ b/llvm/test/CodeGen/AMDGPU/flat-atomic-fadd.v2f16.ll @@ -6,12 +6,12 @@ ; GFX940: bb.0 (%ir-block.0): ; GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX940-NEXT: {{ $}} - ; GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX940-NEXT: [[COPY3:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]] - ; GFX940-NEXT: FLAT_ATOMIC_PK_ADD_F16 killed [[COPY3]], [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (volatile dereferenceable load store (s32) on %ir.ptr) + ; GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:vreg_64_align2 = PRED_COPY [[REG_SEQUENCE]] + ; GFX940-NEXT: FLAT_ATOMIC_PK_ADD_F16 killed [[PRED_COPY3]], [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (volatile dereferenceable load store (s32) on %ir.ptr) ; GFX940-NEXT: S_ENDPGM 0 %ret = call <2 x half> @llvm.amdgcn.flat.atomic.fadd.v2f16.p1.v2f16(ptr %ptr, <2 x half> %data) ret void @@ -22,13 +22,13 @@ ; GFX940: bb.0 (%ir-block.0): ; GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX940-NEXT: {{ $}} - ; GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX940-NEXT: [[COPY3:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]] - ; GFX940-NEXT: [[FLAT_ATOMIC_PK_ADD_F16_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_PK_ADD_F16_RTN killed [[COPY3]], [[COPY]], 0, 1, implicit $exec, implicit $flat_scr :: (volatile dereferenceable load store (s32) on %ir.ptr) - ; GFX940-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_PK_ADD_F16_RTN]] + ; GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:vreg_64_align2 = PRED_COPY [[REG_SEQUENCE]] + ; GFX940-NEXT: [[FLAT_ATOMIC_PK_ADD_F16_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_PK_ADD_F16_RTN killed [[PRED_COPY3]], [[PRED_COPY]], 0, 1, implicit $exec, implicit $flat_scr :: (volatile dereferenceable load store (s32) on %ir.ptr) + ; GFX940-NEXT: $vgpr0 = PRED_COPY [[FLAT_ATOMIC_PK_ADD_F16_RTN]] ; GFX940-NEXT: SI_RETURN_TO_EPILOG $vgpr0 %ret = call <2 x half> @llvm.amdgcn.flat.atomic.fadd.v2f16.p1.v2f16(ptr %ptr, <2 x half> %data) ret <2 x half> %ret diff --git a/llvm/test/CodeGen/AMDGPU/fneg-fabs-divergence-driven-isel.ll 
b/llvm/test/CodeGen/AMDGPU/fneg-fabs-divergence-driven-isel.ll --- a/llvm/test/CodeGen/AMDGPU/fneg-fabs-divergence-driven-isel.ll +++ b/llvm/test/CodeGen/AMDGPU/fneg-fabs-divergence-driven-isel.ll @@ -370,10 +370,10 @@ ; GCN-LABEL: bb.0 (%ir-block.0) ; SI: %[[VREG64:[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64 ; FP16: %[[VREG64:[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2_SADDR -; GCN: %[[HI32:[0-9]+]]:vgpr_32 = COPY %[[VREG64]].sub1 +; GCN: %[[HI32:[0-9]+]]:vgpr_32 = PRED_COPY %[[VREG64]].sub1 ; GCN: %[[SREG_MASK:[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648 ; GCN: %[[XOR:[0-9]+]]:vgpr_32 = V_XOR_B32_e64 killed %[[SREG_MASK]], killed %[[HI32]] -; GCN: %[[LO32:[0-9]+]]:vgpr_32 = COPY %[[VREG64]].sub0 +; GCN: %[[LO32:[0-9]+]]:vgpr_32 = PRED_COPY %[[VREG64]].sub0 ; GCN: REG_SEQUENCE killed %[[LO32]], %subreg.sub0, killed %[[XOR]], %subreg.sub1 @@ -392,12 +392,12 @@ ; GCN-LABEL: bb.0 (%ir-block.0) ; SI: %[[VREG64:[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64 ; FP16: %[[VREG64:[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2_SADDR -; GCN: %[[LO32:[0-9]+]]:sreg_32 = COPY %[[VREG64]].sub0 -; GCN: %[[HI32:[0-9]+]]:sreg_32 = COPY %[[VREG64]].sub1 +; GCN: %[[LO32:[0-9]+]]:sreg_32 = PRED_COPY %[[VREG64]].sub0 +; GCN: %[[HI32:[0-9]+]]:sreg_32 = PRED_COPY %[[VREG64]].sub1 ; GCN: %[[SREG_MASK:[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648 ; GCN: %[[XOR:[0-9]+]]:sreg_32 = S_XOR_B32 killed %[[HI32]], killed %[[SREG_MASK]] -; GCN: %[[XOR_COPY:[0-9]+]]:sreg_32 = COPY %[[XOR]] -; GCN: REG_SEQUENCE killed %[[LO32]], %subreg.sub0, killed %[[XOR_COPY]], %subreg.sub1 +; GCN: %[[XOR_PRED_COPY:[0-9]+]]:sreg_32 = PRED_COPY %[[XOR]] +; GCN: REG_SEQUENCE killed %[[LO32]], %subreg.sub0, killed %[[XOR_PRED_COPY]], %subreg.sub1 %in.gep = getelementptr inbounds double, ptr addrspace(1) %in, i64 %idx %out.gep = getelementptr inbounds double, ptr addrspace(1) %out, i64 %idx @@ -412,10 +412,10 @@ ; GCN-LABEL: bb.0 (%ir-block.0) ; SI: %[[VREG64:[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64 ; FP16: %[[VREG64:[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2_SADDR -; GCN: %[[HI32:[0-9]+]]:vgpr_32 = COPY %[[VREG64]].sub1 +; GCN: %[[HI32:[0-9]+]]:vgpr_32 = PRED_COPY %[[VREG64]].sub1 ; GCN: %[[SREG_MASK:[0-9]+]]:sreg_32 = S_MOV_B32 2147483647 ; GCN: %[[AND:[0-9]+]]:vgpr_32 = V_AND_B32_e64 killed %[[SREG_MASK]], killed %[[HI32]] -; GCN: %[[LO32:[0-9]+]]:vgpr_32 = COPY %[[VREG64]].sub0 +; GCN: %[[LO32:[0-9]+]]:vgpr_32 = PRED_COPY %[[VREG64]].sub0 ; GCN: REG_SEQUENCE killed %[[LO32]], %subreg.sub0, killed %[[AND]], %subreg.sub1 @@ -434,12 +434,12 @@ ; GCN-LABEL: bb.0 (%ir-block.0) ; SI: %[[VREG64:[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64 ; FP16: %[[VREG64:[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2_SADDR -; GCN: %[[LO32:[0-9]+]]:sreg_32 = COPY %[[VREG64]].sub0 -; GCN: %[[HI32:[0-9]+]]:sreg_32 = COPY %[[VREG64]].sub1 +; GCN: %[[LO32:[0-9]+]]:sreg_32 = PRED_COPY %[[VREG64]].sub0 +; GCN: %[[HI32:[0-9]+]]:sreg_32 = PRED_COPY %[[VREG64]].sub1 ; GCN: %[[SREG_MASK:[0-9]+]]:sreg_32 = S_MOV_B32 2147483647 ; GCN: %[[AND:[0-9]+]]:sreg_32 = S_AND_B32 killed %[[HI32]], killed %[[SREG_MASK]] -; GCN: %[[AND_COPY:[0-9]+]]:sreg_32 = COPY %[[AND]] -; GCN: REG_SEQUENCE killed %[[LO32]], %subreg.sub0, killed %[[AND_COPY]], %subreg.sub1 +; GCN: %[[AND_PRED_COPY:[0-9]+]]:sreg_32 = PRED_COPY %[[AND]] +; GCN: REG_SEQUENCE killed %[[LO32]], %subreg.sub0, killed %[[AND_PRED_COPY]], %subreg.sub1 %in.gep = getelementptr inbounds double, ptr addrspace(1) %in, i64 %idx @@ -455,10 +455,10 @@ ; GCN-LABEL: bb.0 (%ir-block.0) ; SI: %[[VREG64:[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64 ; FP16: 
%[[VREG64:[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2_SADDR -; GCN: %[[HI32:[0-9]+]]:vgpr_32 = COPY %[[VREG64]].sub1 +; GCN: %[[HI32:[0-9]+]]:vgpr_32 = PRED_COPY %[[VREG64]].sub1 ; GCN: %[[SREG_MASK:[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648 ; GCN: %[[OR:[0-9]+]]:vgpr_32 = V_OR_B32_e64 killed %[[SREG_MASK]], killed %[[HI32]] -; GCN: %[[LO32:[0-9]+]]:vgpr_32 = COPY %[[VREG64]].sub0 +; GCN: %[[LO32:[0-9]+]]:vgpr_32 = PRED_COPY %[[VREG64]].sub0 ; GCN: REG_SEQUENCE killed %[[LO32]], %subreg.sub0, killed %[[OR]], %subreg.sub1 @@ -478,12 +478,12 @@ ; GCN-LABEL: bb.0 (%ir-block.0) ; SI: %[[VREG64:[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64 ; FP16: %[[VREG64:[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2_SADDR -; GCN: %[[LO32:[0-9]+]]:sreg_32 = COPY %[[VREG64]].sub0 -; GCN: %[[HI32:[0-9]+]]:sreg_32 = COPY %[[VREG64]].sub1 +; GCN: %[[LO32:[0-9]+]]:sreg_32 = PRED_COPY %[[VREG64]].sub0 +; GCN: %[[HI32:[0-9]+]]:sreg_32 = PRED_COPY %[[VREG64]].sub1 ; GCN: %[[SREG_MASK:[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648 ; GCN: %[[OR:[0-9]+]]:sreg_32 = S_OR_B32 killed %[[HI32]], killed %[[SREG_MASK]] -; GCN: %[[OR_COPY:[0-9]+]]:sreg_32 = COPY %[[OR]] -; GCN: REG_SEQUENCE killed %[[LO32]], %subreg.sub0, killed %[[OR_COPY]], %subreg.sub1 +; GCN: %[[OR_PRED_COPY:[0-9]+]]:sreg_32 = PRED_COPY %[[OR]] +; GCN: REG_SEQUENCE killed %[[LO32]], %subreg.sub0, killed %[[OR_PRED_COPY]], %subreg.sub1 %in.gep = getelementptr inbounds double, ptr addrspace(1) %in, i64 %idx diff --git a/llvm/test/CodeGen/AMDGPU/fold-cndmask.mir b/llvm/test/CodeGen/AMDGPU/fold-cndmask.mir --- a/llvm/test/CodeGen/AMDGPU/fold-cndmask.mir +++ b/llvm/test/CodeGen/AMDGPU/fold-cndmask.mir @@ -2,9 +2,9 @@ # CHECK: %1:vgpr_32 = V_MOV_B32_e32 0, implicit $exec # CHECK: %2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec -# CHECK: %4:vgpr_32 = COPY %3 +# CHECK: %4:vgpr_32 = PRED_COPY %3 # CHECK: %6:vgpr_32 = V_MOV_B32_e32 0, implicit $exec -# CHECK: %7:vgpr_32 = COPY %3 +# CHECK: %7:vgpr_32 = PRED_COPY %3 --- name: fold_cndmask @@ -25,7 +25,7 @@ %2 = V_CNDMASK_B32_e64 0, %1, 0, %1, %0, implicit $exec %3 = IMPLICIT_DEF %4 = V_CNDMASK_B32_e64 0, %3, 0, %3, %0, implicit $exec - %5 = COPY %1 + %5 = PRED_COPY %1 %6 = V_CNDMASK_B32_e64 0, %5, 0, 0, %0, implicit $exec $vcc = IMPLICIT_DEF %7 = V_CNDMASK_B32_e32 %3, %3, implicit $exec, implicit $vcc diff --git a/llvm/test/CodeGen/AMDGPU/fold-immediate-operand-shrink-with-carry.mir b/llvm/test/CodeGen/AMDGPU/fold-immediate-operand-shrink-with-carry.mir --- a/llvm/test/CodeGen/AMDGPU/fold-immediate-operand-shrink-with-carry.mir +++ b/llvm/test/CodeGen/AMDGPU/fold-immediate-operand-shrink-with-carry.mir @@ -14,8 +14,8 @@ ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 12345 ; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; GCN-NEXT: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 [[S_MOV_B32_]], [[DEF]], implicit-def $vcc, implicit $exec - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY killed $vcc - ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64_xexec = PRED_COPY killed $vcc + ; GCN-NEXT: S_ENDPGM 0, implicit [[PRED_COPY]] %0:sreg_32_xm0 = S_MOV_B32 12345 %1:vgpr_32 = IMPLICIT_DEF %2:vgpr_32 = IMPLICIT_DEF @@ -37,9 +37,9 @@ ; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; GCN-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; GCN-NEXT: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 [[S_MOV_B32_]], [[DEF]], implicit-def $vcc, implicit $exec - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY killed $vcc + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64_xexec = PRED_COPY killed 
$vcc ; GCN-NEXT: [[V_ADD_CO_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 [[S_MOV_B32_]], [[DEF1]], implicit-def $vcc, implicit $exec - ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]], implicit [[V_ADD_CO_U32_e32_1]] + ; GCN-NEXT: S_ENDPGM 0, implicit [[PRED_COPY]], implicit [[V_ADD_CO_U32_e32_1]] %0:sreg_32_xm0 = S_MOV_B32 12345 %1:vgpr_32 = IMPLICIT_DEF %2:vgpr_32 = IMPLICIT_DEF @@ -92,8 +92,8 @@ ; GCN-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; GCN-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; GCN-NEXT: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 [[S_MOV_B32_]], [[DEF]], implicit-def $vcc, implicit $exec - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY killed $vcc - ; GCN-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[DEF1]], [[DEF2]], [[COPY]], 0, implicit $exec + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64_xexec = PRED_COPY killed $vcc + ; GCN-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[DEF1]], [[DEF2]], [[PRED_COPY]], 0, implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_ADDC_U32_e64_]] %0:sreg_32_xm0 = S_MOV_B32 12345 %1:vgpr_32 = IMPLICIT_DEF diff --git a/llvm/test/CodeGen/AMDGPU/fold-operands-order.mir b/llvm/test/CodeGen/AMDGPU/fold-operands-order.mir --- a/llvm/test/CodeGen/AMDGPU/fold-operands-order.mir +++ b/llvm/test/CodeGen/AMDGPU/fold-operands-order.mir @@ -6,7 +6,7 @@ # aren't made in users before the def is seen. # GCN-LABEL: name: mov_in_use_list_2x{{$}} -# GCN: %3:vgpr_32 = COPY undef %0 +# GCN: %3:vgpr_32 = PRED_COPY undef %0 name: mov_in_use_list_2x @@ -26,7 +26,7 @@ bb.1: successors: %bb.2 - %2 = COPY %1 + %2 = PRED_COPY %1 %3 = V_XOR_B32_e64 killed %2, undef %0, implicit $exec S_NOP 0, implicit %3 diff --git a/llvm/test/CodeGen/AMDGPU/fold-readlane.mir b/llvm/test/CodeGen/AMDGPU/fold-readlane.mir --- a/llvm/test/CodeGen/AMDGPU/fold-readlane.mir +++ b/llvm/test/CodeGen/AMDGPU/fold-readlane.mir @@ -28,7 +28,7 @@ # GCN-LABEL: name: fold-imm-readfirstlane-readfirstlane{{$}} # GCN: %1:sreg_32_xm0 = S_MOV_B32 123 -# GCN: %3:sreg_32_xm0 = COPY %1 +# GCN: %3:sreg_32_xm0 = PRED_COPY %1 --- name: fold-imm-readfirstlane-readfirstlane @@ -37,29 +37,29 @@ bb.0: %0:vgpr_32 = V_MOV_B32_e32 123, implicit $exec %1:sreg_32_xm0 = V_READFIRSTLANE_B32 %0, implicit $exec - %2:vgpr_32 = COPY %1 + %2:vgpr_32 = PRED_COPY %1 %3:sreg_32_xm0 = V_READFIRSTLANE_B32 %2, implicit $exec S_NOP 0, implicit %3 ... # GCN-LABEL: name: fold-copy-readfirstlane{{$}} -# GCN: %0:sreg_32_xm0 = COPY $sgpr10 -# GCN: %2:sreg_32_xm0 = COPY %0 +# GCN: %0:sreg_32_xm0 = PRED_COPY $sgpr10 +# GCN: %2:sreg_32_xm0 = PRED_COPY %0 --- name: fold-copy-readfirstlane tracksRegLiveness: true body: | bb.0: liveins: $sgpr10 - %0:sreg_32_xm0 = COPY $sgpr10 - %1:vgpr_32 = COPY %0 + %0:sreg_32_xm0 = PRED_COPY $sgpr10 + %1:vgpr_32 = PRED_COPY %0 %2:sreg_32_xm0 = V_READFIRSTLANE_B32 %1, implicit $exec S_NOP 0, implicit %2 ... # GCN-LABEL: name: no-fold-copy-readfirstlane-physreg0{{$}} -# GCN: %0:vgpr_32 = COPY $sgpr10 +# GCN: %0:vgpr_32 = PRED_COPY $sgpr10 # GCN-NEXT: %1:sreg_32_xm0 = V_READFIRSTLANE_B32 %0, implicit $exec --- name: no-fold-copy-readfirstlane-physreg0 @@ -67,13 +67,13 @@ body: | bb.0: liveins: $sgpr10 - %0:vgpr_32 = COPY $sgpr10 + %0:vgpr_32 = PRED_COPY $sgpr10 %1:sreg_32_xm0 = V_READFIRSTLANE_B32 %0, implicit $exec ... 
# GCN-LABEL: name: no-fold-copy-readfirstlane-physreg1{{$}} -# GCN: $vgpr0 = COPY $sgpr10 +# GCN: $vgpr0 = PRED_COPY $sgpr10 # GCN-NEXT: %0:sreg_32_xm0 = V_READFIRSTLANE_B32 $vgpr0, implicit $exec --- name: no-fold-copy-readfirstlane-physreg1 @@ -81,7 +81,7 @@ body: | bb.0: liveins: $sgpr10 - $vgpr0 = COPY $sgpr10 + $vgpr0 = PRED_COPY $sgpr10 %0:sreg_32_xm0 = V_READFIRSTLANE_B32 $vgpr0, implicit $exec ... @@ -128,7 +128,7 @@ body: | bb.0: liveins: $sgpr12 - %0:sreg_32_xm0 = COPY $sgpr12 + %0:sreg_32_xm0 = PRED_COPY $sgpr12 %1:vgpr_32 = V_MOV_B32_e32 %0, implicit $exec bb.1: @@ -145,7 +145,7 @@ body: | bb.0: liveins: $sgpr10_sgpr11, $sgpr12 - %0:sreg_32_xm0 = COPY $sgpr12 + %0:sreg_32_xm0 = PRED_COPY $sgpr12 %1:vgpr_32 = V_MOV_B32_e32 %0, implicit $exec $exec = S_MOV_B64_term $sgpr10_sgpr11 @@ -155,7 +155,7 @@ # GCN-LABEL: name: fold-copy-readfirstlane-same-block-exec-def{{$}} # GCN: COPY -# GCN-NEXT: %1:vgpr_32 = COPY %0 +# GCN-NEXT: %1:vgpr_32 = PRED_COPY %0 # GCN-NEXT: $exec = S_MOV_B64 # GCN-NEXT: V_READFIRSTLANE_B32 --- @@ -164,8 +164,8 @@ body: | bb.0: liveins: $sgpr10_sgpr11, $sgpr12 - %0:sreg_32_xm0 = COPY $sgpr12 - %1:vgpr_32 = COPY %0, implicit $exec + %0:sreg_32_xm0 = PRED_COPY $sgpr12 + %1:vgpr_32 = PRED_COPY %0, implicit $exec $exec = S_MOV_B64 $sgpr10_sgpr11 %2:sreg_32_xm0 = V_READFIRSTLANE_B32 %1, implicit $exec @@ -215,7 +215,7 @@ body: | bb.0: liveins: $sgpr10_sgpr11, $sgpr12 - %0:vgpr_32 = COPY $sgpr12 + %0:vgpr_32 = PRED_COPY $sgpr12 $exec = S_MOV_B64 $sgpr10_sgpr11 %1:sreg_32_xm0 = V_READFIRSTLANE_B32 %0, implicit $exec ... @@ -230,8 +230,8 @@ liveins: $vgpr0, $sgpr0_sgpr1 %0:vgpr_32 = V_MOV_B32_e32 123, implicit $exec %1:sreg_32_xm0 = V_READFIRSTLANE_B32 %0, implicit $exec - %2:sreg_32_xm0 = COPY %1 - %3:sreg_32_xm0 = COPY %2 + %2:sreg_32_xm0 = PRED_COPY %1 + %3:sreg_32_xm0 = PRED_COPY %2 S_ENDPGM 0, implicit %3 ... @@ -249,7 +249,7 @@ ... # GCN-LABEL: name: fold-imm-readlane-src1{{$}} -# GCN: %0:vgpr_32 = COPY $vgpr0 +# GCN: %0:vgpr_32 = PRED_COPY $vgpr0 # GCN: V_READLANE_B32 %0, 12, implicit $exec --- name: fold-imm-readlane-src1 @@ -257,7 +257,7 @@ body: | bb.0: liveins: $vgpr0 - %0:vgpr_32 = COPY $vgpr0 + %0:vgpr_32 = PRED_COPY $vgpr0 %1:sreg_32_xm0 = S_MOV_B32 12 %2:sreg_32_xm0 = V_READLANE_B32 %0, %1, implicit $exec ... 
@@ -265,7 +265,7 @@ # Constant for subreg0 # GCN-LABEL: name: fold-imm-readfirstlane-regsequence0{{$}} -# GCN: %0:vgpr_32 = COPY $vgpr0 +# GCN: %0:vgpr_32 = PRED_COPY $vgpr0 # GCN-NEXT: %1:vgpr_32 = V_MOV_B32_e32 0, implicit $exec # GCN-NEXT: %2:vreg_64 = REG_SEQUENCE %0, %subreg.sub0, killed %1, %subreg.sub1 # GCN-NEXT: %3:sgpr_32 = V_READFIRSTLANE_B32 %2.sub0, implicit $exec @@ -276,7 +276,7 @@ body: | bb.0: liveins: $vgpr0 - %0:vgpr_32 = COPY $vgpr0 + %0:vgpr_32 = PRED_COPY $vgpr0 %1:vgpr_32 = V_MOV_B32_e32 0, implicit $exec %2:vreg_64 = REG_SEQUENCE %0:vgpr_32, %subreg.sub0, killed %1:vgpr_32, %subreg.sub1 %3:sgpr_32 = V_READFIRSTLANE_B32 %2.sub0:vreg_64, implicit $exec @@ -286,7 +286,7 @@ # Constant for subreg1 # GCN-LABEL: name: fold-imm-readfirstlane-regsequence1{{$}} -# GCN: %0:vgpr_32 = COPY $vgpr0 +# GCN: %0:vgpr_32 = PRED_COPY $vgpr0 # GCN-NEXT: %1:vgpr_32 = V_MOV_B32_e32 0, implicit $exec # GCN-NEXT: %2:vreg_64 = REG_SEQUENCE %1, %subreg.sub0, killed %0, %subreg.sub1 # GCN-NEXT: %3:sgpr_32 = S_MOV_B32 0 @@ -298,7 +298,7 @@ body: | bb.0: liveins: $vgpr0 - %0:vgpr_32 = COPY $vgpr0 + %0:vgpr_32 = PRED_COPY $vgpr0 %1:vgpr_32 = V_MOV_B32_e32 0, implicit $exec %2:vreg_64 = REG_SEQUENCE %1:vgpr_32, %subreg.sub0, killed %0:vgpr_32, %subreg.sub1 %3:sgpr_32 = V_READFIRSTLANE_B32 %2.sub0:vreg_64, implicit $exec @@ -348,8 +348,8 @@ # FIXME: This should fold # GCN-LABEL: name: fold-copy-readfirstlane-regsequence0{{$}} -# GCN: %0:vgpr_32 = COPY $sgpr10 -# GCN-NEXT: %1:vgpr_32 = COPY $sgpr11 +# GCN: %0:vgpr_32 = PRED_COPY $sgpr10 +# GCN-NEXT: %1:vgpr_32 = PRED_COPY $sgpr11 # GCN-NEXT: %2:vreg_64 = REG_SEQUENCE %0, %subreg.sub0, killed %1, %subreg.sub1 # GCN-NEXT: %3:sgpr_32 = V_READFIRSTLANE_B32 %2.sub0, implicit $exec # GCN-NEXT: %4:sgpr_32 = V_READFIRSTLANE_B32 %2.sub1, implicit $exec @@ -359,18 +359,18 @@ body: | bb.0: liveins: $sgpr10, $sgpr11 - %0:vgpr_32 = COPY $sgpr10 - %1:vgpr_32 = COPY $sgpr11 + %0:vgpr_32 = PRED_COPY $sgpr10 + %1:vgpr_32 = PRED_COPY $sgpr11 %2:vreg_64 = REG_SEQUENCE %0:vgpr_32, %subreg.sub0, killed %1:vgpr_32, %subreg.sub1 %3:sgpr_32 = V_READFIRSTLANE_B32 %2.sub0:vreg_64, implicit $exec %4:sgpr_32 = V_READFIRSTLANE_B32 %2.sub1:vreg_64, implicit $exec ... 
# GCN-LABEL: name: fold-copy-readfirstlane-regsequence1{{$}} -# GCN: %0:sreg_32_xm0 = COPY $sgpr10 -# GCN-NEXT: %1:sreg_32_xm0 = COPY $sgpr11 -# GCN-NEXT: %2:vgpr_32 = COPY %0 -# GCN-NEXT: %3:vgpr_32 = COPY %1 +# GCN: %0:sreg_32_xm0 = PRED_COPY $sgpr10 +# GCN-NEXT: %1:sreg_32_xm0 = PRED_COPY $sgpr11 +# GCN-NEXT: %2:vgpr_32 = PRED_COPY %0 +# GCN-NEXT: %3:vgpr_32 = PRED_COPY %1 # GCN-NEXT: %4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, killed %3, %subreg.sub1 # GCN-NEXT: %5:sgpr_32 = V_READFIRSTLANE_B32 %4.sub0, implicit $exec # GCN-NEXT: %6:sgpr_32 = V_READFIRSTLANE_B32 %4.sub1, implicit $exec @@ -380,10 +380,10 @@ body: | bb.0: liveins: $sgpr10, $sgpr11 - %0:sreg_32_xm0 = COPY $sgpr10 - %1:sreg_32_xm0 = COPY $sgpr11 - %2:vgpr_32 = COPY %0 - %3:vgpr_32 = COPY %1 + %0:sreg_32_xm0 = PRED_COPY $sgpr10 + %1:sreg_32_xm0 = PRED_COPY $sgpr11 + %2:vgpr_32 = PRED_COPY %0 + %3:vgpr_32 = PRED_COPY %1 %4:vreg_64 = REG_SEQUENCE %2:vgpr_32, %subreg.sub0, killed %3:vgpr_32, %subreg.sub1 %5:sgpr_32 = V_READFIRSTLANE_B32 %4.sub0:vreg_64, implicit $exec %6:sgpr_32 = V_READFIRSTLANE_B32 %4.sub1:vreg_64, implicit $exec diff --git a/llvm/test/CodeGen/AMDGPU/global-atomic-fadd.f32-no-rtn.ll b/llvm/test/CodeGen/AMDGPU/global-atomic-fadd.f32-no-rtn.ll --- a/llvm/test/CodeGen/AMDGPU/global-atomic-fadd.f32-no-rtn.ll +++ b/llvm/test/CodeGen/AMDGPU/global-atomic-fadd.f32-no-rtn.ll @@ -9,23 +9,23 @@ ; GFX908_GFX11: bb.0 (%ir-block.0): ; GFX908_GFX11-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX908_GFX11-NEXT: {{ $}} - ; GFX908_GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX908_GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX908_GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX908_GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX908_GFX11-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]] - ; GFX908_GFX11-NEXT: GLOBAL_ATOMIC_ADD_F32 killed [[COPY3]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) + ; GFX908_GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX908_GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX908_GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX908_GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX908_GFX11-NEXT: [[PRED_COPY3:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]] + ; GFX908_GFX11-NEXT: GLOBAL_ATOMIC_ADD_F32 killed [[PRED_COPY3]], [[PRED_COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) ; GFX908_GFX11-NEXT: S_ENDPGM 0 ; GFX90A_GFX940-LABEL: name: global_atomic_fadd_f32_no_rtn_intrinsic ; GFX90A_GFX940: bb.0 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]] - ; GFX90A_GFX940-NEXT: GLOBAL_ATOMIC_ADD_F32 killed [[COPY3]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX90A_GFX940-NEXT: 
[[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:vreg_64_align2 = PRED_COPY [[REG_SEQUENCE]] + ; GFX90A_GFX940-NEXT: GLOBAL_ATOMIC_ADD_F32 killed [[PRED_COPY3]], [[PRED_COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) ; GFX90A_GFX940-NEXT: S_ENDPGM 0 %ret = call float @llvm.amdgcn.global.atomic.fadd.f32.p1.f32(ptr addrspace(1) %ptr, float %data) ret void @@ -36,23 +36,23 @@ ; GFX908_GFX11: bb.0 (%ir-block.0): ; GFX908_GFX11-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0 ; GFX908_GFX11-NEXT: {{ $}} - ; GFX908_GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX908_GFX11-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX908_GFX11-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX908_GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GFX908_GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX908_GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr1 + ; GFX908_GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr0 + ; GFX908_GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 ; GFX908_GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX908_GFX11-NEXT: GLOBAL_ATOMIC_ADD_F32_SADDR killed [[V_MOV_B32_e32_]], [[COPY]], killed [[REG_SEQUENCE]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) + ; GFX908_GFX11-NEXT: GLOBAL_ATOMIC_ADD_F32_SADDR killed [[V_MOV_B32_e32_]], [[PRED_COPY]], killed [[REG_SEQUENCE]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) ; GFX908_GFX11-NEXT: S_ENDPGM 0 ; GFX90A_GFX940-LABEL: name: global_atomic_fadd_f32_saddr_no_rtn_intrinsic ; GFX90A_GFX940: bb.0 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr0 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 ; GFX90A_GFX940-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX90A_GFX940-NEXT: GLOBAL_ATOMIC_ADD_F32_SADDR killed [[V_MOV_B32_e32_]], [[COPY]], killed [[REG_SEQUENCE]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) + ; GFX90A_GFX940-NEXT: GLOBAL_ATOMIC_ADD_F32_SADDR killed [[V_MOV_B32_e32_]], [[PRED_COPY]], killed [[REG_SEQUENCE]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) ; GFX90A_GFX940-NEXT: S_ENDPGM 0 %ret = call float @llvm.amdgcn.global.atomic.fadd.f32.p1.f32(ptr addrspace(1) inreg %ptr, float %data) ret void @@ -63,23 +63,23 @@ ; GFX908_GFX11: bb.0 
(%ir-block.0): ; GFX908_GFX11-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX908_GFX11-NEXT: {{ $}} - ; GFX908_GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX908_GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX908_GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX908_GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX908_GFX11-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]] - ; GFX908_GFX11-NEXT: GLOBAL_ATOMIC_ADD_F32 killed [[COPY3]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) + ; GFX908_GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX908_GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX908_GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX908_GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX908_GFX11-NEXT: [[PRED_COPY3:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]] + ; GFX908_GFX11-NEXT: GLOBAL_ATOMIC_ADD_F32 killed [[PRED_COPY3]], [[PRED_COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) ; GFX908_GFX11-NEXT: S_ENDPGM 0 ; GFX90A_GFX940-LABEL: name: global_atomic_fadd_f32_no_rtn_flat_intrinsic ; GFX90A_GFX940: bb.0 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]] - ; GFX90A_GFX940-NEXT: GLOBAL_ATOMIC_ADD_F32 killed [[COPY3]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:vreg_64_align2 = PRED_COPY [[REG_SEQUENCE]] + ; GFX90A_GFX940-NEXT: GLOBAL_ATOMIC_ADD_F32 killed [[PRED_COPY3]], [[PRED_COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) ; GFX90A_GFX940-NEXT: S_ENDPGM 0 %ret = call float @llvm.amdgcn.flat.atomic.fadd.f32.p1.f32(ptr addrspace(1) %ptr, float %data) ret void @@ -90,23 +90,23 @@ ; GFX908_GFX11: bb.0 (%ir-block.0): ; GFX908_GFX11-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0 ; GFX908_GFX11-NEXT: {{ $}} - ; GFX908_GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX908_GFX11-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX908_GFX11-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX908_GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GFX908_GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX908_GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr1 + ; GFX908_GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr0 + ; GFX908_GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, 
[[PRED_COPY1]], %subreg.sub1 ; GFX908_GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX908_GFX11-NEXT: GLOBAL_ATOMIC_ADD_F32_SADDR killed [[V_MOV_B32_e32_]], [[COPY]], killed [[REG_SEQUENCE]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) + ; GFX908_GFX11-NEXT: GLOBAL_ATOMIC_ADD_F32_SADDR killed [[V_MOV_B32_e32_]], [[PRED_COPY]], killed [[REG_SEQUENCE]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) ; GFX908_GFX11-NEXT: S_ENDPGM 0 ; GFX90A_GFX940-LABEL: name: global_atomic_fadd_f32_saddr_no_rtn_flat_intrinsic ; GFX90A_GFX940: bb.0 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr0 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 ; GFX90A_GFX940-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX90A_GFX940-NEXT: GLOBAL_ATOMIC_ADD_F32_SADDR killed [[V_MOV_B32_e32_]], [[COPY]], killed [[REG_SEQUENCE]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) + ; GFX90A_GFX940-NEXT: GLOBAL_ATOMIC_ADD_F32_SADDR killed [[V_MOV_B32_e32_]], [[PRED_COPY]], killed [[REG_SEQUENCE]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) ; GFX90A_GFX940-NEXT: S_ENDPGM 0 %ret = call float @llvm.amdgcn.flat.atomic.fadd.f32.p1.f32(ptr addrspace(1) inreg %ptr, float %data) ret void @@ -117,23 +117,23 @@ ; GFX908_GFX11: bb.0 (%ir-block.0): ; GFX908_GFX11-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX908_GFX11-NEXT: {{ $}} - ; GFX908_GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX908_GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX908_GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX908_GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX908_GFX11-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]] - ; GFX908_GFX11-NEXT: GLOBAL_ATOMIC_ADD_F32 killed [[COPY3]], [[COPY]], 0, 0, implicit $exec :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr, addrspace 1) + ; GFX908_GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX908_GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX908_GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX908_GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX908_GFX11-NEXT: [[PRED_COPY3:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]] + ; GFX908_GFX11-NEXT: GLOBAL_ATOMIC_ADD_F32 killed [[PRED_COPY3]], [[PRED_COPY]], 0, 0, implicit $exec :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr, addrspace 1) ; GFX908_GFX11-NEXT: S_ENDPGM 0 ; GFX90A_GFX940-LABEL: name: global_atomic_fadd_f32_no_rtn_atomicrmw ; GFX90A_GFX940: bb.0 (%ir-block.0): ; GFX90A_GFX940-NEXT: 
liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]] - ; GFX90A_GFX940-NEXT: GLOBAL_ATOMIC_ADD_F32 killed [[COPY3]], [[COPY]], 0, 0, implicit $exec :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr, addrspace 1) + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:vreg_64_align2 = PRED_COPY [[REG_SEQUENCE]] + ; GFX90A_GFX940-NEXT: GLOBAL_ATOMIC_ADD_F32 killed [[PRED_COPY3]], [[PRED_COPY]], 0, 0, implicit $exec :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr, addrspace 1) ; GFX90A_GFX940-NEXT: S_ENDPGM 0 %ret = atomicrmw fadd ptr addrspace(1) %ptr, float %data syncscope("wavefront") monotonic ret void @@ -144,23 +144,23 @@ ; GFX908_GFX11: bb.0 (%ir-block.0): ; GFX908_GFX11-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0 ; GFX908_GFX11-NEXT: {{ $}} - ; GFX908_GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX908_GFX11-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX908_GFX11-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX908_GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GFX908_GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX908_GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr1 + ; GFX908_GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr0 + ; GFX908_GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 ; GFX908_GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX908_GFX11-NEXT: GLOBAL_ATOMIC_ADD_F32_SADDR killed [[V_MOV_B32_e32_]], [[COPY]], killed [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr, addrspace 1) + ; GFX908_GFX11-NEXT: GLOBAL_ATOMIC_ADD_F32_SADDR killed [[V_MOV_B32_e32_]], [[PRED_COPY]], killed [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr, addrspace 1) ; GFX908_GFX11-NEXT: S_ENDPGM 0 ; GFX90A_GFX940-LABEL: name: global_atomic_fadd_f32_saddr_no_rtn_atomicrmw ; GFX90A_GFX940: bb.0 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr0 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[PRED_COPY2]], 
%subreg.sub0, [[PRED_COPY1]], %subreg.sub1 ; GFX90A_GFX940-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX90A_GFX940-NEXT: GLOBAL_ATOMIC_ADD_F32_SADDR killed [[V_MOV_B32_e32_]], [[COPY]], killed [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr, addrspace 1) + ; GFX90A_GFX940-NEXT: GLOBAL_ATOMIC_ADD_F32_SADDR killed [[V_MOV_B32_e32_]], [[PRED_COPY]], killed [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr, addrspace 1) ; GFX90A_GFX940-NEXT: S_ENDPGM 0 %ret = atomicrmw fadd ptr addrspace(1) %ptr, float %data syncscope("wavefront") monotonic ret void diff --git a/llvm/test/CodeGen/AMDGPU/global-atomic-fadd.f32-rtn.ll b/llvm/test/CodeGen/AMDGPU/global-atomic-fadd.f32-rtn.ll --- a/llvm/test/CodeGen/AMDGPU/global-atomic-fadd.f32-rtn.ll +++ b/llvm/test/CodeGen/AMDGPU/global-atomic-fadd.f32-rtn.ll @@ -8,25 +8,25 @@ ; GFX90A_GFX940: bb.0 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]] - ; GFX90A_GFX940-NEXT: [[GLOBAL_ATOMIC_ADD_F32_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_F32_RTN killed [[COPY3]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) - ; GFX90A_GFX940-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_F32_RTN]] + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:vreg_64_align2 = PRED_COPY [[REG_SEQUENCE]] + ; GFX90A_GFX940-NEXT: [[GLOBAL_ATOMIC_ADD_F32_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_F32_RTN killed [[PRED_COPY3]], [[PRED_COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) + ; GFX90A_GFX940-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_ATOMIC_ADD_F32_RTN]] ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG $vgpr0 ; GFX11-LABEL: name: global_atomic_fadd_f32_rtn_intrinsic ; GFX11: bb.0 (%ir-block.0): ; GFX11-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]] - ; GFX11-NEXT: [[GLOBAL_ATOMIC_ADD_F32_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_F32_RTN killed [[COPY3]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) - ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_F32_RTN]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: 
[[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX11-NEXT: [[PRED_COPY3:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]] + ; GFX11-NEXT: [[GLOBAL_ATOMIC_ADD_F32_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_F32_RTN killed [[PRED_COPY3]], [[PRED_COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_ATOMIC_ADD_F32_RTN]] ; GFX11-NEXT: SI_RETURN_TO_EPILOG $vgpr0 %ret = call float @llvm.amdgcn.global.atomic.fadd.f32.p1.f32(ptr addrspace(1) %ptr, float %data) ret float %ret @@ -37,25 +37,25 @@ ; GFX90A_GFX940: bb.0 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr0 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 ; GFX90A_GFX940-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX90A_GFX940-NEXT: [[GLOBAL_ATOMIC_ADD_F32_SADDR_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_F32_SADDR_RTN killed [[V_MOV_B32_e32_]], [[COPY]], killed [[REG_SEQUENCE]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) - ; GFX90A_GFX940-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_F32_SADDR_RTN]] + ; GFX90A_GFX940-NEXT: [[GLOBAL_ATOMIC_ADD_F32_SADDR_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_F32_SADDR_RTN killed [[V_MOV_B32_e32_]], [[PRED_COPY]], killed [[REG_SEQUENCE]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) + ; GFX90A_GFX940-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_ATOMIC_ADD_F32_SADDR_RTN]] ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG $vgpr0 ; GFX11-LABEL: name: global_atomic_fadd_f32_saddr_rtn_intrinsic ; GFX11: bb.0 (%ir-block.0): ; GFX11-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr1 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr0 + ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX11-NEXT: [[GLOBAL_ATOMIC_ADD_F32_SADDR_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_F32_SADDR_RTN killed [[V_MOV_B32_e32_]], [[COPY]], killed [[REG_SEQUENCE]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) - ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_F32_SADDR_RTN]] + ; GFX11-NEXT: [[GLOBAL_ATOMIC_ADD_F32_SADDR_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_F32_SADDR_RTN killed [[V_MOV_B32_e32_]], 
[[PRED_COPY]], killed [[REG_SEQUENCE]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_ATOMIC_ADD_F32_SADDR_RTN]] ; GFX11-NEXT: SI_RETURN_TO_EPILOG $vgpr0 %ret = call float @llvm.amdgcn.global.atomic.fadd.f32.p1.f32(ptr addrspace(1) inreg %ptr, float %data) ret float %ret @@ -66,25 +66,25 @@ ; GFX90A_GFX940: bb.0 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]] - ; GFX90A_GFX940-NEXT: [[GLOBAL_ATOMIC_ADD_F32_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_F32_RTN killed [[COPY3]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) - ; GFX90A_GFX940-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_F32_RTN]] + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:vreg_64_align2 = PRED_COPY [[REG_SEQUENCE]] + ; GFX90A_GFX940-NEXT: [[GLOBAL_ATOMIC_ADD_F32_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_F32_RTN killed [[PRED_COPY3]], [[PRED_COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) + ; GFX90A_GFX940-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_ATOMIC_ADD_F32_RTN]] ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG $vgpr0 ; GFX11-LABEL: name: global_atomic_fadd_f32_rtn_flat_intrinsic ; GFX11: bb.0 (%ir-block.0): ; GFX11-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]] - ; GFX11-NEXT: [[GLOBAL_ATOMIC_ADD_F32_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_F32_RTN killed [[COPY3]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) - ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_F32_RTN]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX11-NEXT: [[PRED_COPY3:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]] + ; GFX11-NEXT: [[GLOBAL_ATOMIC_ADD_F32_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_F32_RTN killed [[PRED_COPY3]], [[PRED_COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_ATOMIC_ADD_F32_RTN]] ; GFX11-NEXT: SI_RETURN_TO_EPILOG $vgpr0 %ret = call float @llvm.amdgcn.flat.atomic.fadd.f32.p1.f32(ptr addrspace(1) %ptr, 
float %data) ret float %ret @@ -95,25 +95,25 @@ ; GFX90A_GFX940: bb.0 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr0 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 ; GFX90A_GFX940-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX90A_GFX940-NEXT: [[GLOBAL_ATOMIC_ADD_F32_SADDR_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_F32_SADDR_RTN killed [[V_MOV_B32_e32_]], [[COPY]], killed [[REG_SEQUENCE]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) - ; GFX90A_GFX940-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_F32_SADDR_RTN]] + ; GFX90A_GFX940-NEXT: [[GLOBAL_ATOMIC_ADD_F32_SADDR_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_F32_SADDR_RTN killed [[V_MOV_B32_e32_]], [[PRED_COPY]], killed [[REG_SEQUENCE]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) + ; GFX90A_GFX940-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_ATOMIC_ADD_F32_SADDR_RTN]] ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG $vgpr0 ; GFX11-LABEL: name: global_atomic_fadd_f32_saddr_rtn_flat_intrinsic ; GFX11: bb.0 (%ir-block.0): ; GFX11-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr1 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr0 + ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX11-NEXT: [[GLOBAL_ATOMIC_ADD_F32_SADDR_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_F32_SADDR_RTN killed [[V_MOV_B32_e32_]], [[COPY]], killed [[REG_SEQUENCE]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) - ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_F32_SADDR_RTN]] + ; GFX11-NEXT: [[GLOBAL_ATOMIC_ADD_F32_SADDR_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_F32_SADDR_RTN killed [[V_MOV_B32_e32_]], [[PRED_COPY]], killed [[REG_SEQUENCE]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_ATOMIC_ADD_F32_SADDR_RTN]] ; GFX11-NEXT: SI_RETURN_TO_EPILOG $vgpr0 %ret = call float @llvm.amdgcn.flat.atomic.fadd.f32.p1.f32(ptr addrspace(1) inreg %ptr, float %data) ret float %ret @@ -124,25 +124,25 @@ ; GFX90A_GFX940: bb.0 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: 
[[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]] - ; GFX90A_GFX940-NEXT: [[GLOBAL_ATOMIC_ADD_F32_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_F32_RTN killed [[COPY3]], [[COPY]], 0, 1, implicit $exec :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr, addrspace 1) - ; GFX90A_GFX940-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_F32_RTN]] + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:vreg_64_align2 = PRED_COPY [[REG_SEQUENCE]] + ; GFX90A_GFX940-NEXT: [[GLOBAL_ATOMIC_ADD_F32_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_F32_RTN killed [[PRED_COPY3]], [[PRED_COPY]], 0, 1, implicit $exec :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr, addrspace 1) + ; GFX90A_GFX940-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_ATOMIC_ADD_F32_RTN]] ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG $vgpr0 ; GFX11-LABEL: name: global_atomic_fadd_f32_rtn_atomicrmw ; GFX11: bb.0 (%ir-block.0): ; GFX11-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]] - ; GFX11-NEXT: [[GLOBAL_ATOMIC_ADD_F32_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_F32_RTN killed [[COPY3]], [[COPY]], 0, 1, implicit $exec :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr, addrspace 1) - ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_F32_RTN]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX11-NEXT: [[PRED_COPY3:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]] + ; GFX11-NEXT: [[GLOBAL_ATOMIC_ADD_F32_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_F32_RTN killed [[PRED_COPY3]], [[PRED_COPY]], 0, 1, implicit $exec :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr, addrspace 1) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_ATOMIC_ADD_F32_RTN]] ; GFX11-NEXT: SI_RETURN_TO_EPILOG $vgpr0 %ret = atomicrmw fadd ptr addrspace(1) %ptr, float %data syncscope("wavefront") monotonic ret float %ret @@ -153,25 +153,25 @@ ; GFX90A_GFX940: bb.0 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; 
GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr0 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 ; GFX90A_GFX940-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX90A_GFX940-NEXT: [[GLOBAL_ATOMIC_ADD_F32_SADDR_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_F32_SADDR_RTN killed [[V_MOV_B32_e32_]], [[COPY]], killed [[REG_SEQUENCE]], 0, 1, implicit $exec :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr, addrspace 1) - ; GFX90A_GFX940-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_F32_SADDR_RTN]] + ; GFX90A_GFX940-NEXT: [[GLOBAL_ATOMIC_ADD_F32_SADDR_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_F32_SADDR_RTN killed [[V_MOV_B32_e32_]], [[PRED_COPY]], killed [[REG_SEQUENCE]], 0, 1, implicit $exec :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr, addrspace 1) + ; GFX90A_GFX940-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_ATOMIC_ADD_F32_SADDR_RTN]] ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG $vgpr0 ; GFX11-LABEL: name: global_atomic_fadd_f32_saddr_rtn_atomicrmw ; GFX11: bb.0 (%ir-block.0): ; GFX11-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr1 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr0 + ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX11-NEXT: [[GLOBAL_ATOMIC_ADD_F32_SADDR_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_F32_SADDR_RTN killed [[V_MOV_B32_e32_]], [[COPY]], killed [[REG_SEQUENCE]], 0, 1, implicit $exec :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr, addrspace 1) - ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_F32_SADDR_RTN]] + ; GFX11-NEXT: [[GLOBAL_ATOMIC_ADD_F32_SADDR_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_F32_SADDR_RTN killed [[V_MOV_B32_e32_]], [[PRED_COPY]], killed [[REG_SEQUENCE]], 0, 1, implicit $exec :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr, addrspace 1) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_ATOMIC_ADD_F32_SADDR_RTN]] ; GFX11-NEXT: SI_RETURN_TO_EPILOG $vgpr0 %ret = atomicrmw fadd ptr addrspace(1) %ptr, float %data syncscope("wavefront") monotonic ret float %ret diff --git a/llvm/test/CodeGen/AMDGPU/global-atomic-fadd.f64.ll b/llvm/test/CodeGen/AMDGPU/global-atomic-fadd.f64.ll --- a/llvm/test/CodeGen/AMDGPU/global-atomic-fadd.f64.ll +++ b/llvm/test/CodeGen/AMDGPU/global-atomic-fadd.f64.ll @@ -7,15 +7,15 @@ ; GFX90A_GFX940: bb.0 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: 
[[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]] - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]] - ; GFX90A_GFX940-NEXT: GLOBAL_ATOMIC_ADD_F64 killed [[COPY4]], killed [[COPY5]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.ptr, addrspace 1) + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY3]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY4:%[0-9]+]]:vreg_64_align2 = PRED_COPY [[REG_SEQUENCE1]] + ; GFX90A_GFX940-NEXT: [[PRED_COPY5:%[0-9]+]]:vreg_64_align2 = PRED_COPY [[REG_SEQUENCE]] + ; GFX90A_GFX940-NEXT: GLOBAL_ATOMIC_ADD_F64 killed [[PRED_COPY4]], killed [[PRED_COPY5]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.ptr, addrspace 1) ; GFX90A_GFX940-NEXT: S_ENDPGM 0 %ret = call double @llvm.amdgcn.global.atomic.fadd.f64.p1.f64(ptr addrspace(1) %ptr, double %data) ret void @@ -26,19 +26,19 @@ ; GFX90A_GFX940: bb.0 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]] - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]] - ; GFX90A_GFX940-NEXT: [[GLOBAL_ATOMIC_ADD_F64_RTN:%[0-9]+]]:vreg_64_align2 = GLOBAL_ATOMIC_ADD_F64_RTN killed [[COPY4]], killed [[COPY5]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.ptr, addrspace 1) - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_ATOMIC_ADD_F64_RTN]].sub0 - ; GFX90A_GFX940-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_ATOMIC_ADD_F64_RTN]].sub1 - ; GFX90A_GFX940-NEXT: $sgpr0 = COPY [[COPY6]] - ; GFX90A_GFX940-NEXT: $sgpr1 = COPY [[COPY7]] + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY3]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY4:%[0-9]+]]:vreg_64_align2 = PRED_COPY [[REG_SEQUENCE1]] + ; GFX90A_GFX940-NEXT: [[PRED_COPY5:%[0-9]+]]:vreg_64_align2 = PRED_COPY 
[[REG_SEQUENCE]] + ; GFX90A_GFX940-NEXT: [[GLOBAL_ATOMIC_ADD_F64_RTN:%[0-9]+]]:vreg_64_align2 = GLOBAL_ATOMIC_ADD_F64_RTN killed [[PRED_COPY4]], killed [[PRED_COPY5]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.ptr, addrspace 1) + ; GFX90A_GFX940-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[GLOBAL_ATOMIC_ADD_F64_RTN]].sub0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[GLOBAL_ATOMIC_ADD_F64_RTN]].sub1 + ; GFX90A_GFX940-NEXT: $sgpr0 = PRED_COPY [[PRED_COPY6]] + ; GFX90A_GFX940-NEXT: $sgpr1 = PRED_COPY [[PRED_COPY7]] ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG $sgpr0, $sgpr1 %ret = call double @llvm.amdgcn.global.atomic.fadd.f64.p1.f64(ptr addrspace(1) %ptr, double %data) ret double %ret @@ -49,15 +49,15 @@ ; GFX90A_GFX940: bb.0 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr0 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[PRED_COPY3]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY]], %subreg.sub1 ; GFX90A_GFX940-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]] - ; GFX90A_GFX940-NEXT: GLOBAL_ATOMIC_ADD_F64_SADDR killed [[V_MOV_B32_e32_]], killed [[COPY4]], killed [[REG_SEQUENCE]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.ptr, addrspace 1) + ; GFX90A_GFX940-NEXT: [[PRED_COPY4:%[0-9]+]]:vreg_64_align2 = PRED_COPY [[REG_SEQUENCE1]] + ; GFX90A_GFX940-NEXT: GLOBAL_ATOMIC_ADD_F64_SADDR killed [[V_MOV_B32_e32_]], killed [[PRED_COPY4]], killed [[REG_SEQUENCE]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.ptr, addrspace 1) ; GFX90A_GFX940-NEXT: S_ENDPGM 0 %ret = call double @llvm.amdgcn.global.atomic.fadd.f64.p1.f64(ptr addrspace(1) %ptr, double %data) ret void @@ -68,19 +68,19 @@ ; GFX90A_GFX940: bb.0 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: 
[[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr0 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[PRED_COPY3]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY]], %subreg.sub1 ; GFX90A_GFX940-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]] - ; GFX90A_GFX940-NEXT: [[GLOBAL_ATOMIC_ADD_F64_SADDR_RTN:%[0-9]+]]:vreg_64_align2 = GLOBAL_ATOMIC_ADD_F64_SADDR_RTN killed [[V_MOV_B32_e32_]], killed [[COPY4]], killed [[REG_SEQUENCE]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.ptr, addrspace 1) - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_ATOMIC_ADD_F64_SADDR_RTN]].sub0 - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_ATOMIC_ADD_F64_SADDR_RTN]].sub1 - ; GFX90A_GFX940-NEXT: $sgpr0 = COPY [[COPY5]] - ; GFX90A_GFX940-NEXT: $sgpr1 = COPY [[COPY6]] + ; GFX90A_GFX940-NEXT: [[PRED_COPY4:%[0-9]+]]:vreg_64_align2 = PRED_COPY [[REG_SEQUENCE1]] + ; GFX90A_GFX940-NEXT: [[GLOBAL_ATOMIC_ADD_F64_SADDR_RTN:%[0-9]+]]:vreg_64_align2 = GLOBAL_ATOMIC_ADD_F64_SADDR_RTN killed [[V_MOV_B32_e32_]], killed [[PRED_COPY4]], killed [[REG_SEQUENCE]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.ptr, addrspace 1) + ; GFX90A_GFX940-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[GLOBAL_ATOMIC_ADD_F64_SADDR_RTN]].sub0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[GLOBAL_ATOMIC_ADD_F64_SADDR_RTN]].sub1 + ; GFX90A_GFX940-NEXT: $sgpr0 = PRED_COPY [[PRED_COPY5]] + ; GFX90A_GFX940-NEXT: $sgpr1 = PRED_COPY [[PRED_COPY6]] ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG $sgpr0, $sgpr1 %ret = call double @llvm.amdgcn.global.atomic.fadd.f64.p1.f64(ptr addrspace(1) %ptr, double %data) ret double %ret @@ -91,15 +91,15 @@ ; GFX90A_GFX940: bb.0 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]] - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]] - ; GFX90A_GFX940-NEXT: GLOBAL_ATOMIC_ADD_F64 killed [[COPY4]], killed [[COPY5]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.ptr, addrspace 1) + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = 
REG_SEQUENCE [[PRED_COPY3]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY4:%[0-9]+]]:vreg_64_align2 = PRED_COPY [[REG_SEQUENCE1]] + ; GFX90A_GFX940-NEXT: [[PRED_COPY5:%[0-9]+]]:vreg_64_align2 = PRED_COPY [[REG_SEQUENCE]] + ; GFX90A_GFX940-NEXT: GLOBAL_ATOMIC_ADD_F64 killed [[PRED_COPY4]], killed [[PRED_COPY5]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.ptr, addrspace 1) ; GFX90A_GFX940-NEXT: S_ENDPGM 0 %ret = call double @llvm.amdgcn.flat.atomic.fadd.f64.p1.f64(ptr addrspace(1) %ptr, double %data) ret void @@ -110,19 +110,19 @@ ; GFX90A_GFX940: bb.0 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]] - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]] - ; GFX90A_GFX940-NEXT: [[GLOBAL_ATOMIC_ADD_F64_RTN:%[0-9]+]]:vreg_64_align2 = GLOBAL_ATOMIC_ADD_F64_RTN killed [[COPY4]], killed [[COPY5]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.ptr, addrspace 1) - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_ATOMIC_ADD_F64_RTN]].sub0 - ; GFX90A_GFX940-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_ATOMIC_ADD_F64_RTN]].sub1 - ; GFX90A_GFX940-NEXT: $sgpr0 = COPY [[COPY6]] - ; GFX90A_GFX940-NEXT: $sgpr1 = COPY [[COPY7]] + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY3]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY4:%[0-9]+]]:vreg_64_align2 = PRED_COPY [[REG_SEQUENCE1]] + ; GFX90A_GFX940-NEXT: [[PRED_COPY5:%[0-9]+]]:vreg_64_align2 = PRED_COPY [[REG_SEQUENCE]] + ; GFX90A_GFX940-NEXT: [[GLOBAL_ATOMIC_ADD_F64_RTN:%[0-9]+]]:vreg_64_align2 = GLOBAL_ATOMIC_ADD_F64_RTN killed [[PRED_COPY4]], killed [[PRED_COPY5]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.ptr, addrspace 1) + ; GFX90A_GFX940-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[GLOBAL_ATOMIC_ADD_F64_RTN]].sub0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[GLOBAL_ATOMIC_ADD_F64_RTN]].sub1 + ; GFX90A_GFX940-NEXT: $sgpr0 = PRED_COPY [[PRED_COPY6]] + ; GFX90A_GFX940-NEXT: $sgpr1 = PRED_COPY [[PRED_COPY7]] ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG $sgpr0, $sgpr1 %ret = call double @llvm.amdgcn.flat.atomic.fadd.f64.p1.f64(ptr addrspace(1) %ptr, double %data) ret double %ret @@ -133,15 +133,15 @@ ; GFX90A_GFX940: bb.0 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: 
[[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr0 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[PRED_COPY3]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY]], %subreg.sub1 ; GFX90A_GFX940-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]] - ; GFX90A_GFX940-NEXT: GLOBAL_ATOMIC_ADD_F64_SADDR killed [[V_MOV_B32_e32_]], killed [[COPY4]], killed [[REG_SEQUENCE]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.ptr, addrspace 1) + ; GFX90A_GFX940-NEXT: [[PRED_COPY4:%[0-9]+]]:vreg_64_align2 = PRED_COPY [[REG_SEQUENCE1]] + ; GFX90A_GFX940-NEXT: GLOBAL_ATOMIC_ADD_F64_SADDR killed [[V_MOV_B32_e32_]], killed [[PRED_COPY4]], killed [[REG_SEQUENCE]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.ptr, addrspace 1) ; GFX90A_GFX940-NEXT: S_ENDPGM 0 %ret = call double @llvm.amdgcn.flat.atomic.fadd.f64.p1.f64(ptr addrspace(1) %ptr, double %data) ret void @@ -152,19 +152,19 @@ ; GFX90A_GFX940: bb.0 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr0 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[PRED_COPY3]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY]], %subreg.sub1 ; GFX90A_GFX940-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]] - ; GFX90A_GFX940-NEXT: [[GLOBAL_ATOMIC_ADD_F64_SADDR_RTN:%[0-9]+]]:vreg_64_align2 = GLOBAL_ATOMIC_ADD_F64_SADDR_RTN killed [[V_MOV_B32_e32_]], killed [[COPY4]], killed [[REG_SEQUENCE]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.ptr, addrspace 1) - ; GFX90A_GFX940-NEXT: 
[[COPY5:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_ATOMIC_ADD_F64_SADDR_RTN]].sub0 - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_ATOMIC_ADD_F64_SADDR_RTN]].sub1 - ; GFX90A_GFX940-NEXT: $sgpr0 = COPY [[COPY5]] - ; GFX90A_GFX940-NEXT: $sgpr1 = COPY [[COPY6]] + ; GFX90A_GFX940-NEXT: [[PRED_COPY4:%[0-9]+]]:vreg_64_align2 = PRED_COPY [[REG_SEQUENCE1]] + ; GFX90A_GFX940-NEXT: [[GLOBAL_ATOMIC_ADD_F64_SADDR_RTN:%[0-9]+]]:vreg_64_align2 = GLOBAL_ATOMIC_ADD_F64_SADDR_RTN killed [[V_MOV_B32_e32_]], killed [[PRED_COPY4]], killed [[REG_SEQUENCE]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.ptr, addrspace 1) + ; GFX90A_GFX940-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[GLOBAL_ATOMIC_ADD_F64_SADDR_RTN]].sub0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[GLOBAL_ATOMIC_ADD_F64_SADDR_RTN]].sub1 + ; GFX90A_GFX940-NEXT: $sgpr0 = PRED_COPY [[PRED_COPY5]] + ; GFX90A_GFX940-NEXT: $sgpr1 = PRED_COPY [[PRED_COPY6]] ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG $sgpr0, $sgpr1 %ret = call double @llvm.amdgcn.flat.atomic.fadd.f64.p1.f64(ptr addrspace(1) %ptr, double %data) ret double %ret @@ -175,15 +175,15 @@ ; GFX90A_GFX940: bb.0 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]] - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]] - ; GFX90A_GFX940-NEXT: GLOBAL_ATOMIC_ADD_F64 killed [[COPY4]], killed [[COPY5]], 0, 0, implicit $exec :: (load store syncscope("wavefront") monotonic (s64) on %ir.ptr, addrspace 1) + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY3]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY4:%[0-9]+]]:vreg_64_align2 = PRED_COPY [[REG_SEQUENCE1]] + ; GFX90A_GFX940-NEXT: [[PRED_COPY5:%[0-9]+]]:vreg_64_align2 = PRED_COPY [[REG_SEQUENCE]] + ; GFX90A_GFX940-NEXT: GLOBAL_ATOMIC_ADD_F64 killed [[PRED_COPY4]], killed [[PRED_COPY5]], 0, 0, implicit $exec :: (load store syncscope("wavefront") monotonic (s64) on %ir.ptr, addrspace 1) ; GFX90A_GFX940-NEXT: S_ENDPGM 0 %ret = atomicrmw fadd ptr addrspace(1) %ptr, double %data syncscope("wavefront") monotonic ret void @@ -194,19 +194,19 @@ ; GFX90A_GFX940: bb.0 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; 
GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]] - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]] - ; GFX90A_GFX940-NEXT: [[GLOBAL_ATOMIC_ADD_F64_RTN:%[0-9]+]]:vreg_64_align2 = GLOBAL_ATOMIC_ADD_F64_RTN killed [[COPY4]], killed [[COPY5]], 0, 1, implicit $exec :: (load store syncscope("wavefront") monotonic (s64) on %ir.ptr, addrspace 1) - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_ATOMIC_ADD_F64_RTN]].sub0 - ; GFX90A_GFX940-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_ATOMIC_ADD_F64_RTN]].sub1 - ; GFX90A_GFX940-NEXT: $sgpr0 = COPY [[COPY6]] - ; GFX90A_GFX940-NEXT: $sgpr1 = COPY [[COPY7]] + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY3]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY4:%[0-9]+]]:vreg_64_align2 = PRED_COPY [[REG_SEQUENCE1]] + ; GFX90A_GFX940-NEXT: [[PRED_COPY5:%[0-9]+]]:vreg_64_align2 = PRED_COPY [[REG_SEQUENCE]] + ; GFX90A_GFX940-NEXT: [[GLOBAL_ATOMIC_ADD_F64_RTN:%[0-9]+]]:vreg_64_align2 = GLOBAL_ATOMIC_ADD_F64_RTN killed [[PRED_COPY4]], killed [[PRED_COPY5]], 0, 1, implicit $exec :: (load store syncscope("wavefront") monotonic (s64) on %ir.ptr, addrspace 1) + ; GFX90A_GFX940-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[GLOBAL_ATOMIC_ADD_F64_RTN]].sub0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[GLOBAL_ATOMIC_ADD_F64_RTN]].sub1 + ; GFX90A_GFX940-NEXT: $sgpr0 = PRED_COPY [[PRED_COPY6]] + ; GFX90A_GFX940-NEXT: $sgpr1 = PRED_COPY [[PRED_COPY7]] ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG $sgpr0, $sgpr1 %ret = atomicrmw fadd ptr addrspace(1) %ptr, double %data syncscope("wavefront") monotonic ret double %ret @@ -217,15 +217,15 @@ ; GFX90A_GFX940: bb.0 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr0 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[PRED_COPY3]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1 + ; 
GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY]], %subreg.sub1 ; GFX90A_GFX940-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]] - ; GFX90A_GFX940-NEXT: GLOBAL_ATOMIC_ADD_F64_SADDR killed [[V_MOV_B32_e32_]], killed [[COPY4]], killed [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load store syncscope("wavefront") monotonic (s64) on %ir.ptr, addrspace 1) + ; GFX90A_GFX940-NEXT: [[PRED_COPY4:%[0-9]+]]:vreg_64_align2 = PRED_COPY [[REG_SEQUENCE1]] + ; GFX90A_GFX940-NEXT: GLOBAL_ATOMIC_ADD_F64_SADDR killed [[V_MOV_B32_e32_]], killed [[PRED_COPY4]], killed [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load store syncscope("wavefront") monotonic (s64) on %ir.ptr, addrspace 1) ; GFX90A_GFX940-NEXT: S_ENDPGM 0 %ret = atomicrmw fadd ptr addrspace(1) %ptr, double %data syncscope("wavefront") monotonic ret void @@ -236,19 +236,19 @@ ; GFX90A_GFX940: bb.0 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr0 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[PRED_COPY3]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY]], %subreg.sub1 ; GFX90A_GFX940-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]] - ; GFX90A_GFX940-NEXT: [[GLOBAL_ATOMIC_ADD_F64_SADDR_RTN:%[0-9]+]]:vreg_64_align2 = GLOBAL_ATOMIC_ADD_F64_SADDR_RTN killed [[V_MOV_B32_e32_]], killed [[COPY4]], killed [[REG_SEQUENCE]], 0, 1, implicit $exec :: (load store syncscope("wavefront") monotonic (s64) on %ir.ptr, addrspace 1) - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_ATOMIC_ADD_F64_SADDR_RTN]].sub0 - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_ATOMIC_ADD_F64_SADDR_RTN]].sub1 - ; GFX90A_GFX940-NEXT: $sgpr0 = COPY [[COPY5]] - ; GFX90A_GFX940-NEXT: $sgpr1 = COPY [[COPY6]] + ; GFX90A_GFX940-NEXT: [[PRED_COPY4:%[0-9]+]]:vreg_64_align2 = PRED_COPY [[REG_SEQUENCE1]] + ; GFX90A_GFX940-NEXT: [[GLOBAL_ATOMIC_ADD_F64_SADDR_RTN:%[0-9]+]]:vreg_64_align2 = GLOBAL_ATOMIC_ADD_F64_SADDR_RTN killed [[V_MOV_B32_e32_]], killed [[PRED_COPY4]], killed [[REG_SEQUENCE]], 0, 1, implicit $exec :: (load store syncscope("wavefront") monotonic (s64) on %ir.ptr, addrspace 1) + ; GFX90A_GFX940-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[GLOBAL_ATOMIC_ADD_F64_SADDR_RTN]].sub0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[GLOBAL_ATOMIC_ADD_F64_SADDR_RTN]].sub1 + ; GFX90A_GFX940-NEXT: 
$sgpr0 = PRED_COPY [[PRED_COPY5]] + ; GFX90A_GFX940-NEXT: $sgpr1 = PRED_COPY [[PRED_COPY6]] ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG $sgpr0, $sgpr1 %ret = atomicrmw fadd ptr addrspace(1) %ptr, double %data syncscope("wavefront") monotonic ret double %ret diff --git a/llvm/test/CodeGen/AMDGPU/global-atomic-fadd.v2f16-no-rtn.ll b/llvm/test/CodeGen/AMDGPU/global-atomic-fadd.v2f16-no-rtn.ll --- a/llvm/test/CodeGen/AMDGPU/global-atomic-fadd.v2f16-no-rtn.ll +++ b/llvm/test/CodeGen/AMDGPU/global-atomic-fadd.v2f16-no-rtn.ll @@ -8,23 +8,23 @@ ; GFX908: bb.0 (%ir-block.0): ; GFX908-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX908-NEXT: {{ $}} - ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]] - ; GFX908-NEXT: GLOBAL_ATOMIC_PK_ADD_F16 killed [[COPY3]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) + ; GFX908-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX908-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX908-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX908-NEXT: [[PRED_COPY3:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]] + ; GFX908-NEXT: GLOBAL_ATOMIC_PK_ADD_F16 killed [[PRED_COPY3]], [[PRED_COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) ; GFX908-NEXT: S_ENDPGM 0 ; GFX90A_GFX940-LABEL: name: global_atomic_fadd_v2f16_no_rtn_intrinsic ; GFX90A_GFX940: bb.0 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]] - ; GFX90A_GFX940-NEXT: GLOBAL_ATOMIC_PK_ADD_F16 killed [[COPY3]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:vreg_64_align2 = PRED_COPY [[REG_SEQUENCE]] + ; GFX90A_GFX940-NEXT: GLOBAL_ATOMIC_PK_ADD_F16 killed [[PRED_COPY3]], [[PRED_COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) ; GFX90A_GFX940-NEXT: S_ENDPGM 0 %ret = call <2 x half> @llvm.amdgcn.global.atomic.fadd.v2f16.p1.v2f16(ptr addrspace(1) %ptr, <2 x half> %data) ret void @@ -35,23 +35,23 @@ ; GFX908: bb.0 (%ir-block.0): ; GFX908-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0 ; GFX908-NEXT: {{ $}} - ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX908-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX908-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 
= COPY $sgpr0 - ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GFX908-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX908-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr1 + ; GFX908-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr0 + ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 ; GFX908-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX908-NEXT: GLOBAL_ATOMIC_PK_ADD_F16_SADDR killed [[V_MOV_B32_e32_]], [[COPY]], killed [[REG_SEQUENCE]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) + ; GFX908-NEXT: GLOBAL_ATOMIC_PK_ADD_F16_SADDR killed [[V_MOV_B32_e32_]], [[PRED_COPY]], killed [[REG_SEQUENCE]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) ; GFX908-NEXT: S_ENDPGM 0 ; GFX90A_GFX940-LABEL: name: global_atomic_fadd_v2f16_saddr_no_rtn_intrinsic ; GFX90A_GFX940: bb.0 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr0 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 ; GFX90A_GFX940-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX90A_GFX940-NEXT: GLOBAL_ATOMIC_PK_ADD_F16_SADDR killed [[V_MOV_B32_e32_]], [[COPY]], killed [[REG_SEQUENCE]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) + ; GFX90A_GFX940-NEXT: GLOBAL_ATOMIC_PK_ADD_F16_SADDR killed [[V_MOV_B32_e32_]], [[PRED_COPY]], killed [[REG_SEQUENCE]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) ; GFX90A_GFX940-NEXT: S_ENDPGM 0 %ret = call <2 x half> @llvm.amdgcn.global.atomic.fadd.v2f16.p1.v2f16(ptr addrspace(1) %ptr, <2 x half> %data) ret void @@ -62,23 +62,23 @@ ; GFX908: bb.0 (%ir-block.0): ; GFX908-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX908-NEXT: {{ $}} - ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]] - ; GFX908-NEXT: GLOBAL_ATOMIC_PK_ADD_F16 killed [[COPY3]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) + ; GFX908-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX908-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX908-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX908-NEXT: [[PRED_COPY3:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]] + ; GFX908-NEXT: 
GLOBAL_ATOMIC_PK_ADD_F16 killed [[PRED_COPY3]], [[PRED_COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) ; GFX908-NEXT: S_ENDPGM 0 ; GFX90A_GFX940-LABEL: name: global_atomic_fadd_v2f16_no_rtn_flat_intrinsic ; GFX90A_GFX940: bb.0 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]] - ; GFX90A_GFX940-NEXT: GLOBAL_ATOMIC_PK_ADD_F16 killed [[COPY3]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:vreg_64_align2 = PRED_COPY [[REG_SEQUENCE]] + ; GFX90A_GFX940-NEXT: GLOBAL_ATOMIC_PK_ADD_F16 killed [[PRED_COPY3]], [[PRED_COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) ; GFX90A_GFX940-NEXT: S_ENDPGM 0 %ret = call <2 x half> @llvm.amdgcn.flat.atomic.fadd.v2f16.p1.v2f16(ptr addrspace(1) %ptr, <2 x half> %data) ret void @@ -89,23 +89,23 @@ ; GFX908: bb.0 (%ir-block.0): ; GFX908-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0 ; GFX908-NEXT: {{ $}} - ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX908-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX908-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GFX908-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX908-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr1 + ; GFX908-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr0 + ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 ; GFX908-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX908-NEXT: GLOBAL_ATOMIC_PK_ADD_F16_SADDR killed [[V_MOV_B32_e32_]], [[COPY]], killed [[REG_SEQUENCE]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) + ; GFX908-NEXT: GLOBAL_ATOMIC_PK_ADD_F16_SADDR killed [[V_MOV_B32_e32_]], [[PRED_COPY]], killed [[REG_SEQUENCE]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) ; GFX908-NEXT: S_ENDPGM 0 ; GFX90A_GFX940-LABEL: name: global_atomic_fadd_v2f16_saddr_no_rtn_flat_intrinsic ; GFX90A_GFX940: bb.0 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; 
GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr0 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 ; GFX90A_GFX940-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX90A_GFX940-NEXT: GLOBAL_ATOMIC_PK_ADD_F16_SADDR killed [[V_MOV_B32_e32_]], [[COPY]], killed [[REG_SEQUENCE]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) + ; GFX90A_GFX940-NEXT: GLOBAL_ATOMIC_PK_ADD_F16_SADDR killed [[V_MOV_B32_e32_]], [[PRED_COPY]], killed [[REG_SEQUENCE]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) ; GFX90A_GFX940-NEXT: S_ENDPGM 0 %ret = call <2 x half> @llvm.amdgcn.flat.atomic.fadd.v2f16.p1.v2f16(ptr addrspace(1) %ptr, <2 x half> %data) ret void diff --git a/llvm/test/CodeGen/AMDGPU/global-atomic-fadd.v2f16-rtn.ll b/llvm/test/CodeGen/AMDGPU/global-atomic-fadd.v2f16-rtn.ll --- a/llvm/test/CodeGen/AMDGPU/global-atomic-fadd.v2f16-rtn.ll +++ b/llvm/test/CodeGen/AMDGPU/global-atomic-fadd.v2f16-rtn.ll @@ -7,13 +7,13 @@ ; GFX90A_GFX940: bb.0 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]] - ; GFX90A_GFX940-NEXT: [[GLOBAL_ATOMIC_PK_ADD_F16_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_PK_ADD_F16_RTN killed [[COPY3]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) - ; GFX90A_GFX940-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_PK_ADD_F16_RTN]] + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:vreg_64_align2 = PRED_COPY [[REG_SEQUENCE]] + ; GFX90A_GFX940-NEXT: [[GLOBAL_ATOMIC_PK_ADD_F16_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_PK_ADD_F16_RTN killed [[PRED_COPY3]], [[PRED_COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) + ; GFX90A_GFX940-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_ATOMIC_PK_ADD_F16_RTN]] ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG $vgpr0 %ret = call <2 x half> @llvm.amdgcn.global.atomic.fadd.v2f16.p1.v2f16(ptr addrspace(1) %ptr, <2 x half> %data) ret <2 x half> %ret @@ -24,13 +24,13 @@ ; GFX90A_GFX940: bb.0 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr1 + 
; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr0 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 ; GFX90A_GFX940-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX90A_GFX940-NEXT: [[GLOBAL_ATOMIC_PK_ADD_F16_SADDR_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_PK_ADD_F16_SADDR_RTN killed [[V_MOV_B32_e32_]], [[COPY]], killed [[REG_SEQUENCE]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) - ; GFX90A_GFX940-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_PK_ADD_F16_SADDR_RTN]] + ; GFX90A_GFX940-NEXT: [[GLOBAL_ATOMIC_PK_ADD_F16_SADDR_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_PK_ADD_F16_SADDR_RTN killed [[V_MOV_B32_e32_]], [[PRED_COPY]], killed [[REG_SEQUENCE]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) + ; GFX90A_GFX940-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_ATOMIC_PK_ADD_F16_SADDR_RTN]] ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG $vgpr0 %ret = call <2 x half> @llvm.amdgcn.global.atomic.fadd.v2f16.p1.v2f16(ptr addrspace(1) %ptr, <2 x half> %data) ret <2 x half> %ret @@ -41,13 +41,13 @@ ; GFX90A_GFX940: bb.0 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]] - ; GFX90A_GFX940-NEXT: [[GLOBAL_ATOMIC_PK_ADD_F16_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_PK_ADD_F16_RTN killed [[COPY3]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) - ; GFX90A_GFX940-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_PK_ADD_F16_RTN]] + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:vreg_64_align2 = PRED_COPY [[REG_SEQUENCE]] + ; GFX90A_GFX940-NEXT: [[GLOBAL_ATOMIC_PK_ADD_F16_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_PK_ADD_F16_RTN killed [[PRED_COPY3]], [[PRED_COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) + ; GFX90A_GFX940-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_ATOMIC_PK_ADD_F16_RTN]] ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG $vgpr0 %ret = call <2 x half> @llvm.amdgcn.flat.atomic.fadd.v2f16.p1.v2f16(ptr addrspace(1) %ptr, <2 x half> %data) ret <2 x half> %ret @@ -58,13 +58,13 @@ ; GFX90A_GFX940: bb.0 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr1 + ; 
GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr0 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 ; GFX90A_GFX940-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX90A_GFX940-NEXT: [[GLOBAL_ATOMIC_PK_ADD_F16_SADDR_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_PK_ADD_F16_SADDR_RTN killed [[V_MOV_B32_e32_]], [[COPY]], killed [[REG_SEQUENCE]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) - ; GFX90A_GFX940-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_PK_ADD_F16_SADDR_RTN]] + ; GFX90A_GFX940-NEXT: [[GLOBAL_ATOMIC_PK_ADD_F16_SADDR_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_PK_ADD_F16_SADDR_RTN killed [[V_MOV_B32_e32_]], [[PRED_COPY]], killed [[REG_SEQUENCE]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) + ; GFX90A_GFX940-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_ATOMIC_PK_ADD_F16_SADDR_RTN]] ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG $vgpr0 %ret = call <2 x half> @llvm.amdgcn.flat.atomic.fadd.v2f16.p1.v2f16(ptr addrspace(1) %ptr, <2 x half> %data) ret <2 x half> %ret diff --git a/llvm/test/CodeGen/AMDGPU/greedy-global-heuristic.mir b/llvm/test/CodeGen/AMDGPU/greedy-global-heuristic.mir --- a/llvm/test/CodeGen/AMDGPU/greedy-global-heuristic.mir +++ b/llvm/test/CodeGen/AMDGPU/greedy-global-heuristic.mir @@ -133,14 +133,14 @@ ; CHECK-NEXT: S_NOP 0 ; CHECK-NEXT: S_NOP 0 ; CHECK-NEXT: S_NOP 0 - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_128 = COPY %31 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_128 = PRED_COPY %31 ; CHECK-NEXT: S_NOP 0, implicit %31 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY %29 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_128 = PRED_COPY %29 ; CHECK-NEXT: S_NOP 0, implicit %29 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vreg_128 = COPY %27 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_128 = PRED_COPY %27 ; CHECK-NEXT: S_NOP 0, implicit %27 ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE1:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.1, align 4, addrspace 5) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vreg_128 = COPY [[SI_SPILL_V128_RESTORE1]] + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:vreg_128 = PRED_COPY [[SI_SPILL_V128_RESTORE1]] ; CHECK-NEXT: S_NOP 0, implicit [[SI_SPILL_V128_RESTORE1]] ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE2:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.0, align 4, addrspace 5) ; CHECK-NEXT: S_NOP 0, implicit [[SI_SPILL_V128_RESTORE2]] @@ -156,10 +156,10 @@ ; CHECK-NEXT: S_NOP 0, implicit %0 ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE6:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.0, align 4, addrspace 5) ; CHECK-NEXT: S_NOP 0, implicit [[SI_SPILL_V128_RESTORE6]] - ; CHECK-NEXT: S_NOP 0, implicit [[COPY3]] - ; CHECK-NEXT: S_NOP 0, implicit [[COPY2]] - ; CHECK-NEXT: S_NOP 0, implicit [[COPY1]] - ; CHECK-NEXT: S_NOP 0, implicit [[COPY]] + ; CHECK-NEXT: S_NOP 0, implicit [[PRED_COPY3]] + ; CHECK-NEXT: S_NOP 0, implicit [[PRED_COPY2]] + ; CHECK-NEXT: S_NOP 0, implicit [[PRED_COPY1]] + ; CHECK-NEXT: S_NOP 0, implicit [[PRED_COPY]] bb.0: S_NOP 0, implicit-def %0:vreg_128 S_NOP 0, implicit-def %1:vreg_128 diff --git a/llvm/test/CodeGen/AMDGPU/greedy-instruction-split-subrange.mir b/llvm/test/CodeGen/AMDGPU/greedy-instruction-split-subrange.mir --- a/llvm/test/CodeGen/AMDGPU/greedy-instruction-split-subrange.mir +++ 
b/llvm/test/CodeGen/AMDGPU/greedy-instruction-split-subrange.mir @@ -23,12 +23,12 @@ ; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX2_SADDR1:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2_SADDR undef $sgpr0_sgpr1, undef %3:vgpr_32, 4, 0, implicit $exec :: (load (s64), addrspace 1) ; CHECK-NEXT: SI_SPILL_V64_SAVE [[GLOBAL_LOAD_DWORDX2_SADDR1]], %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, align 4, addrspace 5) ; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX2_SADDR2:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2_SADDR undef $sgpr0_sgpr1, undef %5:vgpr_32, 8, 0, implicit $exec :: (load (s64), addrspace 1) - ; CHECK-NEXT: undef %9.sub1:vreg_64 = COPY [[GLOBAL_LOAD_DWORDX2_SADDR]].sub1 + ; CHECK-NEXT: undef %9.sub1:vreg_64 = PRED_COPY [[GLOBAL_LOAD_DWORDX2_SADDR]].sub1, implicit $exec ; CHECK-NEXT: S_NOP 0, implicit %9.sub1 ; CHECK-NEXT: [[SI_SPILL_V64_RESTORE:%[0-9]+]]:vreg_64 = SI_SPILL_V64_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s64) from %stack.0, align 4, addrspace 5) - ; CHECK-NEXT: undef %11.sub0:vreg_64 = COPY [[SI_SPILL_V64_RESTORE]].sub0 + ; CHECK-NEXT: undef %11.sub0:vreg_64 = PRED_COPY [[SI_SPILL_V64_RESTORE]].sub0, implicit $exec ; CHECK-NEXT: S_NOP 0, implicit %11.sub0 - ; CHECK-NEXT: undef %7.sub1:vreg_64 = COPY [[GLOBAL_LOAD_DWORDX2_SADDR2]].sub1 + ; CHECK-NEXT: undef %7.sub1:vreg_64 = PRED_COPY [[GLOBAL_LOAD_DWORDX2_SADDR2]].sub1, implicit $exec ; CHECK-NEXT: S_NOP 0, implicit %7.sub1 ; CHECK-NEXT: S_ENDPGM 0 %1:vreg_64 = GLOBAL_LOAD_DWORDX2_SADDR undef $sgpr0_sgpr1, undef %4:vgpr_32, 0, 0, implicit $exec :: (load (s64), addrspace 1) @@ -67,17 +67,17 @@ ; CHECK-NEXT: SI_SPILL_V64_SAVE [[GLOBAL_LOAD_DWORDX2_SADDR2]], %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, align 4, addrspace 5) ; CHECK-NEXT: S_NOP 0, implicit-def [[GLOBAL_LOAD_DWORDX2_SADDR]].sub0 ; CHECK-NEXT: [[SI_SPILL_V64_RESTORE:%[0-9]+]]:vreg_64 = SI_SPILL_V64_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s64) from %stack.1, align 4, addrspace 5) - ; CHECK-NEXT: undef %13.sub0:vreg_64 = COPY [[SI_SPILL_V64_RESTORE]].sub0 + ; CHECK-NEXT: undef %13.sub0:vreg_64 = PRED_COPY [[SI_SPILL_V64_RESTORE]].sub0, implicit $exec ; CHECK-NEXT: S_NOP 0, implicit-def %13.sub1 - ; CHECK-NEXT: undef %15.sub0:vreg_64 = COPY %13.sub0 + ; CHECK-NEXT: undef %15.sub0:vreg_64 = PRED_COPY %13.sub0, implicit $exec ; CHECK-NEXT: [[SI_SPILL_V64_RESTORE1:%[0-9]+]]:vreg_64 = SI_SPILL_V64_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s64) from %stack.0, align 4, addrspace 5) - ; CHECK-NEXT: undef %7.sub1:vreg_64 = COPY [[SI_SPILL_V64_RESTORE1]].sub1 + ; CHECK-NEXT: undef %7.sub1:vreg_64 = PRED_COPY [[SI_SPILL_V64_RESTORE1]].sub1, implicit $exec ; CHECK-NEXT: S_NOP 0, implicit-def %7.sub0 - ; CHECK-NEXT: undef %9.sub1:vreg_64 = COPY %7.sub1 + ; CHECK-NEXT: undef %9.sub1:vreg_64 = PRED_COPY %7.sub1, implicit $exec ; CHECK-NEXT: S_NOP 0, implicit [[GLOBAL_LOAD_DWORDX2_SADDR]].sub1 - ; CHECK-NEXT: undef %14.sub0:vreg_64 = COPY %15.sub0 + ; CHECK-NEXT: undef %14.sub0:vreg_64 = PRED_COPY %15.sub0, implicit $exec ; CHECK-NEXT: S_NOP 0, implicit %14.sub0 - ; CHECK-NEXT: undef %8.sub1:vreg_64 = COPY %9.sub1 + ; CHECK-NEXT: undef %8.sub1:vreg_64 = PRED_COPY %9.sub1, implicit $exec ; CHECK-NEXT: S_NOP 0, implicit %8.sub1 ; CHECK-NEXT: S_ENDPGM 0 %1:vreg_64 = GLOBAL_LOAD_DWORDX2_SADDR undef $sgpr0_sgpr1, undef %4:vgpr_32, 0, 0, implicit $exec :: (load (s64), addrspace 1) diff --git a/llvm/test/CodeGen/AMDGPU/i1_copy_phi_with_phi_incoming_value.mir b/llvm/test/CodeGen/AMDGPU/i1_copy_phi_with_phi_incoming_value.mir --- 
a/llvm/test/CodeGen/AMDGPU/i1_copy_phi_with_phi_incoming_value.mir +++ b/llvm/test/CodeGen/AMDGPU/i1_copy_phi_with_phi_incoming_value.mir @@ -36,7 +36,7 @@ ; GCN-NEXT: [[PHI:%[0-9]+]]:sreg_64 = PHI %15, %bb.6 ; GCN-NEXT: SI_END_CF [[PHI]], implicit-def dead $exec, implicit-def dead $scc, implicit $exec ; GCN-NEXT: [[S_MOV_B64_2:%[0-9]+]]:sreg_64 = S_MOV_B64 -1 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sreg_64 = COPY $exec + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $exec ; GCN-NEXT: S_BRANCH %bb.5 ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.3: @@ -45,8 +45,8 @@ ; GCN-NEXT: ATOMIC_FENCE 5, 2 ; GCN-NEXT: S_BARRIER ; GCN-NEXT: ATOMIC_FENCE 4, 2 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sreg_64 = COPY %18 - ; GCN-NEXT: [[SI_IF1:%[0-9]+]]:sreg_64 = SI_IF [[COPY6]], %bb.7, implicit-def dead $exec, implicit-def dead $scc, implicit $exec + ; GCN-NEXT: [[COPY5:%[0-9]+]]:sreg_64 = COPY %18 + ; GCN-NEXT: [[SI_IF1:%[0-9]+]]:sreg_64 = SI_IF [[COPY5]], %bb.7, implicit-def dead $exec, implicit-def dead $scc, implicit $exec ; GCN-NEXT: S_BRANCH %bb.4 ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.4: @@ -57,7 +57,7 @@ ; GCN-NEXT: bb.5: ; GCN-NEXT: successors: %bb.3(0x80000000) ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[PHI1:%[0-9]+]]:sreg_64 = PHI [[S_MOV_B64_]], %bb.0, [[COPY5]], %bb.2 + ; GCN-NEXT: [[PHI1:%[0-9]+]]:sreg_64 = PHI [[S_MOV_B64_]], %bb.0, [[PRED_COPY]], %bb.2 ; GCN-NEXT: SI_END_CF [[SI_IF]], implicit-def dead $exec, implicit-def dead $scc, implicit $exec ; GCN-NEXT: S_BRANCH %bb.3 ; GCN-NEXT: {{ $}} @@ -65,8 +65,8 @@ ; GCN-NEXT: successors: %bb.2(0x40000000), %bb.6(0x40000000) ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[PHI2:%[0-9]+]]:sreg_64 = PHI [[S_MOV_B64_1]], %bb.1, %15, %bb.6 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sreg_64 = COPY [[COPY4]] - ; GCN-NEXT: [[SI_IF_BREAK:%[0-9]+]]:sreg_64 = SI_IF_BREAK [[COPY7]], [[PHI2]], implicit-def dead $scc + ; GCN-NEXT: [[COPY6:%[0-9]+]]:sreg_64 = COPY [[COPY4]] + ; GCN-NEXT: [[SI_IF_BREAK:%[0-9]+]]:sreg_64 = SI_IF_BREAK [[COPY6]], [[PHI2]], implicit-def dead $scc ; GCN-NEXT: SI_LOOP [[SI_IF_BREAK]], %bb.6, implicit-def dead $exec, implicit-def dead $scc, implicit $exec ; GCN-NEXT: S_BRANCH %bb.2 ; GCN-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/indirect-addressing-term.ll b/llvm/test/CodeGen/AMDGPU/indirect-addressing-term.ll --- a/llvm/test/CodeGen/AMDGPU/indirect-addressing-term.ll +++ b/llvm/test/CodeGen/AMDGPU/indirect-addressing-term.ll @@ -13,16 +13,16 @@ ; GCN-NEXT: successors: %bb.1(0x80000000) ; GCN-NEXT: liveins: $vgpr0, $sgpr0_sgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY killed $vgpr0 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY killed $vgpr0 ; GCN-NEXT: renamable $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed renamable $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load (s64) from %ir.out.kernarg.offset, align 4, addrspace 4) - ; GCN-NEXT: renamable $sgpr6 = COPY renamable $sgpr1 - ; GCN-NEXT: renamable $sgpr0 = COPY renamable $sgpr0, implicit killed $sgpr0_sgpr1 + ; GCN-NEXT: renamable $sgpr6 = PRED_COPY renamable $sgpr1 + ; GCN-NEXT: renamable $sgpr0 = PRED_COPY renamable $sgpr0, implicit killed $sgpr0_sgpr1 ; GCN-NEXT: renamable $sgpr4 = S_MOV_B32 61440 ; GCN-NEXT: renamable $sgpr5 = S_MOV_B32 -1 - ; GCN-NEXT: undef renamable $sgpr0 = COPY killed renamable $sgpr0, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: renamable $sgpr1 = COPY killed renamable $sgpr6 - ; GCN-NEXT: renamable $sgpr2 = COPY killed renamable $sgpr5 - ; GCN-NEXT: renamable $sgpr3 = COPY killed renamable $sgpr4 + ; GCN-NEXT: undef renamable $sgpr0 = PRED_COPY killed 
renamable $sgpr0, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: renamable $sgpr1 = PRED_COPY killed renamable $sgpr6 + ; GCN-NEXT: renamable $sgpr2 = PRED_COPY killed renamable $sgpr5 + ; GCN-NEXT: renamable $sgpr3 = PRED_COPY killed renamable $sgpr4 ; GCN-NEXT: SI_SPILL_S128_SAVE killed $sgpr0_sgpr1_sgpr2_sgpr3, %stack.1, implicit $exec, implicit $sgpr32 :: (store (s128) into %stack.1, align 4, addrspace 5) ; GCN-NEXT: renamable $sgpr0 = S_MOV_B32 16 ; GCN-NEXT: renamable $sgpr1 = S_MOV_B32 15 @@ -40,38 +40,38 @@ ; GCN-NEXT: renamable $sgpr13 = S_MOV_B32 2 ; GCN-NEXT: renamable $sgpr14 = S_MOV_B32 1 ; GCN-NEXT: renamable $sgpr15 = S_MOV_B32 0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY killed renamable $sgpr15 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY killed renamable $sgpr14 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed renamable $sgpr13 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY killed renamable $sgpr12 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY killed renamable $sgpr11 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY killed renamable $sgpr10 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY killed renamable $sgpr9 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY killed renamable $sgpr8 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY killed renamable $sgpr7 - ; GCN-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY killed renamable $sgpr6 - ; GCN-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY killed renamable $sgpr5 - ; GCN-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY killed renamable $sgpr4 - ; GCN-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY killed renamable $sgpr3 - ; GCN-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY killed renamable $sgpr2 - ; GCN-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY killed renamable $sgpr1 - ; GCN-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY killed renamable $sgpr0 - ; GCN-NEXT: undef %28.sub0:vreg_512 = COPY [[COPY1]] - ; GCN-NEXT: %28.sub1:vreg_512 = COPY [[COPY2]] - ; GCN-NEXT: %28.sub2:vreg_512 = COPY [[COPY3]] - ; GCN-NEXT: %28.sub3:vreg_512 = COPY [[COPY4]] - ; GCN-NEXT: %28.sub4:vreg_512 = COPY [[COPY5]] - ; GCN-NEXT: %28.sub5:vreg_512 = COPY [[COPY6]] - ; GCN-NEXT: %28.sub6:vreg_512 = COPY [[COPY7]] - ; GCN-NEXT: %28.sub7:vreg_512 = COPY [[COPY8]] - ; GCN-NEXT: %28.sub8:vreg_512 = COPY [[COPY9]] - ; GCN-NEXT: %28.sub9:vreg_512 = COPY [[COPY10]] - ; GCN-NEXT: %28.sub10:vreg_512 = COPY [[COPY11]] - ; GCN-NEXT: %28.sub11:vreg_512 = COPY [[COPY12]] - ; GCN-NEXT: %28.sub12:vreg_512 = COPY [[COPY13]] - ; GCN-NEXT: %28.sub13:vreg_512 = COPY [[COPY14]] - ; GCN-NEXT: %28.sub14:vreg_512 = COPY [[COPY15]] - ; GCN-NEXT: %28.sub15:vreg_512 = COPY [[COPY16]] + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY killed renamable $sgpr15 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY killed renamable $sgpr14 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY killed renamable $sgpr13 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY killed renamable $sgpr12 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY killed renamable $sgpr11 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY killed renamable $sgpr10 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY killed renamable $sgpr9 + ; GCN-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY killed renamable $sgpr8 + ; GCN-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY killed renamable $sgpr7 + ; GCN-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY killed renamable $sgpr6 + ; GCN-NEXT: [[PRED_COPY11:%[0-9]+]]:vgpr_32 = PRED_COPY killed renamable $sgpr5 + ; GCN-NEXT: [[PRED_COPY12:%[0-9]+]]:vgpr_32 
= PRED_COPY killed renamable $sgpr4 + ; GCN-NEXT: [[PRED_COPY13:%[0-9]+]]:vgpr_32 = PRED_COPY killed renamable $sgpr3 + ; GCN-NEXT: [[PRED_COPY14:%[0-9]+]]:vgpr_32 = PRED_COPY killed renamable $sgpr2 + ; GCN-NEXT: [[PRED_COPY15:%[0-9]+]]:vgpr_32 = PRED_COPY killed renamable $sgpr1 + ; GCN-NEXT: [[PRED_COPY16:%[0-9]+]]:vgpr_32 = PRED_COPY killed renamable $sgpr0 + ; GCN-NEXT: undef %28.sub0:vreg_512 = PRED_COPY [[PRED_COPY1]] + ; GCN-NEXT: %28.sub1:vreg_512 = PRED_COPY [[PRED_COPY2]] + ; GCN-NEXT: %28.sub2:vreg_512 = PRED_COPY [[PRED_COPY3]] + ; GCN-NEXT: %28.sub3:vreg_512 = PRED_COPY [[PRED_COPY4]] + ; GCN-NEXT: %28.sub4:vreg_512 = PRED_COPY [[PRED_COPY5]] + ; GCN-NEXT: %28.sub5:vreg_512 = PRED_COPY [[PRED_COPY6]] + ; GCN-NEXT: %28.sub6:vreg_512 = PRED_COPY [[PRED_COPY7]] + ; GCN-NEXT: %28.sub7:vreg_512 = PRED_COPY [[PRED_COPY8]] + ; GCN-NEXT: %28.sub8:vreg_512 = PRED_COPY [[PRED_COPY9]] + ; GCN-NEXT: %28.sub9:vreg_512 = PRED_COPY [[PRED_COPY10]] + ; GCN-NEXT: %28.sub10:vreg_512 = PRED_COPY [[PRED_COPY11]] + ; GCN-NEXT: %28.sub11:vreg_512 = PRED_COPY [[PRED_COPY12]] + ; GCN-NEXT: %28.sub12:vreg_512 = PRED_COPY [[PRED_COPY13]] + ; GCN-NEXT: %28.sub13:vreg_512 = PRED_COPY [[PRED_COPY14]] + ; GCN-NEXT: %28.sub14:vreg_512 = PRED_COPY [[PRED_COPY15]] + ; GCN-NEXT: %28.sub15:vreg_512 = PRED_COPY [[PRED_COPY16]] ; GCN-NEXT: renamable $sgpr0_sgpr1 = S_MOV_B64 $exec ; GCN-NEXT: SI_SPILL_S64_SAVE killed $sgpr0_sgpr1, %stack.0, implicit $exec, implicit $sgpr32 :: (store (s64) into %stack.0, align 4, addrspace 5) ; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF @@ -81,13 +81,13 @@ ; GCN-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) ; GCN-NEXT: {{ $}} ; GCN-NEXT: $sgpr0_sgpr1 = SI_SPILL_S64_RESTORE %stack.2, implicit $exec, implicit $sgpr32 :: (load (s64) from %stack.2, align 4, addrspace 5) - ; GCN-NEXT: dead %45:vgpr_32 = COPY [[DEF]] - ; GCN-NEXT: renamable $sgpr2 = V_READFIRSTLANE_B32 [[COPY]](s32), implicit $exec - ; GCN-NEXT: renamable $sgpr0_sgpr1 = V_CMP_EQ_U32_e64 $sgpr2, [[COPY]](s32), implicit $exec + ; GCN-NEXT: dead [[PRED_COPY17:%[0-9]+]]:vgpr_32 = PRED_COPY [[DEF]] + ; GCN-NEXT: renamable $sgpr2 = V_READFIRSTLANE_B32 [[PRED_COPY]](s32), implicit $exec + ; GCN-NEXT: renamable $sgpr0_sgpr1 = V_CMP_EQ_U32_e64 $sgpr2, [[PRED_COPY]](s32), implicit $exec ; GCN-NEXT: renamable $sgpr0_sgpr1 = S_AND_SAVEEXEC_B64 killed renamable $sgpr0_sgpr1, implicit-def $exec, implicit-def dead $scc, implicit $exec ; GCN-NEXT: [[V_INDIRECT_REG_READ_GPR_IDX_B32_V16_:%[0-9]+]]:vgpr_32 = V_INDIRECT_REG_READ_GPR_IDX_B32_V16 %28, killed $sgpr2, 11, implicit-def $m0, implicit $m0, implicit $exec - ; GCN-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[V_INDIRECT_REG_READ_GPR_IDX_B32_V16_]] - ; GCN-NEXT: renamable $sgpr2_sgpr3 = COPY renamable $sgpr0_sgpr1 + ; GCN-NEXT: [[PRED_COPY18:%[0-9]+]]:vgpr_32 = PRED_COPY [[V_INDIRECT_REG_READ_GPR_IDX_B32_V16_]] + ; GCN-NEXT: renamable $sgpr2_sgpr3 = PRED_COPY renamable $sgpr0_sgpr1 ; GCN-NEXT: SI_SPILL_S64_SAVE killed $sgpr2_sgpr3, %stack.2, implicit $exec, implicit $sgpr32 :: (store (s64) into %stack.2, align 4, addrspace 5) ; GCN-NEXT: $exec = S_XOR_B64_term $exec, killed renamable $sgpr0_sgpr1, implicit-def dead $scc ; GCN-NEXT: S_CBRANCH_EXECNZ %bb.1, implicit $exec diff --git a/llvm/test/CodeGen/AMDGPU/inline-asm.i128.ll b/llvm/test/CodeGen/AMDGPU/inline-asm.i128.ll --- a/llvm/test/CodeGen/AMDGPU/inline-asm.i128.ll +++ b/llvm/test/CodeGen/AMDGPU/inline-asm.i128.ll @@ -9,14 +9,14 @@ ; GFX908-LABEL: name: s_input_output_i128 ; GFX908: bb.0 (%ir-block.0): ; GFX908-NEXT: 
INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6750218 /* regdef:SGPR_128 */, def %4 - ; GFX908-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY %4 - ; GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 6750217 /* reguse:SGPR_128 */, [[COPY]] + ; GFX908-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_128 = PRED_COPY %4 + ; GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 6750217 /* reguse:SGPR_128 */, [[PRED_COPY]] ; GFX908-NEXT: S_ENDPGM 0 ; GFX90A-LABEL: name: s_input_output_i128 ; GFX90A: bb.0 (%ir-block.0): ; GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6750218 /* regdef:SGPR_128 */, def %4 - ; GFX90A-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY %4 - ; GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 6750217 /* reguse:SGPR_128 */, [[COPY]] + ; GFX90A-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_128 = PRED_COPY %4 + ; GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 6750217 /* reguse:SGPR_128 */, [[PRED_COPY]] ; GFX90A-NEXT: S_ENDPGM 0 %val = tail call i128 asm sideeffect "; def $0", "=s"() call void asm sideeffect "; use $0", "s"(i128 %val) @@ -27,14 +27,14 @@ ; GFX908-LABEL: name: v_input_output_i128 ; GFX908: bb.0 (%ir-block.0): ; GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 5701642 /* regdef:VReg_128 */, def %4 - ; GFX908-NEXT: [[COPY:%[0-9]+]]:vreg_128 = COPY %4 - ; GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 5701641 /* reguse:VReg_128 */, [[COPY]] + ; GFX908-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_128 = PRED_COPY %4 + ; GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 5701641 /* reguse:VReg_128 */, [[PRED_COPY]] ; GFX908-NEXT: S_ENDPGM 0 ; GFX90A-LABEL: name: v_input_output_i128 ; GFX90A: bb.0 (%ir-block.0): ; GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6029322 /* regdef:VReg_128_Align2 */, def %4 - ; GFX90A-NEXT: [[COPY:%[0-9]+]]:vreg_128_align2 = COPY %4 - ; GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 6029321 /* reguse:VReg_128_Align2 */, [[COPY]] + ; GFX90A-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_128_align2 = PRED_COPY %4 + ; GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 6029321 /* reguse:VReg_128_Align2 */, [[PRED_COPY]] ; GFX90A-NEXT: S_ENDPGM 0 %val = tail call i128 asm sideeffect "; def $0", "=v"() call void asm sideeffect "; use $0", "v"(i128 %val) @@ -45,14 +45,14 @@ ; GFX908-LABEL: name: a_input_output_i128 ; GFX908: bb.0 (%ir-block.0): ; GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 5636106 /* regdef:AReg_128 */, def %4 - ; GFX908-NEXT: [[COPY:%[0-9]+]]:areg_128 = COPY %4 - ; GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 5636105 /* reguse:AReg_128 */, [[COPY]] + ; GFX908-NEXT: [[PRED_COPY:%[0-9]+]]:areg_128 = PRED_COPY %4 + ; GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 5636105 /* reguse:AReg_128 */, [[PRED_COPY]] ; GFX908-NEXT: S_ENDPGM 0 ; GFX90A-LABEL: name: a_input_output_i128 ; GFX90A: bb.0 (%ir-block.0): ; GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 5898250 /* regdef:AReg_128_Align2 */, def %4 - ; GFX90A-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY %4 - ; GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 5898249 /* reguse:AReg_128_Align2 */, [[COPY]] + ; GFX90A-NEXT: [[PRED_COPY:%[0-9]+]]:areg_128_align2 = PRED_COPY %4 + ; GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 5898249 /* reguse:AReg_128_Align2 */, [[PRED_COPY]] ; GFX90A-NEXT: S_ENDPGM 0 %val = call i128 asm 
sideeffect "; def $0", "=a"() call void asm sideeffect "; use $0", "a"(i128 %val) diff --git a/llvm/test/CodeGen/AMDGPU/lds-zero-initializer.ll b/llvm/test/CodeGen/AMDGPU/lds-zero-initializer.ll --- a/llvm/test/CodeGen/AMDGPU/lds-zero-initializer.ll +++ b/llvm/test/CodeGen/AMDGPU/lds-zero-initializer.ll @@ -12,20 +12,20 @@ ; GCN-LABEL: name: load_zeroinit_lds_global ; GCN: bb.0 (%ir-block.0): ; GCN: liveins: $sgpr0_sgpr1 - ; GCN: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr0_sgpr1 - ; GFX8: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 9, 0 - ; GFX9: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 36, 0 - ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[S_LOAD_DWORDX2_IMM]].sub1 - ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[S_LOAD_DWORDX2_IMM]].sub0 + ; GCN: [[PRED_COPY:%[0-9]+]]:sgpr_64(p4) = PRED_COPY $sgpr0_sgpr1 + ; GFX8: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[PRED_COPY]](p4), 9, 0 + ; GFX9: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[PRED_COPY]](p4), 36, 0 + ; GFX8: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY [[S_LOAD_DWORDX2_IMM]].sub1 + ; GFX8: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY [[S_LOAD_DWORDX2_IMM]].sub0 ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX8: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 - ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY2]], %subreg.sub0, killed [[COPY1]], %subreg.sub1, killed [[S_MOV_B32_1]], %subreg.sub2, killed [[S_MOV_B32_]], %subreg.sub3 + ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[PRED_COPY2]], %subreg.sub0, killed [[PRED_COPY1]], %subreg.sub1, killed [[S_MOV_B32_1]], %subreg.sub2, killed [[S_MOV_B32_]], %subreg.sub3 ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 target-flags(amdgpu-abs32-lo) @lds, implicit $exec ; GCN: SI_INIT_M0 -1, implicit-def $m0 ; GCN: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 killed [[V_MOV_B32_e32_]], 40, 0, implicit $m0, implicit $exec - ; GFX9: [[COPY1:%[0-9]+]]:vreg_64 = COPY [[S_LOAD_DWORDX2_IMM]] + ; GFX9: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY [[S_LOAD_DWORDX2_IMM]] ; GFX8: BUFFER_STORE_DWORD_OFFSET killed [[DS_READ_B32_]], killed [[REG_SEQUENCE]], 0, 0, 0, 0, implicit $exec - ; GFX9: FLAT_STORE_DWORD killed [[COPY1]], killed [[DS_READ_B32_]], 0, 0, implicit $exec, implicit $flat_scr + ; GFX9: FLAT_STORE_DWORD killed [[PRED_COPY1]], killed [[DS_READ_B32_]], 0, 0, implicit $exec, implicit $flat_scr ; GCN: S_ENDPGM 0 %gep = getelementptr [256 x i32], ptr addrspace(3) @lds, i32 0, i32 10 %ld = load i32, ptr addrspace(3) %gep diff --git a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll --- a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll +++ b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; When EXPENSIVE_CHECKS are enabled, the machine verifier appears between each ; pass. Ignore it with 'grep -v'. 
; RUN: llc -O0 -mtriple=amdgcn--amdhsa -disable-verify -debug-pass=Structure < %s 2>&1 \ @@ -122,7 +123,7 @@ ; GCN-O0-NEXT: Fast Register Allocator ; GCN-O0-NEXT: SI lower SGPR spill instructions ; GCN-O0-NEXT: Fast Register Allocator -; GCN-O0-NEXT: SI Fix VGPR copies +; GCN-O0-NEXT: SI Simplify Predicated Copies ; GCN-O0-NEXT: Remove Redundant DEBUG_VALUE analysis ; GCN-O0-NEXT: Fixup Statepoint Caller Saved ; GCN-O0-NEXT: Lazy Machine Block Frequency Analysis @@ -367,18 +368,17 @@ ; GCN-O1-NEXT: Live Register Matrix ; GCN-O1-NEXT: Greedy Register Allocator ; GCN-O1-NEXT: GCN NSA Reassign +; GCN-O1-NEXT: SI Simplify Predicated Copies ; GCN-O1-NEXT: Virtual Register Rewriter ; GCN-O1-NEXT: Stack Slot Coloring ; GCN-O1-NEXT: Machine Copy Propagation Pass ; GCN-O1-NEXT: Machine Loop Invariant Code Motion -; GCN-O1-NEXT: SI Fix VGPR copies ; GCN-O1-NEXT: SI optimize exec mask operations ; GCN-O1-NEXT: Remove Redundant DEBUG_VALUE analysis ; GCN-O1-NEXT: Fixup Statepoint Caller Saved ; GCN-O1-NEXT: PostRA Machine Sink -; GCN-O1-NEXT: MachineDominator Tree Construction -; GCN-O1-NEXT: Machine Natural Loop Construction ; GCN-O1-NEXT: Machine Block Frequency Analysis +; GCN-O1-NEXT: MachineDominator Tree Construction ; GCN-O1-NEXT: MachinePostDominator Tree Construction ; GCN-O1-NEXT: Lazy Machine Block Frequency Analysis ; GCN-O1-NEXT: Machine Optimization Remark Emitter @@ -671,18 +671,17 @@ ; GCN-O1-OPTS-NEXT: Live Register Matrix ; GCN-O1-OPTS-NEXT: Greedy Register Allocator ; GCN-O1-OPTS-NEXT: GCN NSA Reassign +; GCN-O1-OPTS-NEXT: SI Simplify Predicated Copies ; GCN-O1-OPTS-NEXT: Virtual Register Rewriter ; GCN-O1-OPTS-NEXT: Stack Slot Coloring ; GCN-O1-OPTS-NEXT: Machine Copy Propagation Pass ; GCN-O1-OPTS-NEXT: Machine Loop Invariant Code Motion -; GCN-O1-OPTS-NEXT: SI Fix VGPR copies ; GCN-O1-OPTS-NEXT: SI optimize exec mask operations ; GCN-O1-OPTS-NEXT: Remove Redundant DEBUG_VALUE analysis ; GCN-O1-OPTS-NEXT: Fixup Statepoint Caller Saved ; GCN-O1-OPTS-NEXT: PostRA Machine Sink -; GCN-O1-OPTS-NEXT: MachineDominator Tree Construction -; GCN-O1-OPTS-NEXT: Machine Natural Loop Construction ; GCN-O1-OPTS-NEXT: Machine Block Frequency Analysis +; GCN-O1-OPTS-NEXT: MachineDominator Tree Construction ; GCN-O1-OPTS-NEXT: MachinePostDominator Tree Construction ; GCN-O1-OPTS-NEXT: Lazy Machine Block Frequency Analysis ; GCN-O1-OPTS-NEXT: Machine Optimization Remark Emitter @@ -977,18 +976,17 @@ ; GCN-O2-NEXT: Live Register Matrix ; GCN-O2-NEXT: Greedy Register Allocator ; GCN-O2-NEXT: GCN NSA Reassign +; GCN-O2-NEXT: SI Simplify Predicated Copies ; GCN-O2-NEXT: Virtual Register Rewriter ; GCN-O2-NEXT: Stack Slot Coloring ; GCN-O2-NEXT: Machine Copy Propagation Pass ; GCN-O2-NEXT: Machine Loop Invariant Code Motion -; GCN-O2-NEXT: SI Fix VGPR copies ; GCN-O2-NEXT: SI optimize exec mask operations ; GCN-O2-NEXT: Remove Redundant DEBUG_VALUE analysis ; GCN-O2-NEXT: Fixup Statepoint Caller Saved ; GCN-O2-NEXT: PostRA Machine Sink -; GCN-O2-NEXT: MachineDominator Tree Construction -; GCN-O2-NEXT: Machine Natural Loop Construction ; GCN-O2-NEXT: Machine Block Frequency Analysis +; GCN-O2-NEXT: MachineDominator Tree Construction ; GCN-O2-NEXT: MachinePostDominator Tree Construction ; GCN-O2-NEXT: Lazy Machine Block Frequency Analysis ; GCN-O2-NEXT: Machine Optimization Remark Emitter @@ -1294,18 +1292,17 @@ ; GCN-O3-NEXT: Live Register Matrix ; GCN-O3-NEXT: Greedy Register Allocator ; GCN-O3-NEXT: GCN NSA Reassign +; GCN-O3-NEXT: SI Simplify Predicated Copies ; GCN-O3-NEXT: Virtual Register Rewriter ; 
GCN-O3-NEXT: Stack Slot Coloring ; GCN-O3-NEXT: Machine Copy Propagation Pass ; GCN-O3-NEXT: Machine Loop Invariant Code Motion -; GCN-O3-NEXT: SI Fix VGPR copies ; GCN-O3-NEXT: SI optimize exec mask operations ; GCN-O3-NEXT: Remove Redundant DEBUG_VALUE analysis ; GCN-O3-NEXT: Fixup Statepoint Caller Saved ; GCN-O3-NEXT: PostRA Machine Sink -; GCN-O3-NEXT: MachineDominator Tree Construction -; GCN-O3-NEXT: Machine Natural Loop Construction ; GCN-O3-NEXT: Machine Block Frequency Analysis +; GCN-O3-NEXT: MachineDominator Tree Construction ; GCN-O3-NEXT: MachinePostDominator Tree Construction ; GCN-O3-NEXT: Lazy Machine Block Frequency Analysis ; GCN-O3-NEXT: Machine Optimization Remark Emitter diff --git a/llvm/test/CodeGen/AMDGPU/load-global-i16.ll b/llvm/test/CodeGen/AMDGPU/load-global-i16.ll --- a/llvm/test/CodeGen/AMDGPU/load-global-i16.ll +++ b/llvm/test/CodeGen/AMDGPU/load-global-i16.ll @@ -7245,11 +7245,13 @@ ; GCN-NOHSA-SI-NEXT: buffer_store_dword v20, off, s[12:15], 0 offset:24 ; 4-byte Folded Spill ; GCN-NOHSA-SI-NEXT: buffer_store_dword v21, off, s[12:15], 0 offset:28 ; 4-byte Folded Spill ; GCN-NOHSA-SI-NEXT: buffer_store_dword v22, off, s[12:15], 0 offset:32 ; 4-byte Folded Spill -; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v4, 0xffff, v3 +; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(1) +; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v21, 0xffff, v3 ; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v34, 0xffff, v5 ; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v40, 16, v6 ; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v38, 0xffff, v6 -; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v30, 0xffff, v8 +; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v20, 0xffff, v8 +; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v30, v20 ; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v32, v22 ; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v28, 16, v7 ; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v26, 0xffff, v7 @@ -7285,6 +7287,7 @@ ; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v29, v1 ; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v35, v1 ; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v37, v1 +; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v4, v21 ; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v6, v23 ; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v5, v1 ; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v7, v1 diff --git a/llvm/test/CodeGen/AMDGPU/lower-control-flow-live-variables-update.mir b/llvm/test/CodeGen/AMDGPU/lower-control-flow-live-variables-update.mir --- a/llvm/test/CodeGen/AMDGPU/lower-control-flow-live-variables-update.mir +++ b/llvm/test/CodeGen/AMDGPU/lower-control-flow-live-variables-update.mir @@ -16,14 +16,14 @@ ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY killed $vgpr0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY killed $vgpr0 ; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 0, killed [[COPY]], implicit $exec - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B32_e32_]] - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY killed [[V_MOV_B32_e32_]] - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec - ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY3]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc - ; CHECK-NEXT: [[S_XOR_B64_:%[0-9]+]]:sreg_64_xexec = S_XOR_B64 [[S_AND_B64_]], [[COPY3]], implicit-def dead $scc + ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 0, killed [[PRED_COPY]], implicit $exec + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[V_MOV_B32_e32_]] + ; 
CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY killed [[V_MOV_B32_e32_]] + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_64 = PRED_COPY $exec, implicit-def $exec + ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[PRED_COPY3]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc + ; CHECK-NEXT: [[S_XOR_B64_:%[0-9]+]]:sreg_64_xexec = S_XOR_B64 [[S_AND_B64_]], [[PRED_COPY3]], implicit-def dead $scc ; CHECK-NEXT: $exec = S_MOV_B64_term killed [[S_AND_B64_]] ; CHECK-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term killed [[S_XOR_B64_]], implicit $exec ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.1, implicit $exec @@ -32,27 +32,27 @@ ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_64_xexec = COPY killed [[S_MOV_B64_term]] - ; CHECK-NEXT: $exec = S_OR_B64_term $exec, killed [[COPY4]], implicit-def $scc + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_64_xexec = PRED_COPY killed [[S_MOV_B64_term]] + ; CHECK-NEXT: $exec = S_OR_B64_term $exec, killed [[PRED_COPY4]], implicit-def $scc ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY killed [[COPY1]] - ; CHECK-NEXT: [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = nsw V_ADD_U32_e32 1, killed [[COPY5]], implicit $exec - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY killed [[V_ADD_U32_e32_]] + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY killed [[PRED_COPY1]] + ; CHECK-NEXT: [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = nsw V_ADD_U32_e32 1, killed [[PRED_COPY5]], implicit $exec + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY killed [[V_ADD_U32_e32_]] ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY66:%[0-9]+]]:vgpr_32 = COPY killed [[COPY6]] - ; CHECK-NEXT: GLOBAL_STORE_DWORD undef %10:vreg_64, [[COPY66]], 0, 0, implicit $exec :: (volatile store (s32), addrspace 1) - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY killed [[COPY66]] - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY killed [[COPY7]] - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec - ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY8]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc - ; CHECK-NEXT: [[S_XOR_B64_1:%[0-9]+]]:sreg_64_xexec = S_XOR_B64 [[S_AND_B64_1]], [[COPY8]], implicit-def dead $scc + ; CHECK-NEXT: [[PRED_COPY66:%[0-9]+]]:vgpr_32 = PRED_COPY killed [[PRED_COPY6]] + ; CHECK-NEXT: GLOBAL_STORE_DWORD undef %10:vreg_64, [[PRED_COPY66]], 0, 0, implicit $exec :: (volatile store (s32), addrspace 1) + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY killed [[PRED_COPY66]] + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY killed [[PRED_COPY7]] + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:sreg_64 = PRED_COPY $exec, implicit-def $exec + ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 [[PRED_COPY8]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc + ; CHECK-NEXT: [[S_XOR_B64_1:%[0-9]+]]:sreg_64_xexec = S_XOR_B64 [[S_AND_B64_1]], [[PRED_COPY8]], implicit-def dead $scc ; CHECK-NEXT: $exec = S_MOV_B64_term killed [[S_AND_B64_1]] ; CHECK-NEXT: [[S_MOV_B64_term1:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term killed [[S_XOR_B64_1]], implicit $exec ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.1, implicit $exec @@ -61,7 +61,7 @@ 
successors: %bb.2(0x40000000), %bb.1(0x40000000) liveins: $vgpr0 - %0:vgpr_32 = COPY killed $vgpr0 + %0:vgpr_32 = PRED_COPY killed $vgpr0 %1:vgpr_32 = V_MOV_B32_e32 0, implicit $exec %2:sreg_64_xexec = V_CMP_EQ_U32_e64 0, killed %0, implicit $exec %3:sreg_64_xexec = SI_IF %2, %bb.1, implicit-def $exec, implicit-def dead $scc, implicit $exec @@ -80,7 +80,7 @@ %9:vgpr_32 = PHI %8, %bb.1, %7, %bb.2, %1, %bb.0 GLOBAL_STORE_DWORD undef %10:vreg_64, %9, 0, 0, implicit $exec :: (volatile store (s32), addrspace 1) - %7:vgpr_32 = COPY killed %9 + %7:vgpr_32 = PRED_COPY killed %9 %5:sreg_64_xexec = SI_IF %2, %bb.1, implicit-def $exec, implicit-def dead $scc, implicit $exec S_BRANCH %bb.2 @@ -97,14 +97,14 @@ ; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.1(0x40000000) ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY killed $vgpr0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY killed $vgpr0 ; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 0, killed [[COPY]], implicit $exec - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B32_e32_]] - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY killed [[V_MOV_B32_e32_]] - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec - ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY3]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc - ; CHECK-NEXT: [[S_XOR_B64_:%[0-9]+]]:sreg_64_xexec = S_XOR_B64 [[S_AND_B64_]], [[COPY3]], implicit-def dead $scc + ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 0, killed [[PRED_COPY]], implicit $exec + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[V_MOV_B32_e32_]] + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY killed [[V_MOV_B32_e32_]] + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_64 = PRED_COPY $exec, implicit-def $exec + ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[PRED_COPY3]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc + ; CHECK-NEXT: [[S_XOR_B64_:%[0-9]+]]:sreg_64_xexec = S_XOR_B64 [[S_AND_B64_]], [[PRED_COPY3]], implicit-def dead $scc ; CHECK-NEXT: $exec = S_MOV_B64_term killed [[S_AND_B64_]] ; CHECK-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term killed [[S_XOR_B64_]], implicit $exec ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.1, implicit $exec @@ -113,33 +113,33 @@ ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY killed [[COPY1]] - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_64_xexec = COPY killed [[S_MOV_B64_term]] + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY killed [[PRED_COPY1]] + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_64_xexec = PRED_COPY killed [[S_MOV_B64_term]] ; CHECK-NEXT: S_BRANCH %bb.2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.4(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_64_xexec = COPY killed [[COPY5]] - ; CHECK-NEXT: $exec = S_OR_B64_term $exec, killed [[COPY6]], implicit-def $scc + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_64_xexec = PRED_COPY killed [[PRED_COPY5]] + ; CHECK-NEXT: $exec = S_OR_B64_term $exec, killed [[PRED_COPY6]], implicit-def $scc ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.4: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = nsw V_ADD_U32_e32 1, killed [[COPY4]], implicit $exec - ; CHECK-NEXT: 
[[COPY7:%[0-9]+]]:vgpr_32 = COPY killed [[V_ADD_U32_e32_]] + ; CHECK-NEXT: [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = nsw V_ADD_U32_e32 1, killed [[PRED_COPY4]], implicit $exec + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY killed [[V_ADD_U32_e32_]] ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.1(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY77:%[0-9]+]]:vgpr_32 = COPY killed [[COPY7]] - ; CHECK-NEXT: GLOBAL_STORE_DWORD undef %11:vreg_64, [[COPY77]], 0, 0, implicit $exec :: (volatile store (s32), addrspace 1) - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY killed [[COPY77]] - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY killed [[COPY8]] - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec - ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY9]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc - ; CHECK-NEXT: [[S_XOR_B64_1:%[0-9]+]]:sreg_64_xexec = S_XOR_B64 [[S_AND_B64_1]], [[COPY9]], implicit-def dead $scc + ; CHECK-NEXT: [[PRED_COPY77:%[0-9]+]]:vgpr_32 = PRED_COPY killed [[PRED_COPY7]] + ; CHECK-NEXT: GLOBAL_STORE_DWORD undef %11:vreg_64, [[PRED_COPY77]], 0, 0, implicit $exec :: (volatile store (s32), addrspace 1) + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY killed [[PRED_COPY77]] + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY8]] + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY killed [[PRED_COPY8]] + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:sreg_64 = PRED_COPY $exec, implicit-def $exec + ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 [[PRED_COPY9]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc + ; CHECK-NEXT: [[S_XOR_B64_1:%[0-9]+]]:sreg_64_xexec = S_XOR_B64 [[S_AND_B64_1]], [[PRED_COPY9]], implicit-def dead $scc ; CHECK-NEXT: $exec = S_MOV_B64_term killed [[S_AND_B64_1]] ; CHECK-NEXT: [[S_MOV_B64_term1:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term killed [[S_XOR_B64_1]], implicit $exec ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.1, implicit $exec @@ -148,7 +148,7 @@ successors: %bb.3(0x40000000), %bb.1(0x40000000) liveins: $vgpr0 - %0:vgpr_32 = COPY killed $vgpr0 + %0:vgpr_32 = PRED_COPY killed $vgpr0 %1:vgpr_32 = V_MOV_B32_e32 0, implicit $exec %2:sreg_64_xexec = V_CMP_EQ_U32_e64 0, killed %0, implicit $exec %3:sreg_64_xexec = SI_IF %2, %bb.1, implicit-def $exec, implicit-def dead $scc, implicit $exec @@ -164,7 +164,7 @@ bb.2: successors: %bb.3(0x80000000) - %8:sreg_64_xexec = COPY %4 + %8:sreg_64_xexec = PRED_COPY %4 SI_END_CF killed %8, implicit-def $exec, implicit-def dead $scc, implicit $exec %9:vgpr_32 = nsw V_ADD_U32_e32 1, killed %6, implicit $exec @@ -173,7 +173,7 @@ %10:vgpr_32 = PHI %9, %bb.2, %7, %bb.3, %1, %bb.0 GLOBAL_STORE_DWORD undef %11:vreg_64, %10, 0, 0, implicit $exec :: (volatile store (s32), addrspace 1) - %7:vgpr_32 = COPY killed %10 + %7:vgpr_32 = PRED_COPY killed %10 %5:sreg_64_xexec = SI_IF %2, %bb.1, implicit-def $exec, implicit-def dead $scc, implicit $exec S_BRANCH %bb.3 diff --git a/llvm/test/CodeGen/AMDGPU/lower-control-flow-other-terminators.mir b/llvm/test/CodeGen/AMDGPU/lower-control-flow-other-terminators.mir --- a/llvm/test/CodeGen/AMDGPU/lower-control-flow-other-terminators.mir +++ b/llvm/test/CodeGen/AMDGPU/lower-control-flow-other-terminators.mir @@ -21,9 +21,9 @@ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY killed $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_64_xexec = COPY $sgpr4_sgpr5 ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = 
V_CMP_EQ_U32_e64 0, [[COPY]], implicit $exec - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec - ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY2]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc - ; CHECK-NEXT: [[S_XOR_B64_:%[0-9]+]]:sreg_64_xexec = S_XOR_B64 [[S_AND_B64_]], [[COPY2]], implicit-def dead $scc + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $exec, implicit-def $exec + ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[PRED_COPY]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc + ; CHECK-NEXT: [[S_XOR_B64_:%[0-9]+]]:sreg_64_xexec = S_XOR_B64 [[S_AND_B64_]], [[PRED_COPY]], implicit-def dead $scc ; CHECK-NEXT: $exec = S_MOV_B64_term killed [[S_AND_B64_]] ; CHECK-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term killed [[COPY1]], implicit $exec ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.1, implicit $exec @@ -68,9 +68,9 @@ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY killed $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_64_xexec = COPY $sgpr4_sgpr5 ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 0, [[COPY]], implicit $exec - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec - ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY2]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc - ; CHECK-NEXT: [[S_XOR_B64_:%[0-9]+]]:sreg_64_xexec = S_XOR_B64 [[S_AND_B64_]], [[COPY2]], implicit-def dead $scc + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $exec, implicit-def $exec + ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[PRED_COPY]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc + ; CHECK-NEXT: [[S_XOR_B64_:%[0-9]+]]:sreg_64_xexec = S_XOR_B64 [[S_AND_B64_]], [[PRED_COPY]], implicit-def dead $scc ; CHECK-NEXT: $exec = S_MOV_B64_term killed [[S_AND_B64_]] ; CHECK-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term killed [[COPY1]], implicit $exec ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec @@ -209,9 +209,9 @@ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY killed $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY killed $vgpr1 ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 killed [[COPY]], killed [[COPY1]], implicit $exec - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec - ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY2]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc - ; CHECK-NEXT: [[S_XOR_B64_:%[0-9]+]]:sreg_64_xexec = S_XOR_B64 [[S_AND_B64_]], [[COPY2]], implicit-def dead $scc + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $exec, implicit-def $exec + ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[PRED_COPY]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc + ; CHECK-NEXT: [[S_XOR_B64_:%[0-9]+]]:sreg_64_xexec = S_XOR_B64 [[S_AND_B64_]], [[PRED_COPY]], implicit-def dead $scc ; CHECK-NEXT: $exec = S_MOV_B64_term killed [[S_AND_B64_]] ; CHECK-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term [[S_XOR_B64_]], implicit $exec ; CHECK-NEXT: [[S_MOV_B64_term1:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term [[S_XOR_B64_]], implicit $exec @@ -221,23 +221,23 @@ ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_64_xexec = COPY [[S_MOV_B64_term1]] - ; CHECK-NEXT: dead %7:vgpr_32 = GLOBAL_LOAD_DWORD undef %8:vreg_64, 0, 0, implicit $exec :: (volatile load (s32), addrspace 1) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_64_xexec = COPY [[COPY3]] + ; CHECK-NEXT: 
[[COPY2:%[0-9]+]]:sreg_64_xexec = COPY [[S_MOV_B64_term1]] + ; CHECK-NEXT: dead [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD undef %8:vreg_64, 0, 0, implicit $exec :: (volatile load (s32), addrspace 1) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_64_xexec = COPY [[COPY2]] ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_64_xexec = COPY [[COPY4]] - ; CHECK-NEXT: $exec = S_OR_B64_term $exec, killed [[COPY5]], implicit-def $scc + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_64_xexec = COPY [[COPY3]] + ; CHECK-NEXT: $exec = S_OR_B64_term $exec, killed [[COPY4]], implicit-def $scc ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: S_SLEEP 1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec - ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY6]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc - ; CHECK-NEXT: [[S_XOR_B64_1:%[0-9]+]]:sreg_64_xexec = S_XOR_B64 [[S_AND_B64_1]], [[COPY6]], implicit-def dead $scc + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64 = PRED_COPY $exec, implicit-def $exec + ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 [[PRED_COPY1]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc + ; CHECK-NEXT: [[S_XOR_B64_1:%[0-9]+]]:sreg_64_xexec = S_XOR_B64 [[S_AND_B64_1]], [[PRED_COPY1]], implicit-def dead $scc ; CHECK-NEXT: $exec = S_MOV_B64_term killed [[S_AND_B64_1]] ; CHECK-NEXT: [[S_MOV_B64_term1:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term [[S_XOR_B64_1]], implicit $exec ; CHECK-NEXT: [[S_MOV_B64_term2:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term [[S_XOR_B64_1]], implicit $exec diff --git a/llvm/test/CodeGen/AMDGPU/lower-i1-copies-implicit-def-unstructured-loop.mir b/llvm/test/CodeGen/AMDGPU/lower-i1-copies-implicit-def-unstructured-loop.mir --- a/llvm/test/CodeGen/AMDGPU/lower-i1-copies-implicit-def-unstructured-loop.mir +++ b/llvm/test/CodeGen/AMDGPU/lower-i1-copies-implicit-def-unstructured-loop.mir @@ -55,7 +55,7 @@ ; CHECK-NEXT: [[PHI2:%[0-9]+]]:sreg_32 = PHI [[S_MOV_B32_3]], %bb.0, %29, %bb.3 ; CHECK-NEXT: [[S_MOV_B32_6:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; CHECK-NEXT: [[S_ANDN2_B64_:%[0-9]+]]:sreg_64 = S_ANDN2_B64 [[PHI]], $exec, implicit-def $scc - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_64 = COPY [[S_ANDN2_B64_]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY [[S_ANDN2_B64_]] ; CHECK-NEXT: S_CMP_EQ_U32 [[PHI2]], killed [[S_MOV_B32_6]], implicit-def $scc ; CHECK-NEXT: [[S_MOV_B64_1:%[0-9]+]]:sreg_64 = S_MOV_B64 -1 ; CHECK-NEXT: [[DEF7:%[0-9]+]]:sreg_64 = IMPLICIT_DEF @@ -67,25 +67,25 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[FLAT_LOAD_DWORD1:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[V_MAD_I64_I32_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s32), addrspace 1) ; CHECK-NEXT: [[S_MOV_B32_7:%[0-9]+]]:sreg_32 = S_MOV_B32 6 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_7]] - ; CHECK-NEXT: [[V_LSHR_B32_e32_:%[0-9]+]]:vgpr_32 = V_LSHR_B32_e32 killed [[FLAT_LOAD_DWORD1]], killed [[COPY7]], implicit $exec + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_7]] + ; CHECK-NEXT: [[V_LSHR_B32_e32_:%[0-9]+]]:vgpr_32 = V_LSHR_B32_e32 killed [[FLAT_LOAD_DWORD1]], killed [[COPY6]], implicit $exec ; CHECK-NEXT: [[DEF8:%[0-9]+]]:sreg_32 = IMPLICIT_DEF ; CHECK-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 1, [[V_LSHR_B32_e32_]], implicit $exec ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 killed 
[[V_AND_B32_e64_]], 1, implicit $exec + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sreg_64 = COPY [[PHI1]] + ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY7]], killed [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sreg_64 = COPY [[PHI1]] - ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY8]], killed [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:sreg_64 = COPY [[PHI1]] - ; CHECK-NEXT: [[S_OR_B64_:%[0-9]+]]:sreg_64 = S_OR_B64 killed [[S_AND_B64_]], [[COPY9]], implicit-def dead $scc + ; CHECK-NEXT: [[S_OR_B64_:%[0-9]+]]:sreg_64 = S_OR_B64 killed [[S_AND_B64_]], [[COPY8]], implicit-def dead $scc ; CHECK-NEXT: [[S_MOV_B64_2:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; CHECK-NEXT: [[DEF9:%[0-9]+]]:sreg_64 = IMPLICIT_DEF - ; CHECK-NEXT: [[S_ANDN2_B64_1:%[0-9]+]]:sreg_64 = S_ANDN2_B64 [[COPY6]], $exec, implicit-def $scc + ; CHECK-NEXT: [[S_ANDN2_B64_1:%[0-9]+]]:sreg_64 = S_ANDN2_B64 [[PRED_COPY]], $exec, implicit-def $scc ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 [[S_OR_B64_]], $exec, implicit-def $scc ; CHECK-NEXT: [[S_OR_B64_1:%[0-9]+]]:sreg_64 = S_OR_B64 [[S_ANDN2_B64_1]], [[S_AND_B64_1]], implicit-def $scc ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x00000000), %bb.1(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[PHI3:%[0-9]+]]:sreg_64 = PHI [[COPY6]], %bb.1, [[S_OR_B64_1]], %bb.2 + ; CHECK-NEXT: [[PHI3:%[0-9]+]]:sreg_64 = PHI [[PRED_COPY]], %bb.1, [[S_OR_B64_1]], %bb.2 ; CHECK-NEXT: [[PHI4:%[0-9]+]]:sreg_64 = PHI [[PHI1]], %bb.1, [[DEF9]], %bb.2 ; CHECK-NEXT: [[PHI5:%[0-9]+]]:sreg_64_xexec = PHI [[S_MOV_B64_1]], %bb.1, [[S_MOV_B64_2]], %bb.2 ; CHECK-NEXT: [[S_MOV_B32_8:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 diff --git a/llvm/test/CodeGen/AMDGPU/lower-term-opcodes.mir b/llvm/test/CodeGen/AMDGPU/lower-term-opcodes.mir --- a/llvm/test/CodeGen/AMDGPU/lower-term-opcodes.mir +++ b/llvm/test/CodeGen/AMDGPU/lower-term-opcodes.mir @@ -10,7 +10,7 @@ ; CHECK: bb.0: ; CHECK-NEXT: successors: %bb.1(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: $sgpr0 = COPY $sgpr1 + ; CHECK-NEXT: $sgpr0 = PRED_COPY $sgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.2(0x80000000) @@ -25,7 +25,7 @@ ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: $sgpr0_sgpr1 = COPY $sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1 = PRED_COPY $sgpr2_sgpr3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.4: ; CHECK-NEXT: successors: %bb.5(0x80000000) diff --git a/llvm/test/CodeGen/AMDGPU/merge-flat-load-store.mir b/llvm/test/CodeGen/AMDGPU/merge-flat-load-store.mir --- a/llvm/test/CodeGen/AMDGPU/merge-flat-load-store.mir +++ b/llvm/test/CodeGen/AMDGPU/merge-flat-load-store.mir @@ -9,9 +9,9 @@ ; GCN-LABEL: name: merge_flat_load_dword_2 ; GCN: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF ; GCN-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64_align2 = FLAT_LOAD_DWORDX2 [[DEF]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s64) from `ptr undef`, align 4) - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[FLAT_LOAD_DWORDX2_]].sub0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY killed [[FLAT_LOAD_DWORDX2_]].sub1 - ; GCN-NEXT: S_NOP 0, implicit [[COPY]], implicit [[COPY1]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY [[FLAT_LOAD_DWORDX2_]].sub0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY killed [[FLAT_LOAD_DWORDX2_]].sub1 + ; GCN-NEXT: S_NOP 0, implicit [[PRED_COPY]], implicit [[PRED_COPY1]] %0:vreg_64_align2 = IMPLICIT_DEF %1:vgpr_32 = 
FLAT_LOAD_DWORD %0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from `i32* undef`, align 4) %2:vgpr_32 = FLAT_LOAD_DWORD %0, 4, 0, implicit $exec, implicit $flat_scr :: (load (s32) from `i32* undef`, align 4) @@ -26,11 +26,11 @@ ; GCN-LABEL: name: merge_flat_load_dword_3 ; GCN: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF ; GCN-NEXT: [[FLAT_LOAD_DWORDX3_:%[0-9]+]]:vreg_96_align2 = FLAT_LOAD_DWORDX3 [[DEF]], 0, 1, implicit $exec, implicit $flat_scr :: (load (s96) from `ptr undef`, align 4) - ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_64_align2 = COPY [[FLAT_LOAD_DWORDX3_]].sub0_sub1 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY killed [[FLAT_LOAD_DWORDX3_]].sub2 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed [[COPY]].sub1 - ; GCN-NEXT: S_NOP 0, implicit [[COPY2]], implicit [[COPY3]], implicit [[COPY1]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64_align2 = PRED_COPY [[FLAT_LOAD_DWORDX3_]].sub0_sub1 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY killed [[FLAT_LOAD_DWORDX3_]].sub2 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY killed [[PRED_COPY]].sub1 + ; GCN-NEXT: S_NOP 0, implicit [[PRED_COPY2]], implicit [[PRED_COPY3]], implicit [[PRED_COPY1]] %0:vreg_64_align2 = IMPLICIT_DEF %1:vgpr_32 = FLAT_LOAD_DWORD %0, 0, 1, implicit $exec, implicit $flat_scr :: (load (s32) from `i32* undef`, align 4) %2:vgpr_32 = FLAT_LOAD_DWORD %0, 4, 1, implicit $exec, implicit $flat_scr :: (load (s32) from `i32* undef`, align 4) @@ -46,13 +46,13 @@ ; GCN-LABEL: name: merge_flat_load_dword_4 ; GCN: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF ; GCN-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128_align2 = FLAT_LOAD_DWORDX4 [[DEF]], 0, 2, implicit $exec, implicit $flat_scr :: (load (s128) from `ptr undef`, align 4) - ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_96_align2 = COPY [[FLAT_LOAD_DWORDX4_]].sub0_sub1_sub2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY killed [[FLAT_LOAD_DWORDX4_]].sub3 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vreg_64_align2 = COPY [[COPY]].sub0_sub1 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed [[COPY]].sub2 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY2]].sub0 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY killed [[COPY2]].sub1 - ; GCN-NEXT: S_NOP 0, implicit [[COPY4]], implicit [[COPY5]], implicit [[COPY3]], implicit [[COPY1]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_96_align2 = PRED_COPY [[FLAT_LOAD_DWORDX4_]].sub0_sub1_sub2 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY killed [[FLAT_LOAD_DWORDX4_]].sub3 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64_align2 = PRED_COPY [[PRED_COPY]].sub0_sub1 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY killed [[PRED_COPY]].sub2 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY2]].sub0 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY killed [[PRED_COPY2]].sub1 + ; GCN-NEXT: S_NOP 0, implicit [[PRED_COPY4]], implicit [[PRED_COPY5]], implicit [[PRED_COPY3]], implicit [[PRED_COPY1]] %0:vreg_64_align2 = IMPLICIT_DEF %1:vgpr_32 = FLAT_LOAD_DWORD %0, 0, 2, implicit $exec, implicit $flat_scr :: (load (s32) from `i32* undef`, align 4) %2:vgpr_32 = FLAT_LOAD_DWORD %0, 4, 2, implicit $exec, implicit $flat_scr :: (load (s32) from `i32* undef`, align 4) @@ -69,14 +69,14 @@ ; GCN-LABEL: name: merge_flat_load_dword_5 ; GCN: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF ; GCN-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128_align2 = FLAT_LOAD_DWORDX4 [[DEF]], 0, 
3, implicit $exec, implicit $flat_scr :: (load (s128) from `ptr undef`, align 4) - ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_96_align2 = COPY [[FLAT_LOAD_DWORDX4_]].sub0_sub1_sub2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY killed [[FLAT_LOAD_DWORDX4_]].sub3 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vreg_64_align2 = COPY [[COPY]].sub0_sub1 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed [[COPY]].sub2 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY2]].sub0 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY killed [[COPY2]].sub1 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_96_align2 = PRED_COPY [[FLAT_LOAD_DWORDX4_]].sub0_sub1_sub2 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY killed [[FLAT_LOAD_DWORDX4_]].sub3 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64_align2 = PRED_COPY [[PRED_COPY]].sub0_sub1 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY killed [[PRED_COPY]].sub2 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY2]].sub0 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY killed [[PRED_COPY2]].sub1 ; GCN-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[DEF]], 16, 3, implicit $exec, implicit $flat_scr :: (load (s32) from `ptr undef`) - ; GCN-NEXT: S_NOP 0, implicit [[COPY4]], implicit [[COPY5]], implicit [[COPY3]], implicit [[COPY1]], implicit [[FLAT_LOAD_DWORD]] + ; GCN-NEXT: S_NOP 0, implicit [[PRED_COPY4]], implicit [[PRED_COPY5]], implicit [[PRED_COPY3]], implicit [[PRED_COPY1]], implicit [[FLAT_LOAD_DWORD]] %0:vreg_64_align2 = IMPLICIT_DEF %1:vgpr_32 = FLAT_LOAD_DWORD %0, 0, 3, implicit $exec, implicit $flat_scr :: (load (s32) from `i32* undef`, align 4) %2:vgpr_32 = FLAT_LOAD_DWORD %0, 4, 3, implicit $exec, implicit $flat_scr :: (load (s32) from `i32* undef`, align 4) @@ -94,16 +94,16 @@ ; GCN-LABEL: name: merge_flat_load_dword_6 ; GCN: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF ; GCN-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128_align2 = FLAT_LOAD_DWORDX4 [[DEF]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s128) from `ptr undef`, align 4) - ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_96_align2 = COPY [[FLAT_LOAD_DWORDX4_]].sub0_sub1_sub2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY killed [[FLAT_LOAD_DWORDX4_]].sub3 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vreg_64_align2 = COPY [[COPY]].sub0_sub1 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed [[COPY]].sub2 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY2]].sub0 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY killed [[COPY2]].sub1 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_96_align2 = PRED_COPY [[FLAT_LOAD_DWORDX4_]].sub0_sub1_sub2 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY killed [[FLAT_LOAD_DWORDX4_]].sub3 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64_align2 = PRED_COPY [[PRED_COPY]].sub0_sub1 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY killed [[PRED_COPY]].sub2 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY2]].sub0 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY killed [[PRED_COPY2]].sub1 ; GCN-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64_align2 = FLAT_LOAD_DWORDX2 [[DEF]], 16, 0, implicit $exec, implicit $flat_scr :: (load (s64) from `ptr undef`, align 4) - ; GCN-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[FLAT_LOAD_DWORDX2_]].sub0 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY killed [[FLAT_LOAD_DWORDX2_]].sub1 - ; GCN-NEXT: S_NOP 0, implicit [[COPY4]], implicit [[COPY5]], implicit [[COPY3]], implicit [[COPY1]], implicit [[COPY6]], implicit [[COPY7]] + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY 
[[FLAT_LOAD_DWORDX2_]].sub0 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY killed [[FLAT_LOAD_DWORDX2_]].sub1 + ; GCN-NEXT: S_NOP 0, implicit [[PRED_COPY4]], implicit [[PRED_COPY5]], implicit [[PRED_COPY3]], implicit [[PRED_COPY1]], implicit [[PRED_COPY6]], implicit [[PRED_COPY7]] %0:vreg_64_align2 = IMPLICIT_DEF %1:vgpr_32 = FLAT_LOAD_DWORD %0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from `i32* undef`, align 4) %2:vgpr_32 = FLAT_LOAD_DWORD %0, 4, 0, implicit $exec, implicit $flat_scr :: (load (s32) from `i32* undef`, align 4) @@ -122,9 +122,9 @@ ; GCN-LABEL: name: merge_flat_load_dwordx2 ; GCN: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF ; GCN-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128_align2 = FLAT_LOAD_DWORDX4 [[DEF]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s128) from `ptr undef`, align 4) - ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_64_align2 = COPY [[FLAT_LOAD_DWORDX4_]].sub0_sub1 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vreg_64_align2 = COPY killed [[FLAT_LOAD_DWORDX4_]].sub2_sub3 - ; GCN-NEXT: S_NOP 0, implicit [[COPY]], implicit [[COPY1]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64_align2 = PRED_COPY [[FLAT_LOAD_DWORDX4_]].sub0_sub1 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64_align2 = PRED_COPY killed [[FLAT_LOAD_DWORDX4_]].sub2_sub3 + ; GCN-NEXT: S_NOP 0, implicit [[PRED_COPY]], implicit [[PRED_COPY1]] %0:vreg_64_align2 = IMPLICIT_DEF %1:vreg_64_align2 = FLAT_LOAD_DWORDX2 %0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s64) from `i64* undef`, align 4) %2:vreg_64_align2 = FLAT_LOAD_DWORDX2 %0, 8, 0, implicit $exec, implicit $flat_scr :: (load (s64) from `i64* undef`, align 4) @@ -139,9 +139,9 @@ ; GCN-LABEL: name: merge_flat_load_dwordx3_with_dwordx1 ; GCN: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF ; GCN-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128_align2 = FLAT_LOAD_DWORDX4 [[DEF]], 12, 0, implicit $exec, implicit $flat_scr :: (load (s128) from `ptr undef`, align 8) - ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_96_align2 = COPY [[FLAT_LOAD_DWORDX4_]].sub0_sub1_sub2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY killed [[FLAT_LOAD_DWORDX4_]].sub3 - ; GCN-NEXT: S_NOP 0, implicit [[COPY]], implicit [[COPY1]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_96_align2 = PRED_COPY [[FLAT_LOAD_DWORDX4_]].sub0_sub1_sub2 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY killed [[FLAT_LOAD_DWORDX4_]].sub3 + ; GCN-NEXT: S_NOP 0, implicit [[PRED_COPY]], implicit [[PRED_COPY1]] %0:vreg_64_align2 = IMPLICIT_DEF %1:vreg_96_align2 = FLAT_LOAD_DWORDX3 %0, 12, 0, implicit $exec, implicit $flat_scr :: (load (s96) from `i128* undef`, align 8) %2:vgpr_32 = FLAT_LOAD_DWORD %0, 24, 0, implicit $exec, implicit $flat_scr :: (load (s32) from `i32* undef`, align 4) @@ -156,9 +156,9 @@ ; GCN-LABEL: name: merge_flat_load_dwordx1_with_dwordx2 ; GCN: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF ; GCN-NEXT: [[FLAT_LOAD_DWORDX3_:%[0-9]+]]:vreg_96_align2 = FLAT_LOAD_DWORDX3 [[DEF]], 12, 0, implicit $exec, implicit $flat_scr :: (load (s96) from `ptr undef`, align 4) - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[FLAT_LOAD_DWORDX3_]].sub0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vreg_64_align2 = COPY killed [[FLAT_LOAD_DWORDX3_]].sub1_sub2 - ; GCN-NEXT: S_NOP 0, implicit [[COPY1]], implicit [[COPY]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY [[FLAT_LOAD_DWORDX3_]].sub0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64_align2 = PRED_COPY killed [[FLAT_LOAD_DWORDX3_]].sub1_sub2 + ; GCN-NEXT: S_NOP 0, implicit [[PRED_COPY1]], implicit [[PRED_COPY]] %0:vreg_64_align2 = IMPLICIT_DEF %2:vgpr_32 
= FLAT_LOAD_DWORD %0, 12, 0, implicit $exec, implicit $flat_scr :: (load (s32) from `i32* undef`, align 4) %1:vreg_64_align2 = FLAT_LOAD_DWORDX2 %0, 16, 0, implicit $exec, implicit $flat_scr :: (load (s64) from `i64* undef`, align 8) diff --git a/llvm/test/CodeGen/AMDGPU/merge-flat-with-global-load-store.mir b/llvm/test/CodeGen/AMDGPU/merge-flat-with-global-load-store.mir --- a/llvm/test/CodeGen/AMDGPU/merge-flat-with-global-load-store.mir +++ b/llvm/test/CodeGen/AMDGPU/merge-flat-with-global-load-store.mir @@ -9,9 +9,9 @@ ; GCN-LABEL: name: merge_flat_global_load_dword_2 ; GCN: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF ; GCN-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64_align2 = FLAT_LOAD_DWORDX2 [[DEF]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s64) from `ptr undef` + 4, align 4) - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[FLAT_LOAD_DWORDX2_]].sub0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY killed [[FLAT_LOAD_DWORDX2_]].sub1 - ; GCN-NEXT: S_NOP 0, implicit [[COPY]], implicit [[COPY1]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY [[FLAT_LOAD_DWORDX2_]].sub0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY killed [[FLAT_LOAD_DWORDX2_]].sub1 + ; GCN-NEXT: S_NOP 0, implicit [[PRED_COPY]], implicit [[PRED_COPY1]] %0:vreg_64_align2 = IMPLICIT_DEF %1:vgpr_32 = FLAT_LOAD_DWORD %0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from `float* undef` + 4, basealign 4) %2:vgpr_32 = GLOBAL_LOAD_DWORD %0, 4, 0, implicit $exec :: (load (s32) from `i32 addrspace(1)* undef` + 8, basealign 4, addrspace 1) @@ -26,9 +26,9 @@ ; GCN-LABEL: name: merge_global_flat_load_dword_2 ; GCN: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF ; GCN-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64_align2 = FLAT_LOAD_DWORDX2 [[DEF]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s64) from `ptr addrspace(1) undef`) - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[FLAT_LOAD_DWORDX2_]].sub0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY killed [[FLAT_LOAD_DWORDX2_]].sub1 - ; GCN-NEXT: S_NOP 0, implicit [[COPY]], implicit [[COPY1]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY [[FLAT_LOAD_DWORDX2_]].sub0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY killed [[FLAT_LOAD_DWORDX2_]].sub1 + ; GCN-NEXT: S_NOP 0, implicit [[PRED_COPY]], implicit [[PRED_COPY1]] %0:vreg_64_align2 = IMPLICIT_DEF %1:vgpr_32 = GLOBAL_LOAD_DWORD %0, 0, 0, implicit $exec :: (load (s32) from `float addrspace(1)* undef`, basealign 8, addrspace 1) %2:vgpr_32 = FLAT_LOAD_DWORD %0, 4, 0, implicit $exec, implicit $flat_scr :: (load (s32) from `i32* undef` + 4, basealign 8) @@ -43,11 +43,11 @@ ; GCN-LABEL: name: merge_global_flat_load_dword_3 ; GCN: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF ; GCN-NEXT: [[FLAT_LOAD_DWORDX3_:%[0-9]+]]:vreg_96_align2 = FLAT_LOAD_DWORDX3 [[DEF]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s96) from `ptr undef`, align 16) - ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_64_align2 = COPY [[FLAT_LOAD_DWORDX3_]].sub0_sub1 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY killed [[FLAT_LOAD_DWORDX3_]].sub2 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed [[COPY]].sub0 - ; GCN-NEXT: S_NOP 0, implicit [[COPY2]], implicit [[COPY3]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64_align2 = PRED_COPY [[FLAT_LOAD_DWORDX3_]].sub0_sub1 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY killed [[FLAT_LOAD_DWORDX3_]].sub2 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GCN-NEXT: 
[[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY killed [[PRED_COPY]].sub0 + ; GCN-NEXT: S_NOP 0, implicit [[PRED_COPY2]], implicit [[PRED_COPY3]] %0:vreg_64_align2 = IMPLICIT_DEF %1:vgpr_32 = GLOBAL_LOAD_DWORD %0, 4, 0, implicit $exec :: (load (s32) from `i32 addrspace(1)* undef` + 4, basealign 8, addrspace 1) %2:vgpr_32 = FLAT_LOAD_DWORD %0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from `float* undef`, basealign 16) @@ -63,13 +63,13 @@ ; GCN-LABEL: name: merge_global_flat_load_dword_4 ; GCN: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF ; GCN-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128_align2 = FLAT_LOAD_DWORDX4 [[DEF]], 4, 0, implicit $exec, implicit $flat_scr :: (load (s128) from `ptr addrspace(1) undef` + 4, align 4, basealign 8) - ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_96_align2 = COPY [[FLAT_LOAD_DWORDX4_]].sub0_sub1_sub2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY killed [[FLAT_LOAD_DWORDX4_]].sub3 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vreg_64_align2 = COPY [[COPY]].sub0_sub1 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed [[COPY]].sub2 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY2]].sub0 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY killed [[COPY2]].sub1 - ; GCN-NEXT: S_NOP 0, implicit [[COPY4]], implicit [[COPY5]], implicit [[COPY3]], implicit [[COPY1]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_96_align2 = PRED_COPY [[FLAT_LOAD_DWORDX4_]].sub0_sub1_sub2 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY killed [[FLAT_LOAD_DWORDX4_]].sub3 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64_align2 = PRED_COPY [[PRED_COPY]].sub0_sub1 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY killed [[PRED_COPY]].sub2 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY2]].sub0 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY killed [[PRED_COPY2]].sub1 + ; GCN-NEXT: S_NOP 0, implicit [[PRED_COPY4]], implicit [[PRED_COPY5]], implicit [[PRED_COPY3]], implicit [[PRED_COPY1]] %0:vreg_64_align2 = IMPLICIT_DEF %1:vgpr_32 = GLOBAL_LOAD_DWORD %0, 4, 0, implicit $exec :: (load (s32) from `i32 addrspace(1)* undef` + 4, basealign 8, addrspace 1) %2:vgpr_32 = GLOBAL_LOAD_DWORD %0, 8, 0, implicit $exec :: (load (s32) from `i32 addrspace(1)* undef` + 8, basealign 8, addrspace 1) @@ -86,9 +86,9 @@ ; GCN-LABEL: name: merge_flat_global_load_dwordx2 ; GCN: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF ; GCN-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128_align2 = FLAT_LOAD_DWORDX4 [[DEF]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s128) from `ptr undef`, align 8) - ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_64_align2 = COPY [[FLAT_LOAD_DWORDX4_]].sub0_sub1 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vreg_64_align2 = COPY killed [[FLAT_LOAD_DWORDX4_]].sub2_sub3 - ; GCN-NEXT: S_NOP 0, implicit [[COPY]], implicit [[COPY1]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64_align2 = PRED_COPY [[FLAT_LOAD_DWORDX4_]].sub0_sub1 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64_align2 = PRED_COPY killed [[FLAT_LOAD_DWORDX4_]].sub2_sub3 + ; GCN-NEXT: S_NOP 0, implicit [[PRED_COPY]], implicit [[PRED_COPY1]] %0:vreg_64_align2 = IMPLICIT_DEF %1:vreg_64_align2 = FLAT_LOAD_DWORDX2 %0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s64) from `double* undef`) %2:vreg_64_align2 = GLOBAL_LOAD_DWORDX2 %0, 8, 0, implicit $exec :: (load (s64) from `i64 addrspace(1)* undef`, addrspace 1) @@ -103,9 +103,9 @@ ; GCN-LABEL: name: merge_flat_global_load_dwordx3 ; GCN: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF ; GCN-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128_align2 = FLAT_LOAD_DWORDX4 [[DEF]], 0, 0, 
implicit $exec, implicit $flat_scr :: (load (s128) from `ptr undef`, align 4) - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[FLAT_LOAD_DWORDX4_]].sub0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vreg_96_align2 = COPY killed [[FLAT_LOAD_DWORDX4_]].sub1_sub2_sub3 - ; GCN-NEXT: S_NOP 0, implicit [[COPY]], implicit [[COPY1]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY [[FLAT_LOAD_DWORDX4_]].sub0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_96_align2 = PRED_COPY killed [[FLAT_LOAD_DWORDX4_]].sub1_sub2_sub3 + ; GCN-NEXT: S_NOP 0, implicit [[PRED_COPY]], implicit [[PRED_COPY1]] %0:vreg_64_align2 = IMPLICIT_DEF %1:vgpr_32 = FLAT_LOAD_DWORD %0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from `float* undef`) %2:vreg_96_align2 = GLOBAL_LOAD_DWORDX3 %0, 4, 0, implicit $exec :: (load (s96) from `<3 x i32> addrspace(1)* undef`, addrspace 1) @@ -120,9 +120,9 @@ ; GCN-LABEL: name: merge_global_flat_load_dwordx3 ; GCN: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF ; GCN-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128_align2 = FLAT_LOAD_DWORDX4 [[DEF]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s128) from `ptr addrspace(1) undef`, align 4) - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[FLAT_LOAD_DWORDX4_]].sub0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vreg_96_align2 = COPY killed [[FLAT_LOAD_DWORDX4_]].sub1_sub2_sub3 - ; GCN-NEXT: S_NOP 0, implicit [[COPY]], implicit [[COPY1]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY [[FLAT_LOAD_DWORDX4_]].sub0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_96_align2 = PRED_COPY killed [[FLAT_LOAD_DWORDX4_]].sub1_sub2_sub3 + ; GCN-NEXT: S_NOP 0, implicit [[PRED_COPY]], implicit [[PRED_COPY1]] %0:vreg_64_align2 = IMPLICIT_DEF %1:vgpr_32 = GLOBAL_LOAD_DWORD %0, 0, 0, implicit $exec :: (load (s32) from `i32 addrspace(1)* undef`, addrspace 1) %2:vreg_96_align2 = FLAT_LOAD_DWORDX3 %0, 4, 0, implicit $exec, implicit $flat_scr :: (load (s96) from `<3 x i32>* undef`) @@ -139,9 +139,9 @@ ; GCN-NEXT: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF ; GCN-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[DEF]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from `ptr undef`) ; GCN-NEXT: [[GLOBAL_LOAD_DWORDX2_SADDR:%[0-9]+]]:vreg_64_align2 = GLOBAL_LOAD_DWORDX2_SADDR [[DEF1]], [[DEF]].sub0, 4, 0, implicit $exec :: (load (s64) from `ptr addrspace(1) undef` + 4, align 4, addrspace 1) - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_LOAD_DWORDX2_SADDR]].sub0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY killed [[GLOBAL_LOAD_DWORDX2_SADDR]].sub1 - ; GCN-NEXT: S_NOP 0, implicit [[FLAT_LOAD_DWORD]], implicit [[COPY]], implicit [[COPY1]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY [[GLOBAL_LOAD_DWORDX2_SADDR]].sub0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY killed [[GLOBAL_LOAD_DWORDX2_SADDR]].sub1 + ; GCN-NEXT: S_NOP 0, implicit [[FLAT_LOAD_DWORD]], implicit [[PRED_COPY]], implicit [[PRED_COPY1]] %0:vreg_64_align2 = IMPLICIT_DEF %1:sreg_64_xexec = IMPLICIT_DEF %2:vgpr_32 = FLAT_LOAD_DWORD %0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from `float* undef`, basealign 4) @@ -160,9 +160,9 @@ ; GCN-NEXT: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF ; GCN-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF1]], [[DEF]].sub0, 0, 0, implicit $exec :: (load (s32) from `ptr addrspace(1) undef`, addrspace 1) ; GCN-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64_align2 = FLAT_LOAD_DWORDX2 [[DEF]], 4, 0, implicit $exec, implicit $flat_scr :: (load (s64) from `ptr undef` + 4, align 4) - ; 
GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[FLAT_LOAD_DWORDX2_]].sub0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY killed [[FLAT_LOAD_DWORDX2_]].sub1 - ; GCN-NEXT: S_NOP 0, implicit [[GLOBAL_LOAD_DWORD_SADDR]], implicit [[COPY]], implicit [[COPY1]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY [[FLAT_LOAD_DWORDX2_]].sub0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY killed [[FLAT_LOAD_DWORDX2_]].sub1 + ; GCN-NEXT: S_NOP 0, implicit [[GLOBAL_LOAD_DWORD_SADDR]], implicit [[PRED_COPY]], implicit [[PRED_COPY1]] %0:vreg_64_align2 = IMPLICIT_DEF %1:sreg_64_xexec = IMPLICIT_DEF %2:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %1, %0.sub0, 0, 0, implicit $exec :: (load (s32) from `i32 addrspace(1)* undef`, addrspace 1) diff --git a/llvm/test/CodeGen/AMDGPU/merge-global-load-store.mir b/llvm/test/CodeGen/AMDGPU/merge-global-load-store.mir --- a/llvm/test/CodeGen/AMDGPU/merge-global-load-store.mir +++ b/llvm/test/CodeGen/AMDGPU/merge-global-load-store.mir @@ -9,9 +9,9 @@ ; GCN-LABEL: name: merge_global_load_dword_2 ; GCN: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF ; GCN-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64_align2 = GLOBAL_LOAD_DWORDX2 [[DEF]], 0, 0, implicit $exec :: (load (s64) from `ptr addrspace(1) undef` + 4, align 4, addrspace 1) - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_LOAD_DWORDX2_]].sub0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY killed [[GLOBAL_LOAD_DWORDX2_]].sub1 - ; GCN-NEXT: S_NOP 0, implicit [[COPY]], implicit [[COPY1]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY [[GLOBAL_LOAD_DWORDX2_]].sub0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY killed [[GLOBAL_LOAD_DWORDX2_]].sub1 + ; GCN-NEXT: S_NOP 0, implicit [[PRED_COPY]], implicit [[PRED_COPY1]] %0:vreg_64_align2 = IMPLICIT_DEF %1:vgpr_32 = GLOBAL_LOAD_DWORD %0, 0, 0, implicit $exec :: (load (s32) from `float addrspace(1)* undef` + 4, basealign 4, addrspace 1) %2:vgpr_32 = GLOBAL_LOAD_DWORD %0, 4, 0, implicit $exec :: (load (s32) from `i32 addrspace(1)* undef` + 8, basealign 4, addrspace 1) @@ -26,11 +26,11 @@ ; GCN-LABEL: name: merge_global_load_dword_3 ; GCN: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF ; GCN-NEXT: [[GLOBAL_LOAD_DWORDX3_:%[0-9]+]]:vreg_96_align2 = GLOBAL_LOAD_DWORDX3 [[DEF]], 0, 1, implicit $exec :: (load (s96) from `ptr addrspace(1) undef`, align 4, addrspace 1) - ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_64_align2 = COPY [[GLOBAL_LOAD_DWORDX3_]].sub0_sub1 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY killed [[GLOBAL_LOAD_DWORDX3_]].sub2 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed [[COPY]].sub1 - ; GCN-NEXT: S_NOP 0, implicit [[COPY2]], implicit [[COPY3]], implicit [[COPY1]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64_align2 = PRED_COPY [[GLOBAL_LOAD_DWORDX3_]].sub0_sub1 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY killed [[GLOBAL_LOAD_DWORDX3_]].sub2 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY killed [[PRED_COPY]].sub1 + ; GCN-NEXT: S_NOP 0, implicit [[PRED_COPY2]], implicit [[PRED_COPY3]], implicit [[PRED_COPY1]] %0:vreg_64_align2 = IMPLICIT_DEF %1:vgpr_32 = GLOBAL_LOAD_DWORD %0, 0, 1, implicit $exec :: (load (s32) from `i32 addrspace(1)* undef`, align 4, addrspace 1) %2:vgpr_32 = GLOBAL_LOAD_DWORD %0, 4, 1, implicit $exec :: (load (s32) from `i32 addrspace(1)* undef`, align 4, addrspace 1) @@ -46,13 +46,13 @@ ; GCN-LABEL: name: merge_global_load_dword_4 ; GCN: 
[[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF ; GCN-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128_align2 = GLOBAL_LOAD_DWORDX4 [[DEF]], 0, 2, implicit $exec :: (load (s128) from `ptr addrspace(1) undef`, align 4, addrspace 1) - ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_96_align2 = COPY [[GLOBAL_LOAD_DWORDX4_]].sub0_sub1_sub2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY killed [[GLOBAL_LOAD_DWORDX4_]].sub3 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vreg_64_align2 = COPY [[COPY]].sub0_sub1 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed [[COPY]].sub2 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY2]].sub0 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY killed [[COPY2]].sub1 - ; GCN-NEXT: S_NOP 0, implicit [[COPY4]], implicit [[COPY5]], implicit [[COPY3]], implicit [[COPY1]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_96_align2 = PRED_COPY [[GLOBAL_LOAD_DWORDX4_]].sub0_sub1_sub2 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY killed [[GLOBAL_LOAD_DWORDX4_]].sub3 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64_align2 = PRED_COPY [[PRED_COPY]].sub0_sub1 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY killed [[PRED_COPY]].sub2 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY2]].sub0 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY killed [[PRED_COPY2]].sub1 + ; GCN-NEXT: S_NOP 0, implicit [[PRED_COPY4]], implicit [[PRED_COPY5]], implicit [[PRED_COPY3]], implicit [[PRED_COPY1]] %0:vreg_64_align2 = IMPLICIT_DEF %1:vgpr_32 = GLOBAL_LOAD_DWORD %0, 0, 2, implicit $exec :: (load (s32) from `i32 addrspace(1)* undef`, align 4, addrspace 1) %2:vgpr_32 = GLOBAL_LOAD_DWORD %0, 4, 2, implicit $exec :: (load (s32) from `i32 addrspace(1)* undef`, align 4, addrspace 1) @@ -69,14 +69,14 @@ ; GCN-LABEL: name: merge_global_load_dword_5 ; GCN: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF ; GCN-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128_align2 = GLOBAL_LOAD_DWORDX4 [[DEF]], 0, 3, implicit $exec :: (load (s128) from `ptr addrspace(1) undef`, align 4, addrspace 1) - ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_96_align2 = COPY [[GLOBAL_LOAD_DWORDX4_]].sub0_sub1_sub2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY killed [[GLOBAL_LOAD_DWORDX4_]].sub3 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vreg_64_align2 = COPY [[COPY]].sub0_sub1 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed [[COPY]].sub2 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY2]].sub0 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY killed [[COPY2]].sub1 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_96_align2 = PRED_COPY [[GLOBAL_LOAD_DWORDX4_]].sub0_sub1_sub2 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY killed [[GLOBAL_LOAD_DWORDX4_]].sub3 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64_align2 = PRED_COPY [[PRED_COPY]].sub0_sub1 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY killed [[PRED_COPY]].sub2 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY2]].sub0 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY killed [[PRED_COPY2]].sub1 ; GCN-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[DEF]], 16, 3, implicit $exec :: (load (s32) from `ptr addrspace(1) undef`, addrspace 1) - ; GCN-NEXT: S_NOP 0, implicit [[COPY4]], implicit [[COPY5]], implicit [[COPY3]], implicit [[COPY1]], implicit [[GLOBAL_LOAD_DWORD]] + ; GCN-NEXT: S_NOP 0, implicit [[PRED_COPY4]], implicit [[PRED_COPY5]], implicit [[PRED_COPY3]], implicit [[PRED_COPY1]], implicit [[GLOBAL_LOAD_DWORD]] %0:vreg_64_align2 = IMPLICIT_DEF %1:vgpr_32 = GLOBAL_LOAD_DWORD %0, 0, 3, implicit $exec :: (load (s32) from `i32 
addrspace(1)* undef`, align 4, addrspace 1) %2:vgpr_32 = GLOBAL_LOAD_DWORD %0, 4, 3, implicit $exec :: (load (s32) from `i32 addrspace(1)* undef`, align 4, addrspace 1) @@ -94,16 +94,16 @@ ; GCN-LABEL: name: merge_global_load_dword_6 ; GCN: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF ; GCN-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128_align2 = GLOBAL_LOAD_DWORDX4 [[DEF]], 0, 0, implicit $exec :: (load (s128) from `ptr addrspace(1) undef`, align 4, addrspace 1) - ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_96_align2 = COPY [[GLOBAL_LOAD_DWORDX4_]].sub0_sub1_sub2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY killed [[GLOBAL_LOAD_DWORDX4_]].sub3 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vreg_64_align2 = COPY [[COPY]].sub0_sub1 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed [[COPY]].sub2 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY2]].sub0 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY killed [[COPY2]].sub1 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_96_align2 = PRED_COPY [[GLOBAL_LOAD_DWORDX4_]].sub0_sub1_sub2 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY killed [[GLOBAL_LOAD_DWORDX4_]].sub3 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64_align2 = PRED_COPY [[PRED_COPY]].sub0_sub1 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY killed [[PRED_COPY]].sub2 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY2]].sub0 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY killed [[PRED_COPY2]].sub1 ; GCN-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64_align2 = GLOBAL_LOAD_DWORDX2 [[DEF]], 16, 0, implicit $exec :: (load (s64) from `ptr addrspace(1) undef`, align 4, addrspace 1) - ; GCN-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_LOAD_DWORDX2_]].sub0 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY killed [[GLOBAL_LOAD_DWORDX2_]].sub1 - ; GCN-NEXT: S_NOP 0, implicit [[COPY4]], implicit [[COPY5]], implicit [[COPY3]], implicit [[COPY1]], implicit [[COPY6]], implicit [[COPY7]] + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[GLOBAL_LOAD_DWORDX2_]].sub0 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY killed [[GLOBAL_LOAD_DWORDX2_]].sub1 + ; GCN-NEXT: S_NOP 0, implicit [[PRED_COPY4]], implicit [[PRED_COPY5]], implicit [[PRED_COPY3]], implicit [[PRED_COPY1]], implicit [[PRED_COPY6]], implicit [[PRED_COPY7]] %0:vreg_64_align2 = IMPLICIT_DEF %1:vgpr_32 = GLOBAL_LOAD_DWORD %0, 0, 0, implicit $exec :: (load (s32) from `i32 addrspace(1)* undef`, align 4, addrspace 1) %2:vgpr_32 = GLOBAL_LOAD_DWORD %0, 4, 0, implicit $exec :: (load (s32) from `i32 addrspace(1)* undef`, align 4, addrspace 1) @@ -122,9 +122,9 @@ ; GCN-LABEL: name: merge_global_load_dwordx2 ; GCN: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF ; GCN-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128_align2 = GLOBAL_LOAD_DWORDX4 [[DEF]], 0, 0, implicit $exec :: (load (s128) from `ptr addrspace(1) undef`, align 4, addrspace 1) - ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_64_align2 = COPY [[GLOBAL_LOAD_DWORDX4_]].sub0_sub1 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vreg_64_align2 = COPY killed [[GLOBAL_LOAD_DWORDX4_]].sub2_sub3 - ; GCN-NEXT: S_NOP 0, implicit [[COPY]], implicit [[COPY1]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64_align2 = PRED_COPY [[GLOBAL_LOAD_DWORDX4_]].sub0_sub1 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64_align2 = PRED_COPY killed [[GLOBAL_LOAD_DWORDX4_]].sub2_sub3 + ; GCN-NEXT: S_NOP 0, implicit [[PRED_COPY]], implicit [[PRED_COPY1]] %0:vreg_64_align2 = IMPLICIT_DEF %1:vreg_64_align2 = GLOBAL_LOAD_DWORDX2 %0, 0, 0, implicit $exec :: (load (s64) from `i64 addrspace(1)* undef`, align 4, addrspace 1) 
%2:vreg_64_align2 = GLOBAL_LOAD_DWORDX2 %0, 8, 0, implicit $exec :: (load (s64) from `i64 addrspace(1)* undef`, align 4, addrspace 1) @@ -139,9 +139,9 @@ ; GCN-LABEL: name: merge_global_load_dwordx3_with_dwordx1 ; GCN: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF ; GCN-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128_align2 = GLOBAL_LOAD_DWORDX4 [[DEF]], 12, 0, implicit $exec :: (load (s128) from `ptr addrspace(1) undef`, align 8, addrspace 1) - ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_96_align2 = COPY [[GLOBAL_LOAD_DWORDX4_]].sub0_sub1_sub2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY killed [[GLOBAL_LOAD_DWORDX4_]].sub3 - ; GCN-NEXT: S_NOP 0, implicit [[COPY]], implicit [[COPY1]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_96_align2 = PRED_COPY [[GLOBAL_LOAD_DWORDX4_]].sub0_sub1_sub2 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY killed [[GLOBAL_LOAD_DWORDX4_]].sub3 + ; GCN-NEXT: S_NOP 0, implicit [[PRED_COPY]], implicit [[PRED_COPY1]] %0:vreg_64_align2 = IMPLICIT_DEF %1:vreg_96_align2 = GLOBAL_LOAD_DWORDX3 %0, 12, 0, implicit $exec :: (load (s96) from `i128 addrspace(1)* undef`, align 8, addrspace 1) %2:vgpr_32 = GLOBAL_LOAD_DWORD %0, 24, 0, implicit $exec :: (load (s32) from `i32 addrspace(1)* undef`, align 4, addrspace 1) @@ -156,9 +156,9 @@ ; GCN-LABEL: name: merge_global_load_dwordx1_with_dwordx2 ; GCN: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF ; GCN-NEXT: [[GLOBAL_LOAD_DWORDX3_:%[0-9]+]]:vreg_96_align2 = GLOBAL_LOAD_DWORDX3 [[DEF]], 12, 0, implicit $exec :: (load (s96) from `ptr addrspace(1) undef`, align 4, addrspace 1) - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_LOAD_DWORDX3_]].sub0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vreg_64_align2 = COPY killed [[GLOBAL_LOAD_DWORDX3_]].sub1_sub2 - ; GCN-NEXT: S_NOP 0, implicit [[COPY1]], implicit [[COPY]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY [[GLOBAL_LOAD_DWORDX3_]].sub0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64_align2 = PRED_COPY killed [[GLOBAL_LOAD_DWORDX3_]].sub1_sub2 + ; GCN-NEXT: S_NOP 0, implicit [[PRED_COPY1]], implicit [[PRED_COPY]] %0:vreg_64_align2 = IMPLICIT_DEF %2:vgpr_32 = GLOBAL_LOAD_DWORD %0, 12, 0, implicit $exec :: (load (s32) from `i32 addrspace(1)* undef`, align 4, addrspace 1) %1:vreg_64_align2 = GLOBAL_LOAD_DWORDX2 %0, 16, 0, implicit $exec :: (load (s64) from `i64 addrspace(1)* undef`, align 8, addrspace 1) @@ -238,9 +238,9 @@ ; GCN: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF ; GCN-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; GCN-NEXT: [[GLOBAL_LOAD_DWORDX2_SADDR:%[0-9]+]]:vreg_64_align2 = GLOBAL_LOAD_DWORDX2_SADDR [[DEF]], [[DEF1]], 0, 0, implicit $exec :: (load (s64) from `ptr addrspace(1) undef`, align 4, addrspace 1) - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_LOAD_DWORDX2_SADDR]].sub0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY killed [[GLOBAL_LOAD_DWORDX2_SADDR]].sub1 - ; GCN-NEXT: S_NOP 0, implicit [[COPY]], implicit [[COPY1]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY [[GLOBAL_LOAD_DWORDX2_SADDR]].sub0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY killed [[GLOBAL_LOAD_DWORDX2_SADDR]].sub1 + ; GCN-NEXT: S_NOP 0, implicit [[PRED_COPY]], implicit [[PRED_COPY1]] %0:sreg_64_xexec = IMPLICIT_DEF %1:vgpr_32 = IMPLICIT_DEF %2:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %0, %1, 0, 0, implicit $exec :: (load (s32) from `i32 addrspace(1)* undef`, align 4, addrspace 1) @@ -257,11 +257,11 @@ ; GCN: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF ; GCN-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; GCN-NEXT: [[GLOBAL_LOAD_DWORDX3_SADDR:%[0-9]+]]:vreg_96_align2 = 
GLOBAL_LOAD_DWORDX3_SADDR [[DEF]], [[DEF1]], 0, 1, implicit $exec :: (load (s96) from `ptr addrspace(1) undef`, align 4, addrspace 1) - ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_64_align2 = COPY [[GLOBAL_LOAD_DWORDX3_SADDR]].sub0_sub1 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY killed [[GLOBAL_LOAD_DWORDX3_SADDR]].sub2 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed [[COPY]].sub1 - ; GCN-NEXT: S_NOP 0, implicit [[COPY2]], implicit [[COPY3]], implicit [[COPY1]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64_align2 = PRED_COPY [[GLOBAL_LOAD_DWORDX3_SADDR]].sub0_sub1 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY killed [[GLOBAL_LOAD_DWORDX3_SADDR]].sub2 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY killed [[PRED_COPY]].sub1 + ; GCN-NEXT: S_NOP 0, implicit [[PRED_COPY2]], implicit [[PRED_COPY3]], implicit [[PRED_COPY1]] %0:sreg_64_xexec = IMPLICIT_DEF %1:vgpr_32 = IMPLICIT_DEF %2:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %0, %1, 0, 1, implicit $exec :: (load (s32) from `i32 addrspace(1)* undef`, align 4, addrspace 1) @@ -279,13 +279,13 @@ ; GCN: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF ; GCN-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; GCN-NEXT: [[GLOBAL_LOAD_DWORDX4_SADDR:%[0-9]+]]:vreg_128_align2 = GLOBAL_LOAD_DWORDX4_SADDR [[DEF]], [[DEF1]], 0, 2, implicit $exec :: (load (s128) from `ptr addrspace(1) undef`, align 4, addrspace 1) - ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_96_align2 = COPY [[GLOBAL_LOAD_DWORDX4_SADDR]].sub0_sub1_sub2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY killed [[GLOBAL_LOAD_DWORDX4_SADDR]].sub3 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vreg_64_align2 = COPY [[COPY]].sub0_sub1 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed [[COPY]].sub2 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY2]].sub0 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY killed [[COPY2]].sub1 - ; GCN-NEXT: S_NOP 0, implicit [[COPY4]], implicit [[COPY5]], implicit [[COPY3]], implicit [[COPY1]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_96_align2 = PRED_COPY [[GLOBAL_LOAD_DWORDX4_SADDR]].sub0_sub1_sub2 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY killed [[GLOBAL_LOAD_DWORDX4_SADDR]].sub3 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64_align2 = PRED_COPY [[PRED_COPY]].sub0_sub1 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY killed [[PRED_COPY]].sub2 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY2]].sub0 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY killed [[PRED_COPY2]].sub1 + ; GCN-NEXT: S_NOP 0, implicit [[PRED_COPY4]], implicit [[PRED_COPY5]], implicit [[PRED_COPY3]], implicit [[PRED_COPY1]] %0:sreg_64_xexec = IMPLICIT_DEF %1:vgpr_32 = IMPLICIT_DEF %2:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %0, %1, 0, 2, implicit $exec :: (load (s32) from `i32 addrspace(1)* undef`, align 4, addrspace 1) @@ -304,16 +304,16 @@ ; GCN: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF ; GCN-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; GCN-NEXT: [[GLOBAL_LOAD_DWORDX4_SADDR:%[0-9]+]]:vreg_128_align2 = GLOBAL_LOAD_DWORDX4_SADDR [[DEF]], [[DEF1]], 4, 3, implicit $exec :: (load (s128) from `ptr addrspace(1) undef`, align 4, addrspace 1) - ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_96_align2 = COPY [[GLOBAL_LOAD_DWORDX4_SADDR]].sub0_sub1_sub2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY killed [[GLOBAL_LOAD_DWORDX4_SADDR]].sub3 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vreg_64_align2 = COPY [[COPY]].sub0_sub1 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = 
COPY killed [[COPY]].sub2 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY2]].sub0 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY killed [[COPY2]].sub1 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_96_align2 = PRED_COPY [[GLOBAL_LOAD_DWORDX4_SADDR]].sub0_sub1_sub2 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY killed [[GLOBAL_LOAD_DWORDX4_SADDR]].sub3 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64_align2 = PRED_COPY [[PRED_COPY]].sub0_sub1 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY killed [[PRED_COPY]].sub2 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY2]].sub0 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY killed [[PRED_COPY2]].sub1 ; GCN-NEXT: [[GLOBAL_LOAD_DWORDX2_SADDR:%[0-9]+]]:vreg_64_align2 = GLOBAL_LOAD_DWORDX2_SADDR [[DEF]], [[DEF1]], 20, 3, implicit $exec :: (load (s64) from `ptr addrspace(1) undef`, align 4, addrspace 1) - ; GCN-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_LOAD_DWORDX2_SADDR]].sub0 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY killed [[GLOBAL_LOAD_DWORDX2_SADDR]].sub1 - ; GCN-NEXT: S_NOP 0, implicit [[COPY4]], implicit [[COPY5]], implicit [[COPY3]], implicit [[COPY1]], implicit [[COPY6]], implicit [[COPY7]] + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[GLOBAL_LOAD_DWORDX2_SADDR]].sub0 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY killed [[GLOBAL_LOAD_DWORDX2_SADDR]].sub1 + ; GCN-NEXT: S_NOP 0, implicit [[PRED_COPY4]], implicit [[PRED_COPY5]], implicit [[PRED_COPY3]], implicit [[PRED_COPY1]], implicit [[PRED_COPY6]], implicit [[PRED_COPY7]] %0:sreg_64_xexec = IMPLICIT_DEF %1:vgpr_32 = IMPLICIT_DEF %2:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %0, %1, 4, 3, implicit $exec :: (load (s32) from `i32 addrspace(1)* undef`, align 4, addrspace 1) @@ -334,9 +334,9 @@ ; GCN: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF ; GCN-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; GCN-NEXT: [[GLOBAL_LOAD_DWORDX4_SADDR:%[0-9]+]]:vreg_128_align2 = GLOBAL_LOAD_DWORDX4_SADDR [[DEF]], [[DEF1]], 0, 0, implicit $exec :: (load (s128) from `ptr addrspace(1) undef`, align 4, addrspace 1) - ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_64_align2 = COPY [[GLOBAL_LOAD_DWORDX4_SADDR]].sub0_sub1 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vreg_64_align2 = COPY killed [[GLOBAL_LOAD_DWORDX4_SADDR]].sub2_sub3 - ; GCN-NEXT: S_NOP 0, implicit [[COPY]], implicit [[COPY1]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64_align2 = PRED_COPY [[GLOBAL_LOAD_DWORDX4_SADDR]].sub0_sub1 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64_align2 = PRED_COPY killed [[GLOBAL_LOAD_DWORDX4_SADDR]].sub2_sub3 + ; GCN-NEXT: S_NOP 0, implicit [[PRED_COPY]], implicit [[PRED_COPY1]] %0:sreg_64_xexec = IMPLICIT_DEF %1:vgpr_32 = IMPLICIT_DEF %2:vreg_64_align2 = GLOBAL_LOAD_DWORDX2_SADDR %0, %1, 0, 0, implicit $exec :: (load (s64) from `i64 addrspace(1)* undef`, align 4, addrspace 1) @@ -405,9 +405,9 @@ ; GCN-LABEL: name: merge_global_load_dword_2_out_of_order ; GCN: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF ; GCN-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64_align2 = GLOBAL_LOAD_DWORDX2 [[DEF]], 0, 0, implicit $exec :: (load (s64) from `ptr addrspace(1) undef`, align 4, addrspace 1) - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_LOAD_DWORDX2_]].sub1 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY killed [[GLOBAL_LOAD_DWORDX2_]].sub0 - ; GCN-NEXT: S_NOP 0, implicit [[COPY]], implicit [[COPY1]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY [[GLOBAL_LOAD_DWORDX2_]].sub1 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY killed [[GLOBAL_LOAD_DWORDX2_]].sub0 + ; 
GCN-NEXT: S_NOP 0, implicit [[PRED_COPY]], implicit [[PRED_COPY1]] %0:vreg_64_align2 = IMPLICIT_DEF %1:vgpr_32 = GLOBAL_LOAD_DWORD %0, 4, 0, implicit $exec :: (load (s32) from `i32 addrspace(1)* undef` + 4, basealign 8, addrspace 1) %2:vgpr_32 = GLOBAL_LOAD_DWORD %0, 0, 0, implicit $exec :: (load (s32) from `float addrspace(1)* undef`, align 4, addrspace 1) @@ -422,11 +422,11 @@ ; GCN-LABEL: name: merge_global_load_dword_3_out_of_order ; GCN: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF ; GCN-NEXT: [[GLOBAL_LOAD_DWORDX3_:%[0-9]+]]:vreg_96_align2 = GLOBAL_LOAD_DWORDX3 [[DEF]], 0, 0, implicit $exec :: (load (s96) from `ptr addrspace(1) undef`, align 16, addrspace 1) - ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_64_align2 = COPY [[GLOBAL_LOAD_DWORDX3_]].sub0_sub1 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY killed [[GLOBAL_LOAD_DWORDX3_]].sub2 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed [[COPY]].sub0 - ; GCN-NEXT: S_NOP 0, implicit [[COPY2]], implicit [[COPY3]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64_align2 = PRED_COPY [[GLOBAL_LOAD_DWORDX3_]].sub0_sub1 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY killed [[GLOBAL_LOAD_DWORDX3_]].sub2 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY killed [[PRED_COPY]].sub0 + ; GCN-NEXT: S_NOP 0, implicit [[PRED_COPY2]], implicit [[PRED_COPY3]] %0:vreg_64_align2 = IMPLICIT_DEF %1:vgpr_32 = GLOBAL_LOAD_DWORD %0, 4, 0, implicit $exec :: (load (s32) from `i32 addrspace(1)* undef` + 4, align 4, addrspace 1) %2:vgpr_32 = GLOBAL_LOAD_DWORD %0, 0, 0, implicit $exec :: (load (s32) from `float addrspace(1)* undef`, align 16, addrspace 1) diff --git a/llvm/test/CodeGen/AMDGPU/merge-image-load-gfx10.mir b/llvm/test/CodeGen/AMDGPU/merge-image-load-gfx10.mir --- a/llvm/test/CodeGen/AMDGPU/merge-image-load-gfx10.mir +++ b/llvm/test/CodeGen/AMDGPU/merge-image-load-gfx10.mir @@ -2,17 +2,17 @@ # GFX10-LABEL: name: image_load_merged_v1v3 # GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_V4_V2_gfx10 %5, %3, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX10: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_load_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_LOAD_V1_V2_gfx10 %5:vreg_64, %3:sgpr_256, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_LOAD_V3_V2_gfx10 %5:vreg_64, %3:sgpr_256, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -20,17 +20,17 @@ --- # GFX10-LABEL: name: image_load_merged_v1v3_reversed # GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_V4_V2_gfx10 %5, %3, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), addrspace 4) -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub3 -# 
GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub0_sub1_sub2 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub3 +# GFX10: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub0_sub1_sub2 name: image_load_merged_v1v3_reversed body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_LOAD_V1_V2_gfx10 %5:vreg_64, %3:sgpr_256, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_LOAD_V3_V2_gfx10 %5:vreg_64, %3:sgpr_256, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -39,17 +39,17 @@ # GFX10-LABEL: name: image_load_merged_v2v2 # GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_V4_V2_gfx10 %5, %3, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), addrspace 4) -# GFX10: %{{[0-9]+}}:vreg_64 = COPY %8.sub0_sub1 -# GFX10: %{{[0-9]+}}:vreg_64 = COPY killed %8.sub2_sub3 +# GFX10: %{{[0-9]+}}:vreg_64 = PRED_COPY %8.sub0_sub1 +# GFX10: %{{[0-9]+}}:vreg_64 = PRED_COPY killed %8.sub2_sub3 name: image_load_merged_v2v2 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vreg_64 = IMAGE_LOAD_V2_V2_gfx10 %5:vreg_64, %3:sgpr_256, 3, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s64), align 16, addrspace 4) %7:vreg_64 = IMAGE_LOAD_V2_V2_gfx10 %5:vreg_64, %3:sgpr_256, 12, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s64), align 16, addrspace 4) @@ -58,17 +58,17 @@ # GFX10-LABEL: name: image_load_merged_v2v2_reversed # GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_V4_V2_gfx10 %5, %3, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), addrspace 4) -# GFX10: %{{[0-9]+}}:vreg_64 = COPY %8.sub2_sub3 -# GFX10: %{{[0-9]+}}:vreg_64 = COPY killed %8.sub0_sub1 +# GFX10: %{{[0-9]+}}:vreg_64 = PRED_COPY %8.sub2_sub3 +# GFX10: %{{[0-9]+}}:vreg_64 = PRED_COPY killed %8.sub0_sub1 name: image_load_merged_v2v2_reversed body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vreg_64 = IMAGE_LOAD_V2_V2_gfx10 %5:vreg_64, %3:sgpr_256, 12, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s64), align 16, addrspace 4) %7:vreg_64 = IMAGE_LOAD_V2_V2_gfx10 %5:vreg_64, %3:sgpr_256, 3, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s64), align 16, 
addrspace 4) @@ -77,17 +77,17 @@ # GFX10-LABEL: name: image_load_merged_v3v1 # GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_V4_V2_gfx10 %5, %3, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), addrspace 4) -# GFX10: %{{[0-9]+}}:vreg_96 = COPY %8.sub0_sub1_sub2 -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY killed %8.sub3 +# GFX10: %{{[0-9]+}}:vreg_96 = PRED_COPY %8.sub0_sub1_sub2 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY killed %8.sub3 name: image_load_merged_v3v1 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vreg_96 = IMAGE_LOAD_V3_V2_gfx10 %5:vreg_64, %3:sgpr_256, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) %7:vgpr_32 = IMAGE_LOAD_V1_V2_gfx10 %5:vreg_64, %3:sgpr_256, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) @@ -96,17 +96,17 @@ # GFX10-LABEL: name: image_load_merged_v3v1_reversed # GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_V4_V2_gfx10 %5, %3, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX10: %{{[0-9]+}}:vreg_96 = COPY %8.sub1_sub2_sub3 -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY killed %8.sub0 +# GFX10: %{{[0-9]+}}:vreg_96 = PRED_COPY %8.sub1_sub2_sub3 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY killed %8.sub0 name: image_load_merged_v3v1_reversed body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vreg_96 = IMAGE_LOAD_V3_V2_gfx10 %5:vreg_64, %3:sgpr_256, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) %7:vgpr_32 = IMAGE_LOAD_V1_V2_gfx10 %5:vreg_64, %3:sgpr_256, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) @@ -119,11 +119,11 @@ name: image_load_divided_merged body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_LOAD_V1_V2_gfx10 %5:vreg_64, %3:sgpr_256, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) @@ -141,11 +141,11 @@ name: image_load_divided_not_merged body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = 
S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vreg_128 = COPY %2 + %4:vreg_128 = PRED_COPY %2 %5:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_LOAD_V1_V2_gfx10 %5:vreg_64, %3:sgpr_256, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) IMAGE_STORE_V4_V2 %4:vreg_128, %5:vreg_64, %3:sgpr_256, 15, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (store (s128)) @@ -160,11 +160,11 @@ name: image_load_dmask_overlapped_not_merged body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_LOAD_V1_V2_gfx10 %5:vreg_64, %3:sgpr_256, 4, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_LOAD_V3_V2_gfx10 %5:vreg_64, %3:sgpr_256, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -178,11 +178,11 @@ name: image_load_dmask_not_disjoint_not_merged body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_LOAD_V1_V2_gfx10 %5:vreg_64, %3:sgpr_256, 4, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_LOAD_V3_V2_gfx10 %5:vreg_64, %3:sgpr_256, 11, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -196,11 +196,11 @@ name: image_load_not_merged_0 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2, 1, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %7:vgpr_32 = IMAGE_LOAD_V1_V2_gfx10 %5, %3, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) @@ -215,12 +215,12 @@ name: image_load_not_merged_1 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %5:vgpr_32 = COPY %2.sub3 + %5:vgpr_32 = PRED_COPY %2.sub3 %6:vreg_64 = 
BUFFER_LOAD_DWORDX2_OFFSET %2, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %7:vgpr_32 = IMAGE_LOAD_V1_V2_gfx10 %6, %3, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %8:vreg_96 = IMAGE_LOAD_V3_V2_gfx10 %6, %4, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -234,11 +234,11 @@ name: image_load_not_merged_3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_LOAD_V1_V2_gfx10 %5, %3, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_LOAD_V3_V2_gfx10 %5, %3, 7, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -252,11 +252,11 @@ name: image_load_not_merged_4 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_LOAD_V1_V2_gfx10 %5, %3, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_LOAD_V3_V2_gfx10 %5, %3, 7, 1, -1, 1, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -270,11 +270,11 @@ name: image_load_not_merged_5 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_LOAD_V1_V2_gfx10 %5, %3, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_LOAD_V3_V2_gfx10 %5, %3, 7, 1, -1, 0, 1, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -288,11 +288,11 @@ name: image_load_not_merged_6 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_LOAD_V1_V2_gfx10 %5, %3, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_LOAD_V3_V2_gfx10 %5, %3, 7, 1, -1, 1, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -306,11 +306,11 @@ 
name: image_load_not_merged_7 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_LOAD_V1_V2_gfx10 %5, %3, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_LOAD_V3_V2_gfx10 %5, %3, 7, 1, -1, 0, 1, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -324,13 +324,13 @@ name: image_load_not_merged_8 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) - %6:vgpr_32 = COPY %5.sub0 + %6:vgpr_32 = PRED_COPY %5.sub0 %7:vgpr_32 = IMAGE_LOAD_V1_V1_gfx10 %6, %3, 8, 1, -1, 0, 0, 1, 0, 0, 0, implicit $exec :: (dereferenceable load (s64), align 16, addrspace 4) %8:vreg_96 = IMAGE_LOAD_V3_V2_gfx10 %5, %3, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) ... @@ -343,11 +343,11 @@ name: image_load_not_merged_9 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vreg_64 = IMAGE_LOAD_V2_V2_gfx10 %5, %3, 8, 1, -1, 0, 0, 0, 1, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_LOAD_V3_V2_gfx10 %5, %3, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -361,11 +361,11 @@ name: image_load_not_merged_10 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vreg_64 = IMAGE_LOAD_V2_V2_gfx10 %5, %3, 8, 1, -1, 0, 0, 0, 0, 1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_LOAD_V3_V2_gfx10 %5, %3, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -379,11 +379,11 @@ name: image_load_not_merged_11 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - 
%4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_LOAD_V1_V2_gfx10 %5, %3, 8, 1, -1, 0, 0, 0, 0, 0, 1, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_LOAD_V3_V2_gfx10 %5, %3, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -392,17 +392,17 @@ # GFX10-LABEL: name: image_load_mip_merged_v1v3 # GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_MIP_V4_V3_gfx10 %5, %3, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX10: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_load_mip_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_96 = BUFFER_LOAD_DWORDX3_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_LOAD_MIP_V1_V3_gfx10 %5:vreg_96, %3:sgpr_256, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_LOAD_MIP_V3_V3_gfx10 %5:vreg_96, %3:sgpr_256, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -413,17 +413,17 @@ # GFX10-LABEL: name: image_load_mip_pck_merged_v1v3 # GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_MIP_PCK_V4_V3_gfx10 %5, %3, 15, 1, -1, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX10: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_load_mip_pck_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_96 = BUFFER_LOAD_DWORDX3_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_LOAD_MIP_PCK_V1_V3_gfx10 %5:vreg_96, %3:sgpr_256, 1, 1, -1, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_LOAD_MIP_PCK_V3_V3_gfx10 %5:vreg_96, %3:sgpr_256, 14, 1, -1, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -434,17 +434,17 @@ # GFX10-LABEL: name: image_load_mip_pck_sgn_merged_v1v3 # GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_MIP_PCK_SGN_V4_V3_gfx10 %5, %3, 15, 1, -1, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX10: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: 
image_load_mip_pck_sgn_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_96 = BUFFER_LOAD_DWORDX3_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_LOAD_MIP_PCK_SGN_V1_V3_gfx10 %5:vreg_96, %3:sgpr_256, 1, 1, -1, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_LOAD_MIP_PCK_SGN_V3_V3_gfx10 %5:vreg_96, %3:sgpr_256, 14, 1, -1, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -453,17 +453,17 @@ # GFX10-LABEL: name: image_load_pck_merged_v1v3 # GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_PCK_V4_V2_gfx10 %5, %3, 15, 1, -1, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX10: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_load_pck_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_LOAD_PCK_V1_V2_gfx10 %5:vreg_64, %3:sgpr_256, 1, 1, -1, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_LOAD_PCK_V3_V2_gfx10 %5:vreg_64, %3:sgpr_256, 14, 1, -1, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -472,17 +472,17 @@ # GFX10-LABEL: name: image_load_pck_sgn_merged_v1v3 # GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_PCK_SGN_V4_V2_gfx10 %5, %3, 15, 1, -1, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX10: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_load_pck_sgn_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_LOAD_PCK_SGN_V1_V2_gfx10 %5:vreg_64, %3:sgpr_256, 1, 1, -1, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_LOAD_PCK_SGN_V3_V2_gfx10 %5:vreg_64, %3:sgpr_256, 14, 1, -1, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) diff --git a/llvm/test/CodeGen/AMDGPU/merge-image-load-gfx11.mir b/llvm/test/CodeGen/AMDGPU/merge-image-load-gfx11.mir --- 
a/llvm/test/CodeGen/AMDGPU/merge-image-load-gfx11.mir +++ b/llvm/test/CodeGen/AMDGPU/merge-image-load-gfx11.mir @@ -2,17 +2,17 @@ # GFX11-LABEL: name: image_load_merged_v1v3 # GFX11: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_V4_V2_gfx11 %5, %3, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX11: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX11: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_load_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) %6:vgpr_32 = IMAGE_LOAD_V1_V2_gfx11 %5:vreg_64, %3:sgpr_256, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) %7:vreg_96 = IMAGE_LOAD_V3_V2_gfx11 %5:vreg_64, %3:sgpr_256, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) @@ -20,17 +20,17 @@ --- # GFX11-LABEL: name: image_load_merged_v1v3_reversed # GFX11: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_V4_V2_gfx11 %5, %3, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), addrspace 4) -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY %8.sub3 -# GFX11: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub0_sub1_sub2 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub3 +# GFX11: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub0_sub1_sub2 name: image_load_merged_v1v3_reversed body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) %6:vgpr_32 = IMAGE_LOAD_V1_V2_gfx11 %5:vreg_64, %3:sgpr_256, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) %7:vreg_96 = IMAGE_LOAD_V3_V2_gfx11 %5:vreg_64, %3:sgpr_256, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) @@ -39,17 +39,17 @@ # GFX11-LABEL: name: image_load_merged_v2v2 # GFX11: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_V4_V2_gfx11 %5, %3, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), addrspace 4) -# GFX11: %{{[0-9]+}}:vreg_64 = COPY %8.sub0_sub1 -# GFX11: %{{[0-9]+}}:vreg_64 = COPY killed %8.sub2_sub3 +# GFX11: %{{[0-9]+}}:vreg_64 = PRED_COPY %8.sub0_sub1 +# GFX11: %{{[0-9]+}}:vreg_64 = PRED_COPY killed %8.sub2_sub3 name: image_load_merged_v2v2 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2:sgpr_128, 0, 0, 0, 0, 
implicit $exec :: (dereferenceable invariant load 16) %6:vreg_64 = IMAGE_LOAD_V2_V2_gfx11 %5:vreg_64, %3:sgpr_256, 3, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8, align 16, addrspace 4) %7:vreg_64 = IMAGE_LOAD_V2_V2_gfx11 %5:vreg_64, %3:sgpr_256, 12, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8, align 16, addrspace 4) @@ -58,17 +58,17 @@ # GFX11-LABEL: name: image_load_merged_v2v2_reversed # GFX11: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_V4_V2_gfx11 %5, %3, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), addrspace 4) -# GFX11: %{{[0-9]+}}:vreg_64 = COPY %8.sub2_sub3 -# GFX11: %{{[0-9]+}}:vreg_64 = COPY killed %8.sub0_sub1 +# GFX11: %{{[0-9]+}}:vreg_64 = PRED_COPY %8.sub2_sub3 +# GFX11: %{{[0-9]+}}:vreg_64 = PRED_COPY killed %8.sub0_sub1 name: image_load_merged_v2v2_reversed body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) %6:vreg_64 = IMAGE_LOAD_V2_V2_gfx11 %5:vreg_64, %3:sgpr_256, 12, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8, align 16, addrspace 4) %7:vreg_64 = IMAGE_LOAD_V2_V2_gfx11 %5:vreg_64, %3:sgpr_256, 3, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8, align 16, addrspace 4) @@ -77,17 +77,17 @@ # GFX11-LABEL: name: image_load_merged_v3v1 # GFX11: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_V4_V2_gfx11 %5, %3, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), addrspace 4) -# GFX11: %{{[0-9]+}}:vreg_96 = COPY %8.sub0_sub1_sub2 -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY killed %8.sub3 +# GFX11: %{{[0-9]+}}:vreg_96 = PRED_COPY %8.sub0_sub1_sub2 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY killed %8.sub3 name: image_load_merged_v3v1 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) %6:vreg_96 = IMAGE_LOAD_V3_V2_gfx11 %5:vreg_64, %3:sgpr_256, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) %7:vgpr_32 = IMAGE_LOAD_V1_V2_gfx11 %5:vreg_64, %3:sgpr_256, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) @@ -96,17 +96,17 @@ # GFX11-LABEL: name: image_load_merged_v3v1_reversed # GFX11: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_V4_V2_gfx11 %5, %3, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX11: %{{[0-9]+}}:vreg_96 = COPY %8.sub1_sub2_sub3 -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY killed %8.sub0 +# GFX11: %{{[0-9]+}}:vreg_96 = PRED_COPY %8.sub1_sub2_sub3 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY killed %8.sub0 name: image_load_merged_v3v1_reversed body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 
- %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) %6:vreg_96 = IMAGE_LOAD_V3_V2_gfx11 %5:vreg_64, %3:sgpr_256, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) %7:vgpr_32 = IMAGE_LOAD_V1_V2_gfx11 %5:vreg_64, %3:sgpr_256, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) @@ -119,11 +119,11 @@ name: image_load_divided_merged body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) %6:vgpr_32 = IMAGE_LOAD_V1_V2_gfx11 %5:vreg_64, %3:sgpr_256, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) %7:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) @@ -141,11 +141,11 @@ name: image_load_divided_not_merged body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vreg_128 = COPY %2 + %4:vreg_128 = PRED_COPY %2 %5:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) %6:vgpr_32 = IMAGE_LOAD_V1_V2_gfx11 %5:vreg_64, %3:sgpr_256, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) IMAGE_STORE_V4_V2 %4:vreg_128, %5:vreg_64, %3:sgpr_256, 15, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 16) @@ -160,11 +160,11 @@ name: image_load_dmask_overlapped_not_merged body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) %6:vgpr_32 = IMAGE_LOAD_V1_V2_gfx11 %5:vreg_64, %3:sgpr_256, 4, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) %7:vreg_96 = IMAGE_LOAD_V3_V2_gfx11 %5:vreg_64, %3:sgpr_256, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) @@ -178,11 +178,11 @@ name: image_load_dmask_not_disjoint_not_merged body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant 
load 16) %6:vgpr_32 = IMAGE_LOAD_V1_V2_gfx11 %5:vreg_64, %3:sgpr_256, 4, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) %7:vreg_96 = IMAGE_LOAD_V3_V2_gfx11 %5:vreg_64, %3:sgpr_256, 11, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) @@ -196,11 +196,11 @@ name: image_load_not_merged_0 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) %6:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2, 1, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) %7:vgpr_32 = IMAGE_LOAD_V1_V2_gfx11 %5, %3, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) @@ -215,12 +215,12 @@ name: image_load_not_merged_1 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %5:vgpr_32 = COPY %2.sub3 + %5:vgpr_32 = PRED_COPY %2.sub3 %6:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) %7:vgpr_32 = IMAGE_LOAD_V1_V2_gfx11 %6, %3, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) %8:vreg_96 = IMAGE_LOAD_V3_V2_gfx11 %6, %4, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) @@ -234,11 +234,11 @@ name: image_load_not_merged_3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) %6:vgpr_32 = IMAGE_LOAD_V1_V2_gfx11 %5, %3, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) %7:vreg_96 = IMAGE_LOAD_V3_V2_gfx11 %5, %3, 7, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) @@ -252,11 +252,11 @@ name: image_load_not_merged_4 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) %6:vgpr_32 = IMAGE_LOAD_V1_V2_gfx11 %5, %3, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) %7:vreg_96 = IMAGE_LOAD_V3_V2_gfx11 %5, %3, 7, 1, -1, 1, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) @@ -270,11 +270,11 @@ name: image_load_not_merged_5 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 
= PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) %6:vgpr_32 = IMAGE_LOAD_V1_V2_gfx11 %5, %3, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) %7:vreg_96 = IMAGE_LOAD_V3_V2_gfx11 %5, %3, 7, 1, -1, 0, 1, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) @@ -288,11 +288,11 @@ name: image_load_not_merged_6 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) %6:vgpr_32 = IMAGE_LOAD_V1_V2_gfx11 %5, %3, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) %7:vreg_96 = IMAGE_LOAD_V3_V2_gfx11 %5, %3, 7, 1, -1, 1, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) @@ -306,11 +306,11 @@ name: image_load_not_merged_7 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) %6:vgpr_32 = IMAGE_LOAD_V1_V2_gfx11 %5, %3, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) %7:vreg_96 = IMAGE_LOAD_V3_V2_gfx11 %5, %3, 7, 1, -1, 0, 1, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) @@ -324,13 +324,13 @@ name: image_load_not_merged_8 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %6:vgpr_32 = COPY %5.sub0 + %6:vgpr_32 = PRED_COPY %5.sub0 %7:vgpr_32 = IMAGE_LOAD_V1_V1_gfx11 %6, %3, 8, 1, -1, 0, 0, 1, 0, 0, 0, implicit $exec :: (dereferenceable load 8, align 16, addrspace 4) %8:vreg_96 = IMAGE_LOAD_V3_V2_gfx11 %5, %3, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) ... 
@@ -343,11 +343,11 @@ name: image_load_not_merged_9 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) %6:vreg_64 = IMAGE_LOAD_V2_V2_gfx11 %5, %3, 8, 1, -1, 0, 0, 0, 1, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) %7:vreg_96 = IMAGE_LOAD_V3_V2_gfx11 %5, %3, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) @@ -361,11 +361,11 @@ name: image_load_not_merged_10 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) %6:vreg_64 = IMAGE_LOAD_V2_V2_gfx11 %5, %3, 8, 1, -1, 0, 0, 0, 0, 1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) %7:vreg_96 = IMAGE_LOAD_V3_V2_gfx11 %5, %3, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) @@ -379,11 +379,11 @@ name: image_load_not_merged_11 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) %6:vgpr_32 = IMAGE_LOAD_V1_V2_gfx11 %5, %3, 8, 1, -1, 0, 0, 0, 0, 0, 1, implicit $exec :: (dereferenceable load 4, addrspace 4) %7:vreg_96 = IMAGE_LOAD_V3_V2_gfx11 %5, %3, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) @@ -392,17 +392,17 @@ # GFX11-LABEL: name: image_load_mip_merged_v1v3 # GFX11: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_MIP_V4_V3_gfx11 %5, %3, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX11: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX11: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_load_mip_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_96 = BUFFER_LOAD_DWORDX3_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) %6:vgpr_32 = IMAGE_LOAD_MIP_V1_V3_gfx11 %5:vreg_96, %3:sgpr_256, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) %7:vreg_96 = IMAGE_LOAD_MIP_V3_V3_gfx11 %5:vreg_96, %3:sgpr_256, 14, 1, -1, 0, 0, 0, 0, 0, 
0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) @@ -413,17 +413,17 @@ # GFX11-LABEL: name: image_load_mip_pck_merged_v1v3 # GFX11: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_MIP_PCK_V4_V3_gfx11 %5, %3, 15, 1, -1, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX11: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX11: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_load_mip_pck_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_96 = BUFFER_LOAD_DWORDX3_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) %6:vgpr_32 = IMAGE_LOAD_MIP_PCK_V1_V3_gfx11 %5:vreg_96, %3:sgpr_256, 1, 1, -1, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) %7:vreg_96 = IMAGE_LOAD_MIP_PCK_V3_V3_gfx11 %5:vreg_96, %3:sgpr_256, 14, 1, -1, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) @@ -434,17 +434,17 @@ # GFX11-LABEL: name: image_load_mip_pck_sgn_merged_v1v3 # GFX11: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_MIP_PCK_SGN_V4_V3_gfx11 %5, %3, 15, 1, -1, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX11: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX11: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_load_mip_pck_sgn_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_96 = BUFFER_LOAD_DWORDX3_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) %6:vgpr_32 = IMAGE_LOAD_MIP_PCK_SGN_V1_V3_gfx11 %5:vreg_96, %3:sgpr_256, 1, 1, -1, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) %7:vreg_96 = IMAGE_LOAD_MIP_PCK_SGN_V3_V3_gfx11 %5:vreg_96, %3:sgpr_256, 14, 1, -1, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) @@ -453,17 +453,17 @@ # GFX11-LABEL: name: image_load_pck_merged_v1v3 # GFX11: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_PCK_V4_V2_gfx11 %5, %3, 15, 1, -1, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX11: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX11: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_load_pck_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_64 = 
BUFFER_LOAD_DWORDX2_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) %6:vgpr_32 = IMAGE_LOAD_PCK_V1_V2_gfx11 %5:vreg_64, %3:sgpr_256, 1, 1, -1, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) %7:vreg_96 = IMAGE_LOAD_PCK_V3_V2_gfx11 %5:vreg_64, %3:sgpr_256, 14, 1, -1, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) @@ -472,17 +472,17 @@ # GFX11-LABEL: name: image_load_pck_sgn_merged_v1v3 # GFX11: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_PCK_SGN_V4_V2_gfx11 %5, %3, 15, 1, -1, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX11: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX11: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_load_pck_sgn_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) %6:vgpr_32 = IMAGE_LOAD_PCK_SGN_V1_V2_gfx11 %5:vreg_64, %3:sgpr_256, 1, 1, -1, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) %7:vreg_96 = IMAGE_LOAD_PCK_SGN_V3_V2_gfx11 %5:vreg_64, %3:sgpr_256, 14, 1, -1, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) diff --git a/llvm/test/CodeGen/AMDGPU/merge-image-load.mir b/llvm/test/CodeGen/AMDGPU/merge-image-load.mir --- a/llvm/test/CodeGen/AMDGPU/merge-image-load.mir +++ b/llvm/test/CodeGen/AMDGPU/merge-image-load.mir @@ -2,17 +2,17 @@ # GFX9-LABEL: name: image_load_merged_v1v3 # GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_V4_V4 %5, %3, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_load_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_LOAD_V1_V4 %5:vreg_128, %3:sgpr_256, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_LOAD_V3_V4 %5:vreg_128, %3:sgpr_256, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -21,17 +21,17 @@ # GFX9-LABEL: name: image_load_merged_v1v3_reversed # GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_V4_V4 %5, %3, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), addrspace 4) -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub3 -# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub0_sub1_sub2 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub3 +# 
GFX9: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub0_sub1_sub2 name: image_load_merged_v1v3_reversed body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_LOAD_V1_V4 %5:vreg_128, %3:sgpr_256, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_LOAD_V3_V4 %5:vreg_128, %3:sgpr_256, 7, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -40,17 +40,17 @@ # GFX9-LABEL: name: image_load_merged_v2v2 # GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_V4_V4 %5, %3, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), addrspace 4) -# GFX9: %{{[0-9]+}}:vreg_64 = COPY %8.sub0_sub1 -# GFX9: %{{[0-9]+}}:vreg_64 = COPY killed %8.sub2_sub3 +# GFX9: %{{[0-9]+}}:vreg_64 = PRED_COPY %8.sub0_sub1 +# GFX9: %{{[0-9]+}}:vreg_64 = PRED_COPY killed %8.sub2_sub3 name: image_load_merged_v2v2 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vreg_64 = IMAGE_LOAD_V2_V4 %5:vreg_128, %3:sgpr_256, 3, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s64), align 16, addrspace 4) %7:vreg_64 = IMAGE_LOAD_V2_V4 %5:vreg_128, %3:sgpr_256, 12, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s64), align 16, addrspace 4) @@ -59,17 +59,17 @@ # GFX9-LABEL: name: image_load_merged_v2v2_reversed # GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_V4_V4 %5, %3, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), addrspace 4) -# GFX9: %{{[0-9]+}}:vreg_64 = COPY %8.sub2_sub3 -# GFX9: %{{[0-9]+}}:vreg_64 = COPY killed %8.sub0_sub1 +# GFX9: %{{[0-9]+}}:vreg_64 = PRED_COPY %8.sub2_sub3 +# GFX9: %{{[0-9]+}}:vreg_64 = PRED_COPY killed %8.sub0_sub1 name: image_load_merged_v2v2_reversed body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vreg_64 = IMAGE_LOAD_V2_V4 %5:vreg_128, %3:sgpr_256, 12, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s64), align 16, addrspace 4) %7:vreg_64 = IMAGE_LOAD_V2_V4 %5:vreg_128, %3:sgpr_256, 3, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s64), align 16, addrspace 4) @@ -78,17 +78,17 @@ # GFX9-LABEL: name: image_load_merged_v3v1 # GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_V4_V4 %5, %3, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec 
:: (dereferenceable load (s128), addrspace 4) -# GFX9: %{{[0-9]+}}:vreg_96 = COPY %8.sub0_sub1_sub2 -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY killed %8.sub3 +# GFX9: %{{[0-9]+}}:vreg_96 = PRED_COPY %8.sub0_sub1_sub2 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY killed %8.sub3 name: image_load_merged_v3v1 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vreg_96 = IMAGE_LOAD_V3_V4 %5:vreg_128, %3:sgpr_256, 7, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) %7:vgpr_32 = IMAGE_LOAD_V1_V4 %5:vreg_128, %3:sgpr_256, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) @@ -97,17 +97,17 @@ # GFX9-LABEL: name: image_load_merged_v3v1_reversed # GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_V4_V4 %5, %3, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX9: %{{[0-9]+}}:vreg_96 = COPY %8.sub1_sub2_sub3 -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY killed %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = PRED_COPY %8.sub1_sub2_sub3 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY killed %8.sub0 name: image_load_merged_v3v1_reversed body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vreg_96 = IMAGE_LOAD_V3_V4 %5:vreg_128, %3:sgpr_256, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) %7:vgpr_32 = IMAGE_LOAD_V1_V4 %5:vreg_128, %3:sgpr_256, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) @@ -120,11 +120,11 @@ name: image_load_divided_merged body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_LOAD_V1_V4 %5:vreg_128, %3:sgpr_256, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) @@ -142,11 +142,11 @@ name: image_load_divided_not_merged body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vreg_128 = COPY %2 + %4:vreg_128 = PRED_COPY %2 %5:vreg_128 = 
BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_LOAD_V1_V4 %5:vreg_128, %3:sgpr_256, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) IMAGE_STORE_V4_V4 %4:vreg_128, %5:vreg_128, %3:sgpr_256, 15, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (store (s128)) @@ -161,11 +161,11 @@ name: image_load_dmask_overlapped_not_merged body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_LOAD_V1_V4 %5:vreg_128, %3:sgpr_256, 4, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_LOAD_V3_V4 %5:vreg_128, %3:sgpr_256, 7, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -179,11 +179,11 @@ name: image_load_dmask_not_disjoint_not_merged body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_LOAD_V1_V4 %5:vreg_128, %3:sgpr_256, 4, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_LOAD_V3_V4 %5:vreg_128, %3:sgpr_256, 11, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -197,11 +197,11 @@ name: image_load_not_merged_0 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 1, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %7:vgpr_32 = IMAGE_LOAD_V1_V4 %5, %3, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) @@ -216,12 +216,12 @@ name: image_load_not_merged_1 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %5:vgpr_32 = COPY %2.sub3 + %5:vgpr_32 = PRED_COPY %2.sub3 %6:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %7:vgpr_32 = IMAGE_LOAD_V1_V4 %6, %3, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %8:vreg_96 = IMAGE_LOAD_V3_V4 %6, %4, 7, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: 
(dereferenceable load (s96), align 16, addrspace 4) @@ -235,11 +235,11 @@ name: image_load_not_merged_10 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_LOAD_V1_V4 %5, %3, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_LOAD_V3_V4 %5, %3, 7, 0, 0, 0, 0, 0, -1, 1, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -253,11 +253,11 @@ name: image_load_not_merged_3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_LOAD_V1_V4 %5, %3, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_LOAD_V3_V4 %5, %3, 7, 1, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -271,11 +271,11 @@ name: image_load_not_merged_4 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_LOAD_V1_V4 %5, %3, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_LOAD_V3_V4 %5, %3, 7, 1, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -289,11 +289,11 @@ name: image_load_not_merged_5 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_LOAD_V1_V4 %5, %3, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_LOAD_V3_V4 %5, %3, 7, 0, 1, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -307,11 +307,11 @@ name: image_load_not_merged_6 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_128 
= BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_LOAD_V1_V4 %5, %3, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_LOAD_V3_V4 %5, %3, 7, 0, 0, 1, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -325,11 +325,11 @@ name: image_load_not_merged_7 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vreg_64 = IMAGE_LOAD_V2_V4 %5, %3, 8, 0, 0, 0, 1, 0, -1, 0, implicit $exec :: (dereferenceable load (s64), align 16, addrspace 4) %7:vreg_96 = IMAGE_LOAD_V3_V4 %5, %3, 7, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -343,11 +343,11 @@ name: image_load_not_merged_8 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vreg_64 = IMAGE_LOAD_V2_V4 %5, %3, 8, 0, 0, 0, 0, 1, -1, 0, implicit $exec :: (dereferenceable load (s64), align 16, addrspace 4) %7:vreg_96 = IMAGE_LOAD_V3_V4 %5, %3, 7, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -361,11 +361,11 @@ name: image_load_not_merged_9 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_LOAD_V1_V4 %5, %3, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_LOAD_V3_V4 %5, %3, 7, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -374,17 +374,17 @@ # GFX9-LABEL: name: image_load_mip_merged_v1v3 # GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_MIP_V4_V4 %5, %3, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_load_mip_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_128 = 
BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_LOAD_MIP_V1_V4 %5:vreg_128, %3:sgpr_256, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_LOAD_MIP_V3_V4 %5:vreg_128, %3:sgpr_256, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -393,17 +393,17 @@ # GFX9-LABEL: name: image_load_mip_pck_merged_v1v3 # GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_MIP_PCK_V4_V4 %5, %3, 15, 0, 0, 0, 0, 0, -1, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_load_mip_pck_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_LOAD_MIP_PCK_V1_V4 %5:vreg_128, %3:sgpr_256, 1, 0, 0, 0, 0, 0, -1, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_LOAD_MIP_PCK_V3_V4 %5:vreg_128, %3:sgpr_256, 14, 0, 0, 0, 0, 0, -1, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -412,17 +412,17 @@ # GFX9-LABEL: name: image_load_mip_pck_sgn_merged_v1v3 # GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_MIP_PCK_SGN_V4_V4 %5, %3, 15, 0, 0, 0, 0, 0, -1, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_load_mip_pck_sgn_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_LOAD_MIP_PCK_SGN_V1_V4 %5:vreg_128, %3:sgpr_256, 1, 0, 0, 0, 0, 0, -1, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_LOAD_MIP_PCK_SGN_V3_V4 %5:vreg_128, %3:sgpr_256, 14, 0, 0, 0, 0, 0, -1, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -431,17 +431,17 @@ # GFX9-LABEL: name: image_load_pck_merged_v1v3 # GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_PCK_V4_V4 %5, %3, 15, 0, 0, 0, 0, 0, -1, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_load_pck_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = 
PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_LOAD_PCK_V1_V4 %5:vreg_128, %3:sgpr_256, 1, 0, 0, 0, 0, 0, -1, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_LOAD_PCK_V3_V4 %5:vreg_128, %3:sgpr_256, 14, 0, 0, 0, 0, 0, -1, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -450,17 +450,17 @@ # GFX9-LABEL: name: image_load_pck_sgn_merged_v1v3 # GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_PCK_SGN_V4_V4 %5, %3, 15, 0, 0, 0, 0, 0, -1, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_load_pck_sgn_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_LOAD_PCK_SGN_V1_V4 %5:vreg_128, %3:sgpr_256, 1, 0, 0, 0, 0, 0, -1, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_LOAD_PCK_SGN_V3_V4 %5:vreg_128, %3:sgpr_256, 14, 0, 0, 0, 0, 0, -1, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) diff --git a/llvm/test/CodeGen/AMDGPU/merge-image-sample-gfx10.mir b/llvm/test/CodeGen/AMDGPU/merge-image-sample-gfx10.mir --- a/llvm/test/CodeGen/AMDGPU/merge-image-sample-gfx10.mir +++ b/llvm/test/CodeGen/AMDGPU/merge-image-sample-gfx10.mir @@ -2,17 +2,17 @@ # GFX10-LABEL: name: image_sample_l_merged_v1v3 # GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_L_V4_V3_nsa_gfx10 %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX10: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_l_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit 
$exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -20,17 +20,17 @@ --- # GFX10-LABEL: name: image_sample_l_merged_v1v3_reversed # GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_L_V4_V3_nsa_gfx10 %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), addrspace 4) -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub3 -# GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub0_sub1_sub2 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub3 +# GFX10: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub0_sub1_sub2 name: image_sample_l_merged_v1v3_reversed body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -39,17 +39,17 @@ # GFX10-LABEL: name: image_sample_l_merged_v2v2 # GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_L_V4_V3_nsa_gfx10 %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), addrspace 4) -# GFX10: %{{[0-9]+}}:vreg_64 = COPY %8.sub0_sub1 -# GFX10: %{{[0-9]+}}:vreg_64 = COPY killed %8.sub2_sub3 +# GFX10: %{{[0-9]+}}:vreg_64 = PRED_COPY %8.sub0_sub1 +# GFX10: %{{[0-9]+}}:vreg_64 = PRED_COPY killed %8.sub2_sub3 name: image_sample_l_merged_v2v2 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vreg_64 = IMAGE_SAMPLE_L_V2_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 3, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s64), align 16, addrspace 4) %7:vreg_64 = IMAGE_SAMPLE_L_V2_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 12, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s64), align 16, addrspace 4) @@ -58,17 +58,17 @@ # GFX10-LABEL: name: image_sample_l_merged_v2v2_reversed # GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_L_V4_V3_nsa_gfx10 %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), addrspace 4) -# GFX10: %{{[0-9]+}}:vreg_64 = COPY %8.sub2_sub3 -# GFX10: %{{[0-9]+}}:vreg_64 = COPY killed %8.sub0_sub1 +# GFX10: %{{[0-9]+}}:vreg_64 = PRED_COPY %8.sub2_sub3 +# GFX10: %{{[0-9]+}}:vreg_64 = PRED_COPY killed %8.sub0_sub1 name: image_sample_l_merged_v2v2_reversed body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY 
$sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vreg_64 = IMAGE_SAMPLE_L_V2_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 12, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s64), align 16, addrspace 4) %7:vreg_64 = IMAGE_SAMPLE_L_V2_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 3, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s64), align 16, addrspace 4) @@ -77,17 +77,17 @@ # GFX10-LABEL: name: image_sample_l_merged_v3v1 # GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_L_V4_V3_nsa_gfx10 %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), addrspace 4) -# GFX10: %{{[0-9]+}}:vreg_96 = COPY %8.sub0_sub1_sub2 -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY killed %8.sub3 +# GFX10: %{{[0-9]+}}:vreg_96 = PRED_COPY %8.sub0_sub1_sub2 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY killed %8.sub3 name: image_sample_l_merged_v3v1 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) %7:vgpr_32 = IMAGE_SAMPLE_L_V1_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) @@ -96,17 +96,17 @@ # GFX10-LABEL: name: image_sample_l_merged_v3v1_reversed # GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_L_V4_V3_nsa_gfx10 %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX10: %{{[0-9]+}}:vreg_96 = COPY %8.sub1_sub2_sub3 -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY killed %8.sub0 +# GFX10: %{{[0-9]+}}:vreg_96 = PRED_COPY %8.sub1_sub2_sub3 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY killed %8.sub0 name: image_sample_l_merged_v3v1_reversed body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) %7:vgpr_32 = IMAGE_SAMPLE_L_V1_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) @@ -119,11 +119,11 @@ name: 
image_sample_l_divided_merged body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) @@ -141,11 +141,11 @@ name: image_sample_l_divided_not_merged body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vreg_128 = COPY %2 + %4:vreg_128 = PRED_COPY %2 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) IMAGE_STORE_V4_V2_nsa_gfx10 %4:vreg_128, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (store (s128)) @@ -160,11 +160,11 @@ name: image_sample_l_dmask_overlapped_not_merged body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 4, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -178,11 +178,11 @@ name: image_sample_l_dmask_not_disjoint_not_merged body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 4, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 11, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) 
@@ -196,11 +196,11 @@ name: image_sample_l_not_merged_0 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2, 1, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %7:vgpr_32 = IMAGE_SAMPLE_L_V1_V3_nsa_gfx10 %5, %5, %5, %3, %2, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) @@ -215,12 +215,12 @@ name: image_sample_l_not_merged_1 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %5:vgpr_32 = COPY %2.sub3 + %5:vgpr_32 = PRED_COPY %2.sub3 %6:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %7:vgpr_32 = IMAGE_SAMPLE_L_V1_V3_nsa_gfx10 %6, %6, %6, %3, %2, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %8:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx10 %6, %6, %6, %4, %2, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -234,12 +234,12 @@ name: image_sample_l_not_merged_2 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 - %3:sgpr_128 = COPY $sgpr92_sgpr93_sgpr94_sgpr95 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %3:sgpr_128 = PRED_COPY $sgpr92_sgpr93_sgpr94_sgpr95 %4:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %5:vgpr_32 = COPY %2.sub3 + %5:vgpr_32 = PRED_COPY %2.sub3 %6:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %7:vgpr_32 = IMAGE_SAMPLE_L_V1_V3_nsa_gfx10 %6, %6, %6, %4, %2, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %8:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx10 %6, %6, %6, %4, %3, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -253,11 +253,11 @@ name: image_sample_l_not_merged_3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V3_nsa_gfx10 %5, %5, %5, %3, %2, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx10 %5, %5, %5, %3, %2, 7, 1, -1, 1, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -271,11 +271,11 @@ name: image_sample_l_not_merged_4 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = 
PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V3_nsa_gfx10 %5, %5, %5, %3, %2, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx10 %5, %5, %5, %3, %2, 7, 1, -1, 0, 1, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -289,11 +289,11 @@ name: image_sample_l_not_merged_5 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V3_nsa_gfx10 %5, %5, %5, %3, %2, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx10 %5, %5, %5, %3, %2, 7, 1, -1, 1, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -307,11 +307,11 @@ name: image_sample_l_not_merged_6 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V3_nsa_gfx10 %5, %5, %5, %3, %2, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx10 %5, %5, %5, %3, %2, 7, 1, -1, 0, 1, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -325,11 +325,11 @@ name: image_sample_l_not_merged_7 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V2_nsa_gfx10 %5, %5, %3, %2, 8, 1, -1, 0, 0, 1, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx10 %5, %5, %5, %3, %2, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -343,11 +343,11 @@ name: image_sample_l_not_merged_8 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = 
COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vreg_64 = IMAGE_SAMPLE_L_V2_V3_nsa_gfx10 %5, %5, %5, %3, %2, 8, 1, -1, 0, 0, 0, 1, 0, 0, implicit $exec :: (dereferenceable load (s64), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx10 %5, %5, %5, %3, %2, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -361,11 +361,11 @@ name: image_sample_l_not_merged_9 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vreg_64 = IMAGE_SAMPLE_L_V2_V3_nsa_gfx10 %5, %5, %5, %3, %2, 8, 1, -1, 0, 0, 0, 0, 1, 0, implicit $exec :: (dereferenceable load (s64), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx10 %5, %5, %5, %3, %2, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -379,11 +379,11 @@ name: image_sample_l_not_merged_10 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V3_nsa_gfx10 %5, %5, %5, %3, %2, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx10 %5, %5, %5, %3, %2, 7, 1, -1, 0, 0, 0, 0, 0, 1, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -395,17 +395,17 @@ # GFX10-LABEL: name: image_sample_merged_v1v3 # GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_V4_V2_nsa_gfx10 %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX10: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_V1_V2_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_V3_V2_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -415,17 +415,17 @@ # GFX10-LABEL: name: 
image_sample_b_merged_v1v3 # GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_B_V4_V3_nsa_gfx10 %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX10: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_b_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_B_V1_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_B_V3_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -435,17 +435,17 @@ # GFX10-LABEL: name: image_sample_b_cl_merged_v1v3 # GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_B_CL_V4_V4_nsa_gfx10 %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX10: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_b_cl_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_B_CL_V1_V4_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_B_CL_V3_V4_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -455,17 +455,17 @@ # GFX10-LABEL: name: image_sample_b_cl_o_merged_v1v3 # GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_B_CL_O_V4_V5_nsa_gfx10 %5, %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX10: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_b_cl_o_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY 
$sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_B_CL_O_V1_V5_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_B_CL_O_V3_V5_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -475,17 +475,17 @@ # GFX10-LABEL: name: image_sample_b_o_merged_v1v3 # GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_B_O_V4_V4_nsa_gfx10 %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX10: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_b_o_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_B_O_V1_V4_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_B_O_V3_V4_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -495,17 +495,17 @@ # GFX10-LABEL: name: image_sample_c_merged_v1v3 # GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_V4_V3_nsa_gfx10 %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX10: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_c_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_C_V1_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_C_V3_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -515,17 
+515,17 @@ # GFX10-LABEL: name: image_sample_cd_merged_v1v3 # GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_CD_V4_V6_nsa_gfx10 %5, %5, %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX10: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_cd_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_CD_V1_V6_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_CD_V3_V6_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -535,17 +535,17 @@ # GFX10-LABEL: name: image_sample_cd_cl_merged_v1v3 # GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_CD_CL_V4_V7_nsa_gfx10 %5, %5, %5, %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX10: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_cd_cl_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_CD_CL_V1_V7_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_CD_CL_V3_V7_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -555,17 +555,17 @@ # GFX10-LABEL: name: image_sample_cd_cl_o_merged_v1v3 # GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_CD_CL_O_V4_V8_nsa_gfx10 %5, %5, %5, %5, %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX10: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: 
image_sample_cd_cl_o_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_CD_CL_O_V1_V8_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_CD_CL_O_V3_V8_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -575,17 +575,17 @@ # GFX10-LABEL: name: image_sample_cd_o_merged_v1v3 # GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_CD_O_V4_V7_nsa_gfx10 %5, %5, %5, %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX10: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_cd_o_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_CD_O_V1_V7_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_CD_O_V3_V7_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -595,17 +595,17 @@ # GFX10-LABEL: name: image_sample_cl_merged_v1v3 # GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_CL_V4_V3_nsa_gfx10 %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX10: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_cl_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) 
%6:vgpr_32 = IMAGE_SAMPLE_CL_V1_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_CL_V3_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -615,17 +615,17 @@ # GFX10-LABEL: name: image_sample_cl_o_merged_v1v3 # GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_CL_O_V4_V4_nsa_gfx10 %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX10: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_cl_o_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_CL_O_V1_V4_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_CL_O_V3_V4_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -635,17 +635,17 @@ # GFX10-LABEL: name: image_sample_c_b_merged_v1v3 # GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_B_V4_V4_nsa_gfx10 %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX10: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_c_b_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_C_B_V1_V4_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_C_B_V3_V4_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -655,17 +655,17 @@ # GFX10-LABEL: name: image_sample_c_b_cl_merged_v1v3 # GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_B_CL_V4_V5_nsa_gfx10 %5, %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 
4) -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX10: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_c_b_cl_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_C_B_CL_V1_V5_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_C_B_CL_V3_V5_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -675,17 +675,17 @@ # GFX10-LABEL: name: image_sample_c_b_cl_o_merged_v1v3 # GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_B_CL_O_V4_V6_nsa_gfx10 %5, %5, %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX10: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_c_b_cl_o_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_C_B_CL_O_V1_V6_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_C_B_CL_O_V3_V6_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -695,17 +695,17 @@ # GFX10-LABEL: name: image_sample_c_b_o_merged_v1v3 # GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_B_O_V4_V5_nsa_gfx10 %5, %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX10: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_c_b_o_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = 
COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_C_B_O_V1_V5_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_C_B_O_V3_V5_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -715,17 +715,17 @@ # GFX10-LABEL: name: image_sample_c_cd_merged_v1v3 # GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_CD_V4_V7_nsa_gfx10 %5, %5, %5, %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX10: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_c_cd_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_C_CD_V1_V7_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_C_CD_V3_V7_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -735,17 +735,17 @@ # GFX10-LABEL: name: image_sample_c_cd_cl_merged_v1v3 # GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_CD_CL_V4_V8_nsa_gfx10 %5, %5, %5, %5, %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX10: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_c_cd_cl_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_C_CD_CL_V1_V8_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_C_CD_CL_V3_V8_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, 
%5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -755,17 +755,17 @@ # GFX10-LABEL: name: image_sample_c_cd_cl_o_merged_v1v3 # GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_CD_CL_O_V4_V9_nsa_gfx10 %5, %5, %5, %5, %5, %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX10: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_c_cd_cl_o_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_C_CD_CL_O_V1_V9_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_C_CD_CL_O_V3_V9_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -775,17 +775,17 @@ # GFX10-LABEL: name: image_sample_c_cd_o_merged_v1v3 # GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_CD_O_V4_V8_nsa_gfx10 %5, %5, %5, %5, %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX10: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_c_cd_o_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_C_CD_O_V1_V8_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_C_CD_O_V3_V8_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -795,17 +795,17 @@ # GFX10-LABEL: name: image_sample_c_cl_merged_v1v3 # GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_CL_V4_V4_nsa_gfx10 %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: 
(dereferenceable load (s128), align 4, addrspace 4) -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX10: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_c_cl_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_C_CL_V1_V4_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_C_CL_V3_V4_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -815,17 +815,17 @@ # GFX10-LABEL: name: image_sample_c_cl_o_merged_v1v3 # GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_CL_O_V4_V5_nsa_gfx10 %5, %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX10: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_c_cl_o_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_C_CL_O_V1_V5_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_C_CL_O_V3_V5_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -835,17 +835,17 @@ # GFX10-LABEL: name: image_sample_c_d_merged_v1v3 # GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_D_V4_V7_nsa_gfx10 %5, %5, %5, %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX10: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_c_d_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + 
%4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_C_D_V1_V7_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_C_D_V3_V7_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -855,17 +855,17 @@ # GFX10-LABEL: name: image_sample_c_d_cl_merged_v1v3 # GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_D_CL_V4_V8_nsa_gfx10 %5, %5, %5, %5, %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX10: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_c_d_cl_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_C_D_CL_V1_V8_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_C_D_CL_V3_V8_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -875,17 +875,17 @@ # GFX10-LABEL: name: image_sample_c_d_cl_o_merged_v1v3 # GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_D_CL_O_V4_V9_nsa_gfx10 %5, %5, %5, %5, %5, %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX10: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_c_d_cl_o_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_C_D_CL_O_V1_V9_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = 
IMAGE_SAMPLE_C_D_CL_O_V3_V9_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -895,17 +895,17 @@ # GFX10-LABEL: name: image_sample_c_d_o_merged_v1v3 # GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_D_O_V4_V8_nsa_gfx10 %5, %5, %5, %5, %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX10: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_c_d_o_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_C_D_O_V1_V8_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_C_D_O_V3_V8_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -915,17 +915,17 @@ # GFX10-LABEL: name: image_sample_c_l_merged_v1v3 # GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_L_V4_V4_nsa_gfx10 %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX10: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_c_l_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_C_L_V1_V4_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_C_L_V3_V4_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -935,17 +935,17 @@ # GFX10-LABEL: name: image_sample_c_lz_merged_v1v3 # GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_LZ_V4_V3_nsa_gfx10 %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX10: 
%{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX10: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_c_lz_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_C_LZ_V1_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_C_LZ_V3_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -955,17 +955,17 @@ # GFX10-LABEL: name: image_sample_c_lz_o_merged_v1v3 # GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_LZ_O_V4_V4_nsa_gfx10 %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX10: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_c_lz_o_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_C_LZ_O_V1_V4_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_C_LZ_O_V3_V4_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -975,17 +975,17 @@ # GFX10-LABEL: name: image_sample_c_l_o_merged_v1v3 # GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_L_O_V4_V5_nsa_gfx10 %5, %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX10: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_c_l_o_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: 
(dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_C_L_O_V1_V5_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_C_L_O_V3_V5_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -995,17 +995,17 @@ # GFX10-LABEL: name: image_sample_c_o_merged_v1v3 # GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_O_V4_V4_nsa_gfx10 %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX10: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_c_o_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_C_O_V1_V4_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_C_O_V3_V4_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -1015,17 +1015,17 @@ # GFX10-LABEL: name: image_sample_d_merged_v1v3 # GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_D_V4_V6_nsa_gfx10 %5, %5, %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX10: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_d_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_D_V1_V6_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_D_V3_V6_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -1035,17 +1035,17 @@ # GFX10-LABEL: name: image_sample_d_cl_merged_v1v3 # GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_D_CL_V4_V7_nsa_gfx10 %5, 
%5, %5, %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX10: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_d_cl_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_D_CL_V1_V7_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_D_CL_V3_V7_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -1055,17 +1055,17 @@ # GFX10-LABEL: name: image_sample_d_cl_o_merged_v1v3 # GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_D_CL_O_V4_V8_nsa_gfx10 %5, %5, %5, %5, %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX10: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_d_cl_o_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_D_CL_O_V1_V8_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_D_CL_O_V3_V8_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -1075,17 +1075,17 @@ # GFX10-LABEL: name: image_sample_d_o_merged_v1v3 # GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_D_O_V4_V7_nsa_gfx10 %5, %5, %5, %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX10: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_d_o_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 
= PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_D_O_V1_V7_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_D_O_V3_V7_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -1095,17 +1095,17 @@ # GFX10-LABEL: name: image_sample_lz_merged_v1v3 # GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_LZ_V4_V2_nsa_gfx10 %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX10: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_lz_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_LZ_V1_V2_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_LZ_V3_V2_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -1115,17 +1115,17 @@ # GFX10-LABEL: name: image_sample_lz_o_merged_v1v3 # GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_LZ_O_V4_V3_nsa_gfx10 %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX10: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_lz_o_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_LZ_O_V1_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_LZ_O_V3_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, 
%5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4)
@@ -1135,17 +1135,17 @@
 # GFX10-LABEL: name: image_sample_l_o_merged_v1v3
 # GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_L_O_V4_V4_nsa_gfx10 %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4)
-# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0
-# GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3
+# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0
+# GFX10: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3
 name: image_sample_l_o_merged_v1v3
 body: |
 bb.0.entry:
- %0:sgpr_64 = COPY $sgpr0_sgpr1
+ %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1
 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
- %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
+ %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99
 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
- %4:vgpr_32 = COPY %2.sub3
+ %4:vgpr_32 = PRED_COPY %2.sub3
 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128))
 %6:vgpr_32 = IMAGE_SAMPLE_L_O_V1_V4_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4)
 %7:vreg_96 = IMAGE_SAMPLE_L_O_V3_V4_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4)
@@ -1155,17 +1155,17 @@
 # GFX10-LABEL: name: image_sample_o_merged_v1v3
 # GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_O_V4_V3_nsa_gfx10 %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4)
-# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0
-# GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3
+# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0
+# GFX10: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3
 name: image_sample_o_merged_v1v3
 body: |
 bb.0.entry:
- %0:sgpr_64 = COPY $sgpr0_sgpr1
+ %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1
 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
- %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
+ %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99
 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
- %4:vgpr_32 = COPY %2.sub3
+ %4:vgpr_32 = PRED_COPY %2.sub3
 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128))
 %6:vgpr_32 = IMAGE_SAMPLE_O_V1_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4)
 %7:vreg_96 = IMAGE_SAMPLE_O_V3_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4)
diff --git a/llvm/test/CodeGen/AMDGPU/merge-image-sample-gfx11.mir b/llvm/test/CodeGen/AMDGPU/merge-image-sample-gfx11.mir
--- a/llvm/test/CodeGen/AMDGPU/merge-image-sample-gfx11.mir
+++ b/llvm/test/CodeGen/AMDGPU/merge-image-sample-gfx11.mir
@@ -2,17 +2,17 @@
 # GFX11-LABEL: name: image_sample_l_merged_v1v3
 # GFX11: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_L_V4_V3_nsa_gfx11 %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4)
-# GFX11: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0
-# GFX11: %{{[0-9]+}}:vreg_96 =
COPY killed %8.sub1_sub2_sub3 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX11: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_l_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V3_nsa_gfx11 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx11 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) @@ -20,17 +20,17 @@ --- # GFX11-LABEL: name: image_sample_l_merged_v1v3_reversed # GFX11: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_L_V4_V3_nsa_gfx11 %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), addrspace 4) -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY %8.sub3 -# GFX11: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub0_sub1_sub2 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub3 +# GFX11: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub0_sub1_sub2 name: image_sample_l_merged_v1v3_reversed body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V3_nsa_gfx11 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx11 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) @@ -39,17 +39,17 @@ # GFX11-LABEL: name: image_sample_l_merged_v2v2 # GFX11: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_L_V4_V3_nsa_gfx11 %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), addrspace 4) -# GFX11: %{{[0-9]+}}:vreg_64 = COPY %8.sub0_sub1 -# GFX11: %{{[0-9]+}}:vreg_64 = COPY killed %8.sub2_sub3 +# GFX11: %{{[0-9]+}}:vreg_64 = PRED_COPY %8.sub0_sub1 +# GFX11: %{{[0-9]+}}:vreg_64 = PRED_COPY killed %8.sub2_sub3 name: image_sample_l_merged_v2v2 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) %6:vreg_64 = IMAGE_SAMPLE_L_V2_V3_nsa_gfx11 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 3, 1, -1, 0, 0, 0, 0, 0, 0, implicit 
$exec :: (dereferenceable load 8, align 16, addrspace 4) %7:vreg_64 = IMAGE_SAMPLE_L_V2_V3_nsa_gfx11 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 12, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8, align 16, addrspace 4) @@ -58,17 +58,17 @@ # GFX11-LABEL: name: image_sample_l_merged_v2v2_reversed # GFX11: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_L_V4_V3_nsa_gfx11 %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), addrspace 4) -# GFX11: %{{[0-9]+}}:vreg_64 = COPY %8.sub2_sub3 -# GFX11: %{{[0-9]+}}:vreg_64 = COPY killed %8.sub0_sub1 +# GFX11: %{{[0-9]+}}:vreg_64 = PRED_COPY %8.sub2_sub3 +# GFX11: %{{[0-9]+}}:vreg_64 = PRED_COPY killed %8.sub0_sub1 name: image_sample_l_merged_v2v2_reversed body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) %6:vreg_64 = IMAGE_SAMPLE_L_V2_V3_nsa_gfx11 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 12, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8, align 16, addrspace 4) %7:vreg_64 = IMAGE_SAMPLE_L_V2_V3_nsa_gfx11 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 3, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8, align 16, addrspace 4) @@ -77,17 +77,17 @@ # GFX11-LABEL: name: image_sample_l_merged_v3v1 # GFX11: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_L_V4_V3_nsa_gfx11 %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), addrspace 4) -# GFX11: %{{[0-9]+}}:vreg_96 = COPY %8.sub0_sub1_sub2 -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY killed %8.sub3 +# GFX11: %{{[0-9]+}}:vreg_96 = PRED_COPY %8.sub0_sub1_sub2 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY killed %8.sub3 name: image_sample_l_merged_v3v1 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) %6:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx11 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) %7:vgpr_32 = IMAGE_SAMPLE_L_V1_V3_nsa_gfx11 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) @@ -96,17 +96,17 @@ # GFX11-LABEL: name: image_sample_l_merged_v3v1_reversed # GFX11: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_L_V4_V3_nsa_gfx11 %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX11: %{{[0-9]+}}:vreg_96 = COPY %8.sub1_sub2_sub3 -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY killed %8.sub0 +# GFX11: %{{[0-9]+}}:vreg_96 = PRED_COPY %8.sub1_sub2_sub3 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY killed %8.sub0 name: 
image_sample_l_merged_v3v1_reversed body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) %6:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx11 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) %7:vgpr_32 = IMAGE_SAMPLE_L_V1_V3_nsa_gfx11 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) @@ -119,11 +119,11 @@ name: image_sample_l_divided_merged body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V3_nsa_gfx11 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) %7:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) @@ -141,11 +141,11 @@ name: image_sample_l_divided_not_merged body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vreg_128 = COPY %2 + %4:vreg_128 = PRED_COPY %2 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V3_nsa_gfx11 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) IMAGE_STORE_V4_V2_nsa_gfx11 %4:vreg_128, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 16) @@ -160,11 +160,11 @@ name: image_sample_l_dmask_overlapped_not_merged body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V3_nsa_gfx11 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 4, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx11 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) @@ -178,11 +178,11 @@ name: 
image_sample_l_dmask_not_disjoint_not_merged body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V3_nsa_gfx11 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 4, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx11 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 11, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) @@ -196,11 +196,11 @@ name: image_sample_l_not_merged_0 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) %6:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2, 1, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) %7:vgpr_32 = IMAGE_SAMPLE_L_V1_V3_nsa_gfx11 %5, %5, %5, %3, %2, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) @@ -215,12 +215,12 @@ name: image_sample_l_not_merged_1 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %5:vgpr_32 = COPY %2.sub3 + %5:vgpr_32 = PRED_COPY %2.sub3 %6:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) %7:vgpr_32 = IMAGE_SAMPLE_L_V1_V3_nsa_gfx11 %6, %6, %6, %3, %2, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) %8:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx11 %6, %6, %6, %4, %2, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) @@ -234,12 +234,12 @@ name: image_sample_l_not_merged_2 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 - %3:sgpr_128 = COPY $sgpr92_sgpr93_sgpr94_sgpr95 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %3:sgpr_128 = PRED_COPY $sgpr92_sgpr93_sgpr94_sgpr95 %4:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %5:vgpr_32 = COPY %2.sub3 + %5:vgpr_32 = PRED_COPY %2.sub3 %6:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) %7:vgpr_32 = IMAGE_SAMPLE_L_V1_V3_nsa_gfx11 %6, %6, %6, %4, %2, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) %8:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx11 %6, %6, %6, %4, %3, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) @@ -253,11 +253,11 @@ name: image_sample_l_not_merged_3 body: | bb.0.entry: - %0:sgpr_64 = 
COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V3_nsa_gfx11 %5, %5, %5, %3, %2, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx11 %5, %5, %5, %3, %2, 7, 1, -1, 1, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) @@ -271,11 +271,11 @@ name: image_sample_l_not_merged_4 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V3_nsa_gfx11 %5, %5, %5, %3, %2, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx11 %5, %5, %5, %3, %2, 7, 1, -1, 0, 1, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) @@ -289,11 +289,11 @@ name: image_sample_l_not_merged_5 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V3_nsa_gfx11 %5, %5, %5, %3, %2, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx11 %5, %5, %5, %3, %2, 7, 1, -1, 1, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) @@ -307,11 +307,11 @@ name: image_sample_l_not_merged_6 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V3_nsa_gfx11 %5, %5, %5, %3, %2, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx11 %5, %5, %5, %3, %2, 7, 1, -1, 0, 1, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) @@ -325,11 +325,11 @@ name: image_sample_l_not_merged_7 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY 
%2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V2_nsa_gfx11 %5, %5, %3, %2, 8, 1, -1, 0, 0, 1, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx11 %5, %5, %5, %3, %2, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) @@ -343,11 +343,11 @@ name: image_sample_l_not_merged_8 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) %6:vreg_64 = IMAGE_SAMPLE_L_V2_V3_nsa_gfx11 %5, %5, %5, %3, %2, 8, 1, -1, 0, 0, 0, 1, 0, 0, implicit $exec :: (dereferenceable load 8, addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx11 %5, %5, %5, %3, %2, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) @@ -361,11 +361,11 @@ name: image_sample_l_not_merged_9 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) %6:vreg_64 = IMAGE_SAMPLE_L_V2_V3_nsa_gfx11 %5, %5, %5, %3, %2, 8, 1, -1, 0, 0, 0, 0, 1, 0, implicit $exec :: (dereferenceable load 8, addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx11 %5, %5, %5, %3, %2, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) @@ -379,11 +379,11 @@ name: image_sample_l_not_merged_10 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V3_nsa_gfx11 %5, %5, %5, %3, %2, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx11 %5, %5, %5, %3, %2, 7, 1, -1, 0, 0, 0, 0, 0, 1, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) @@ -395,17 +395,17 @@ # GFX11-LABEL: name: image_sample_merged_v1v3 # GFX11: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_V4_V2_nsa_gfx11 %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX11: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX11: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = 
S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) %6:vgpr_32 = IMAGE_SAMPLE_V1_V2_nsa_gfx11 %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_V3_V2_nsa_gfx11 %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) @@ -415,17 +415,17 @@ # GFX11-LABEL: name: image_sample_b_merged_v1v3 # GFX11: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_B_V4_V3_nsa_gfx11 %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX11: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX11: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_b_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) %6:vgpr_32 = IMAGE_SAMPLE_B_V1_V3_nsa_gfx11 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_B_V3_V3_nsa_gfx11 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) @@ -435,17 +435,17 @@ # GFX11-LABEL: name: image_sample_b_cl_merged_v1v3 # GFX11: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_B_CL_V4_V4_nsa_gfx11 %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX11: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX11: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_b_cl_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) %6:vgpr_32 = IMAGE_SAMPLE_B_CL_V1_V4_nsa_gfx11 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_B_CL_V3_V4_nsa_gfx11 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) @@ -455,17 +455,17 @@ # 
GFX11-LABEL: name: image_sample_b_cl_o_merged_v1v3 # GFX11: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_B_CL_O_V4_V5_nsa_gfx11 %5, %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX11: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX11: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_b_cl_o_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) %6:vgpr_32 = IMAGE_SAMPLE_B_CL_O_V1_V5_nsa_gfx11 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_B_CL_O_V3_V5_nsa_gfx11 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) @@ -475,17 +475,17 @@ # GFX11-LABEL: name: image_sample_b_o_merged_v1v3 # GFX11: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_B_O_V4_V4_nsa_gfx11 %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX11: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX11: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_b_o_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) %6:vgpr_32 = IMAGE_SAMPLE_B_O_V1_V4_nsa_gfx11 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_B_O_V3_V4_nsa_gfx11 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) @@ -495,17 +495,17 @@ # GFX11-LABEL: name: image_sample_c_merged_v1v3 # GFX11: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_V4_V3_nsa_gfx11 %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX11: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX11: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_c_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY 
$sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) %6:vgpr_32 = IMAGE_SAMPLE_C_V1_V3_nsa_gfx11 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_C_V3_V3_nsa_gfx11 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) @@ -515,17 +515,17 @@ # GFX11-LABEL: name: image_sample_cl_merged_v1v3 # GFX11: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_CL_V4_V3_nsa_gfx11 %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX11: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX11: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_cl_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) %6:vgpr_32 = IMAGE_SAMPLE_CL_V1_V3_nsa_gfx11 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_CL_V3_V3_nsa_gfx11 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) @@ -535,17 +535,17 @@ # GFX11-LABEL: name: image_sample_cl_o_merged_v1v3 # GFX11: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_CL_O_V4_V4_nsa_gfx11 %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX11: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX11: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_cl_o_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) %6:vgpr_32 = IMAGE_SAMPLE_CL_O_V1_V4_nsa_gfx11 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_CL_O_V3_V4_nsa_gfx11 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) @@ -555,17 +555,17 @@ # GFX11-LABEL: name: 
image_sample_c_b_merged_v1v3 # GFX11: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_B_V4_V4_nsa_gfx11 %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX11: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX11: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_c_b_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) %6:vgpr_32 = IMAGE_SAMPLE_C_B_V1_V4_nsa_gfx11 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_C_B_V3_V4_nsa_gfx11 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) @@ -575,17 +575,17 @@ # GFX11-LABEL: name: image_sample_c_b_cl_merged_v1v3 # GFX11: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_B_CL_V4_V5_nsa_gfx11 %5, %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX11: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX11: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_c_b_cl_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) %6:vgpr_32 = IMAGE_SAMPLE_C_B_CL_V1_V5_nsa_gfx11 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_C_B_CL_V3_V5_nsa_gfx11 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) @@ -595,17 +595,17 @@ # GFX11-LABEL: name: image_sample_c_b_cl_o_merged_v1v3 # GFX11: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_B_CL_O_V4_V6_gfx11 %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX11: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX11: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_c_b_cl_o_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY 
$sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_192 = IMPLICIT_DEF %6:vgpr_32 = IMAGE_SAMPLE_C_B_CL_O_V1_V6_gfx11 %5:vreg_192, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_C_B_CL_O_V3_V6_gfx11 %5:vreg_192, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) @@ -615,17 +615,17 @@ # GFX11-LABEL: name: image_sample_c_b_o_merged_v1v3 # GFX11: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_B_O_V4_V5_nsa_gfx11 %5, %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX11: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX11: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_c_b_o_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) %6:vgpr_32 = IMAGE_SAMPLE_C_B_O_V1_V5_nsa_gfx11 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_C_B_O_V3_V5_nsa_gfx11 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) @@ -635,17 +635,17 @@ # GFX11-LABEL: name: image_sample_c_cl_merged_v1v3 # GFX11: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_CL_V4_V4_nsa_gfx11 %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX11: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX11: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_c_cl_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) %6:vgpr_32 = IMAGE_SAMPLE_C_CL_V1_V4_nsa_gfx11 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_C_CL_V3_V4_nsa_gfx11 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) @@ -655,17 +655,17 @@ # GFX11-LABEL: name: image_sample_c_cl_o_merged_v1v3 # GFX11: 
%{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_CL_O_V4_V5_nsa_gfx11 %5, %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX11: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX11: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_c_cl_o_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) %6:vgpr_32 = IMAGE_SAMPLE_C_CL_O_V1_V5_nsa_gfx11 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_C_CL_O_V3_V5_nsa_gfx11 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) @@ -675,17 +675,17 @@ # GFX11-LABEL: name: image_sample_c_d_merged_v1v3 # GFX11: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_D_V4_V7_gfx11 %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX11: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX11: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_c_d_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_224 = IMPLICIT_DEF %6:vgpr_32 = IMAGE_SAMPLE_C_D_V1_V7_gfx11 %5:vreg_224, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_C_D_V3_V7_gfx11 %5:vreg_224, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) @@ -695,17 +695,17 @@ # GFX11-LABEL: name: image_sample_c_d_cl_merged_v1v3 # GFX11: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_D_CL_V4_V8_gfx11 %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX11: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX11: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_c_d_cl_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_256 = IMPLICIT_DEF %6:vgpr_32 = 
IMAGE_SAMPLE_C_D_CL_V1_V8_gfx11 %5:vreg_256, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_C_D_CL_V3_V8_gfx11 %5:vreg_256, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) @@ -715,17 +715,17 @@ # GFX11-LABEL: name: image_sample_c_d_cl_o_merged_v1v3 # GFX11: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_D_CL_O_V4_V9_gfx11 %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX11: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX11: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_c_d_cl_o_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_288 = IMPLICIT_DEF %6:vgpr_32 = IMAGE_SAMPLE_C_D_CL_O_V1_V9_gfx11 %5:vreg_288, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_C_D_CL_O_V3_V9_gfx11 %5:vreg_288, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) @@ -735,17 +735,17 @@ # GFX11-LABEL: name: image_sample_c_d_o_merged_v1v3 # GFX11: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_D_O_V4_V8_gfx11 %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX11: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX11: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_c_d_o_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_256 = IMPLICIT_DEF %6:vgpr_32 = IMAGE_SAMPLE_C_D_O_V1_V8_gfx11 %5:vreg_256, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_C_D_O_V3_V8_gfx11 %5:vreg_256, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) @@ -755,17 +755,17 @@ # GFX11-LABEL: name: image_sample_c_l_merged_v1v3 # GFX11: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_L_V4_V4_nsa_gfx11 %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX11: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX11: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_c_l_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY 
$sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) %6:vgpr_32 = IMAGE_SAMPLE_C_L_V1_V4_nsa_gfx11 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_C_L_V3_V4_nsa_gfx11 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) @@ -775,17 +775,17 @@ # GFX11-LABEL: name: image_sample_c_lz_merged_v1v3 # GFX11: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_LZ_V4_V3_nsa_gfx11 %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX11: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX11: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_c_lz_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) %6:vgpr_32 = IMAGE_SAMPLE_C_LZ_V1_V3_nsa_gfx11 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_C_LZ_V3_V3_nsa_gfx11 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) @@ -795,17 +795,17 @@ # GFX11-LABEL: name: image_sample_c_lz_o_merged_v1v3 # GFX11: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_LZ_O_V4_V4_nsa_gfx11 %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX11: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX11: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_c_lz_o_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) %6:vgpr_32 = IMAGE_SAMPLE_C_LZ_O_V1_V4_nsa_gfx11 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_C_LZ_O_V3_V4_nsa_gfx11 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, 
addrspace 4) @@ -815,17 +815,17 @@ # GFX11-LABEL: name: image_sample_c_l_o_merged_v1v3 # GFX11: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_L_O_V4_V5_nsa_gfx11 %5, %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX11: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX11: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_c_l_o_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) %6:vgpr_32 = IMAGE_SAMPLE_C_L_O_V1_V5_nsa_gfx11 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_C_L_O_V3_V5_nsa_gfx11 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) @@ -835,17 +835,17 @@ # GFX11-LABEL: name: image_sample_c_o_merged_v1v3 # GFX11: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_O_V4_V4_nsa_gfx11 %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX11: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX11: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_c_o_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) %6:vgpr_32 = IMAGE_SAMPLE_C_O_V1_V4_nsa_gfx11 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_C_O_V3_V4_nsa_gfx11 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) @@ -855,17 +855,17 @@ # GFX11-LABEL: name: image_sample_d_merged_v1v3 # GFX11: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_D_V4_V6_gfx11 %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX11: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX11: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_d_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - 
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_192 = IMPLICIT_DEF %6:vgpr_32 = IMAGE_SAMPLE_D_V1_V6_gfx11 %5:vreg_192, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_D_V3_V6_gfx11 %5:vreg_192, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) @@ -875,17 +875,17 @@ # GFX11-LABEL: name: image_sample_d_cl_merged_v1v3 # GFX11: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_D_CL_V4_V7_gfx11 %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX11: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX11: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_d_cl_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_224 = IMPLICIT_DEF %6:vgpr_32 = IMAGE_SAMPLE_D_CL_V1_V7_gfx11 %5:vreg_224, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_D_CL_V3_V7_gfx11 %5:vreg_224, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) @@ -895,17 +895,17 @@ # GFX11-LABEL: name: image_sample_d_cl_o_merged_v1v3 # GFX11: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_D_CL_O_V4_V8_gfx11 %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX11: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX11: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_d_cl_o_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_256 = IMPLICIT_DEF %6:vgpr_32 = IMAGE_SAMPLE_D_CL_O_V1_V8_gfx11 %5:vreg_256, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_D_CL_O_V3_V8_gfx11 %5:vreg_256, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) @@ -915,17 +915,17 @@ # GFX11-LABEL: name: image_sample_d_o_merged_v1v3 # GFX11: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_D_O_V4_V7_gfx11 %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX11: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX11: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 
name: image_sample_d_o_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_224 = IMPLICIT_DEF %6:vgpr_32 = IMAGE_SAMPLE_D_O_V1_V7_gfx11 %5:vreg_224, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_D_O_V3_V7_gfx11 %5:vreg_224, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) @@ -935,17 +935,17 @@ # GFX11-LABEL: name: image_sample_lz_merged_v1v3 # GFX11: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_LZ_V4_V2_nsa_gfx11 %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX11: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX11: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_lz_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) %6:vgpr_32 = IMAGE_SAMPLE_LZ_V1_V2_nsa_gfx11 %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_LZ_V3_V2_nsa_gfx11 %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) @@ -955,17 +955,17 @@ # GFX11-LABEL: name: image_sample_lz_o_merged_v1v3 # GFX11: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_LZ_O_V4_V3_nsa_gfx11 %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX11: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX11: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_lz_o_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) %6:vgpr_32 = IMAGE_SAMPLE_LZ_O_V1_V3_nsa_gfx11 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_LZ_O_V3_V3_nsa_gfx11 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) @@ -975,17 +975,17 @@ # GFX11-LABEL: name: 
image_sample_l_o_merged_v1v3 # GFX11: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_L_O_V4_V4_nsa_gfx11 %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX11: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX11: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_l_o_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) %6:vgpr_32 = IMAGE_SAMPLE_L_O_V1_V4_nsa_gfx11 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_L_O_V3_V4_nsa_gfx11 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) @@ -995,17 +995,17 @@ # GFX11-LABEL: name: image_sample_o_merged_v1v3 # GFX11: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_O_V4_V3_nsa_gfx11 %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX11: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX11: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_o_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) %6:vgpr_32 = IMAGE_SAMPLE_O_V1_V3_nsa_gfx11 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_O_V3_V3_nsa_gfx11 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) diff --git a/llvm/test/CodeGen/AMDGPU/merge-image-sample.mir b/llvm/test/CodeGen/AMDGPU/merge-image-sample.mir --- a/llvm/test/CodeGen/AMDGPU/merge-image-sample.mir +++ b/llvm/test/CodeGen/AMDGPU/merge-image-sample.mir @@ -2,17 +2,17 @@ # GFX9-LABEL: name: image_sample_l_merged_v1v3 # GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_L_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_l_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY 
$sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -21,17 +21,17 @@ # GFX9-LABEL: name: image_sample_l_merged_v1v3_reversed # GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_L_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), addrspace 4) -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub3 -# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub0_sub1_sub2 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub3 +# GFX9: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub0_sub1_sub2 name: image_sample_l_merged_v1v3_reversed body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 7, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -40,17 +40,17 @@ # GFX9-LABEL: name: image_sample_l_merged_v2v2 # GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_L_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), addrspace 4) -# GFX9: %{{[0-9]+}}:vreg_64 = COPY %8.sub0_sub1 -# GFX9: %{{[0-9]+}}:vreg_64 = COPY killed %8.sub2_sub3 +# GFX9: %{{[0-9]+}}:vreg_64 = PRED_COPY %8.sub0_sub1 +# GFX9: %{{[0-9]+}}:vreg_64 = PRED_COPY killed %8.sub2_sub3 name: image_sample_l_merged_v2v2 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vreg_64 = IMAGE_SAMPLE_L_V2_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 3, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s64), align 16, addrspace 4) %7:vreg_64 = IMAGE_SAMPLE_L_V2_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 12, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s64), align 16, addrspace 4) @@ -59,17 +59,17 @@ # GFX9-LABEL: name: image_sample_l_merged_v2v2_reversed # GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_L_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), 
addrspace 4) -# GFX9: %{{[0-9]+}}:vreg_64 = COPY %8.sub2_sub3 -# GFX9: %{{[0-9]+}}:vreg_64 = COPY killed %8.sub0_sub1 +# GFX9: %{{[0-9]+}}:vreg_64 = PRED_COPY %8.sub2_sub3 +# GFX9: %{{[0-9]+}}:vreg_64 = PRED_COPY killed %8.sub0_sub1 name: image_sample_l_merged_v2v2_reversed body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vreg_64 = IMAGE_SAMPLE_L_V2_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 12, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s64), align 16, addrspace 4) %7:vreg_64 = IMAGE_SAMPLE_L_V2_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 3, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s64), align 16, addrspace 4) @@ -78,17 +78,17 @@ # GFX9-LABEL: name: image_sample_l_merged_v3v1 # GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_L_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), addrspace 4) -# GFX9: %{{[0-9]+}}:vreg_96 = COPY %8.sub0_sub1_sub2 -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY killed %8.sub3 +# GFX9: %{{[0-9]+}}:vreg_96 = PRED_COPY %8.sub0_sub1_sub2 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY killed %8.sub3 name: image_sample_l_merged_v3v1 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 7, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) %7:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) @@ -97,17 +97,17 @@ # GFX9-LABEL: name: image_sample_l_merged_v3v1_reversed # GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_L_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX9: %{{[0-9]+}}:vreg_96 = COPY %8.sub1_sub2_sub3 -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY killed %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = PRED_COPY %8.sub1_sub2_sub3 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY killed %8.sub0 name: image_sample_l_merged_v3v1_reversed body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) %7:vgpr_32 = 
IMAGE_SAMPLE_L_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) @@ -120,11 +120,11 @@ name: image_sample_l_divided_merged body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) @@ -142,11 +142,11 @@ name: image_sample_l_divided_not_merged body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vreg_128 = COPY %2 + %4:vreg_128 = PRED_COPY %2 %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) IMAGE_STORE_V4_V4 %4:vreg_128, %5:vreg_128, %3:sgpr_256, 15, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (store (s128)) @@ -161,11 +161,11 @@ name: image_sample_l_dmask_overlapped_not_merged body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 4, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 7, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -179,11 +179,11 @@ name: image_sample_l_dmask_not_disjoint_not_merged body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 4, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 11, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -197,11 +197,11 @@ name: 
image_sample_l_not_merged_0 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 1, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %7:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5, %3, %2, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) @@ -216,12 +216,12 @@ name: image_sample_l_not_merged_1 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %5:vgpr_32 = COPY %2.sub3 + %5:vgpr_32 = PRED_COPY %2.sub3 %6:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %7:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %6, %3, %2, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %8:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %6, %4, %2, 7, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -235,12 +235,12 @@ name: image_sample_l_not_merged_2 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 - %3:sgpr_128 = COPY $sgpr92_sgpr93_sgpr94_sgpr95 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %3:sgpr_128 = PRED_COPY $sgpr92_sgpr93_sgpr94_sgpr95 %4:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %5:vgpr_32 = COPY %2.sub3 + %5:vgpr_32 = PRED_COPY %2.sub3 %6:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %7:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %6, %4, %2, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %8:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %6, %4, %3, 7, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -254,11 +254,11 @@ name: image_sample_l_not_merged_3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5, %3, %2, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5, %3, %2, 7, 1, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -272,11 +272,11 @@ name: image_sample_l_not_merged_4 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY 
$sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5, %3, %2, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5, %3, %2, 7, 0, 0, 1, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -290,11 +290,11 @@ name: image_sample_l_not_merged_5 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5, %3, %2, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5, %3, %2, 7, 0, 1, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -308,11 +308,11 @@ name: image_sample_l_not_merged_6 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5, %3, %2, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5, %3, %2, 7, 0, 0, 1, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -326,11 +326,11 @@ name: image_sample_l_not_merged_7 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vreg_64 = IMAGE_SAMPLE_L_V2_V4 %5, %3, %2, 8, 0, 0, 0, 1, 0, -1, 0, implicit $exec :: (dereferenceable load (s64), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5, %3, %2, 7, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -344,11 +344,11 @@ name: image_sample_l_not_merged_8 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vreg_64 = IMAGE_SAMPLE_L_V2_V4 %5, %3, %2, 8, 0, 0, 0, 0, 1, -1, 0, implicit $exec :: (dereferenceable load (s64), addrspace 4) 
%7:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5, %3, %2, 7, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -362,11 +362,11 @@ name: image_sample_l_not_merged_9 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5, %3, %2, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5, %3, %2, 7, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -380,11 +380,11 @@ name: image_sample_l_not_merged_10 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5, %3, %2, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5, %3, %2, 7, 0, 0, 0, 0, 0, -1, 1, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -393,17 +393,17 @@ # GFX9-LABEL: name: image_sample_merged_v1v3 # GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -412,17 +412,17 @@ # GFX9-LABEL: name: image_sample_b_merged_v1v3 # GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_B_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_b_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY 
$sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_B_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_B_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -431,17 +431,17 @@ # GFX9-LABEL: name: image_sample_b_cl_merged_v1v3 # GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_B_CL_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_b_cl_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_B_CL_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_B_CL_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -450,17 +450,17 @@ # GFX9-LABEL: name: image_sample_b_cl_o_merged_v1v3 # GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_B_CL_O_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_b_cl_o_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_B_CL_O_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_B_CL_O_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -469,17 +469,17 @@ # GFX9-LABEL: name: image_sample_b_o_merged_v1v3 # GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_B_O_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, 
-1, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_b_o_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_B_O_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_B_O_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -488,17 +488,17 @@ # GFX9-LABEL: name: image_sample_c_merged_v1v3 # GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_c_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_C_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_C_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -507,17 +507,17 @@ # GFX9-LABEL: name: image_sample_cd_merged_v1v3 # GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_CD_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_cd_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_CD_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: 
(dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_CD_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -526,17 +526,17 @@ # GFX9-LABEL: name: image_sample_cd_cl_merged_v1v3 # GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_CD_CL_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_cd_cl_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_CD_CL_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_CD_CL_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -545,17 +545,17 @@ # GFX9-LABEL: name: image_sample_cd_cl_o_merged_v1v3 # GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_CD_CL_O_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_cd_cl_o_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_CD_CL_O_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_CD_CL_O_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -564,17 +564,17 @@ # GFX9-LABEL: name: image_sample_cd_o_merged_v1v3 # GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_CD_O_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_cd_o_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 
+ %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_CD_O_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_CD_O_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -583,17 +583,17 @@ # GFX9-LABEL: name: image_sample_cl_merged_v1v3 # GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_CL_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_cl_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_CL_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_CL_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -602,17 +602,17 @@ # GFX9-LABEL: name: image_sample_cl_o_merged_v1v3 # GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_CL_O_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_cl_o_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_CL_O_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_CL_O_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -621,17 +621,17 @@ # GFX9-LABEL: name: image_sample_c_b_merged_v1v3 # GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_B_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX9: %{{[0-9]+}}:vreg_96 
= COPY killed %8.sub1_sub2_sub3 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_c_b_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_C_B_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_C_B_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -640,17 +640,17 @@ # GFX9-LABEL: name: image_sample_c_b_cl_merged_v1v3 # GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_B_CL_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_c_b_cl_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_C_B_CL_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_C_B_CL_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -659,17 +659,17 @@ # GFX9-LABEL: name: image_sample_c_b_cl_o_merged_v1v3 # GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_B_CL_O_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_c_b_cl_o_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_C_B_CL_O_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_C_B_CL_O_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 
0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -678,17 +678,17 @@ # GFX9-LABEL: name: image_sample_c_b_o_merged_v1v3 # GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_B_O_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_c_b_o_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_C_B_O_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_C_B_O_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -697,17 +697,17 @@ # GFX9-LABEL: name: image_sample_c_cd_merged_v1v3 # GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_CD_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_c_cd_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_C_CD_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_C_CD_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -716,17 +716,17 @@ # GFX9-LABEL: name: image_sample_c_cd_cl_merged_v1v3 # GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_CD_CL_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_c_cd_cl_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + 
%4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_C_CD_CL_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_C_CD_CL_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -735,17 +735,17 @@ # GFX9-LABEL: name: image_sample_c_cd_cl_o_merged_v1v3 # GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_CD_CL_O_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_c_cd_cl_o_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_C_CD_CL_O_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_C_CD_CL_O_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -754,17 +754,17 @@ # GFX9-LABEL: name: image_sample_c_cd_o_merged_v1v3 # GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_CD_O_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_c_cd_o_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_C_CD_O_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_C_CD_O_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -773,17 +773,17 @@ # GFX9-LABEL: name: image_sample_c_cl_merged_v1v3 # GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_CL_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY 
%8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_c_cl_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_C_CL_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_C_CL_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -792,17 +792,17 @@ # GFX9-LABEL: name: image_sample_c_cl_o_merged_v1v3 # GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_CL_O_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_c_cl_o_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_C_CL_O_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_C_CL_O_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -811,17 +811,17 @@ # GFX9-LABEL: name: image_sample_c_d_merged_v1v3 # GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_D_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_c_d_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_C_D_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_C_D_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ 
-830,17 +830,17 @@ # GFX9-LABEL: name: image_sample_c_d_cl_merged_v1v3 # GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_D_CL_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_c_d_cl_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_C_D_CL_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_C_D_CL_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -849,17 +849,17 @@ # GFX9-LABEL: name: image_sample_c_d_cl_o_merged_v1v3 # GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_D_CL_O_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_c_d_cl_o_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_C_D_CL_O_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_C_D_CL_O_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -868,17 +868,17 @@ # GFX9-LABEL: name: image_sample_c_d_o_merged_v1v3 # GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_D_O_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_c_d_o_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET 
%2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_C_D_O_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_C_D_O_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -887,17 +887,17 @@ # GFX9-LABEL: name: image_sample_c_l_merged_v1v3 # GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_L_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_c_l_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_C_L_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_C_L_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -906,17 +906,17 @@ # GFX9-LABEL: name: image_sample_c_lz_merged_v1v3 # GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_LZ_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_c_lz_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_C_LZ_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_C_LZ_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -925,17 +925,17 @@ # GFX9-LABEL: name: image_sample_c_lz_o_merged_v1v3 # GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_LZ_O_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_c_lz_o_merged_v1v3 
body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_C_LZ_O_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_C_LZ_O_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -944,17 +944,17 @@ # GFX9-LABEL: name: image_sample_c_l_o_merged_v1v3 # GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_L_O_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_c_l_o_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_C_L_O_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_C_L_O_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -963,17 +963,17 @@ # GFX9-LABEL: name: image_sample_c_o_merged_v1v3 # GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_O_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_c_o_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_C_O_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_C_O_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -982,17 +982,17 @@ # GFX9-LABEL: name: image_sample_d_merged_v1v3 # GFX9: %{{[0-9]+}}:vreg_128 = 
IMAGE_SAMPLE_D_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_d_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_D_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_D_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -1001,17 +1001,17 @@ # GFX9-LABEL: name: image_sample_d_cl_merged_v1v3 # GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_D_CL_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_d_cl_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_D_CL_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_D_CL_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -1020,17 +1020,17 @@ # GFX9-LABEL: name: image_sample_d_cl_o_merged_v1v3 # GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_D_CL_O_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_d_cl_o_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_D_CL_O_V1_V4 %5:vreg_128, 
%3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_D_CL_O_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -1039,17 +1039,17 @@ # GFX9-LABEL: name: image_sample_d_o_merged_v1v3 # GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_D_O_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_d_o_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_D_O_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_D_O_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -1058,17 +1058,17 @@ # GFX9-LABEL: name: image_sample_lz_merged_v1v3 # GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_LZ_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_lz_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_LZ_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_LZ_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -1077,17 +1077,17 @@ # GFX9-LABEL: name: image_sample_lz_o_merged_v1v3 # GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_LZ_O_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_lz_o_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - 
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_LZ_O_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_LZ_O_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -1096,17 +1096,17 @@ # GFX9-LABEL: name: image_sample_l_o_merged_v1v3 # GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_L_O_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_l_o_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_L_O_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_L_O_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -1115,17 +1115,17 @@ # GFX9-LABEL: name: image_sample_o_merged_v1v3 # GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_O_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_o_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_O_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_O_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) diff --git a/llvm/test/CodeGen/AMDGPU/merge-tbuffer.mir b/llvm/test/CodeGen/AMDGPU/merge-tbuffer.mir --- a/llvm/test/CodeGen/AMDGPU/merge-tbuffer.mir +++ b/llvm/test/CodeGen/AMDGPU/merge-tbuffer.mir @@ -8,15 +8,15 @@ # GFX9-LABEL: name: gfx9_tbuffer_load_x_xyz # GFX9: 
%{{[0-9]+}}:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_OFFSET %4, 0, 4, 126, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 4) -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %7.sub0 -# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %7.sub1_sub2_sub3 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY %7.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %7.sub1_sub2_sub3 name: gfx9_tbuffer_load_x_xyz body: | bb.0.entry: - %0:sgpr_32 = COPY $sgpr0 - %1:sgpr_32 = COPY $sgpr1 - %2:sgpr_32 = COPY $sgpr2 - %3:sgpr_32 = COPY $sgpr3 + %0:sgpr_32 = PRED_COPY $sgpr0 + %1:sgpr_32 = PRED_COPY $sgpr1 + %2:sgpr_32 = PRED_COPY $sgpr2 + %3:sgpr_32 = PRED_COPY $sgpr3 %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 116, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) %8:vreg_96 = TBUFFER_LOAD_FORMAT_XYZ_OFFSET %5:sgpr_128, 0, 8, 125, 0, 0, implicit $exec :: (dereferenceable load (s96), align 1, addrspace 4) @@ -25,15 +25,15 @@ # GFX9-LABEL: name: gfx9_tbuffer_load_xyz_x # GFX9: %{{[0-9]+}}:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_OFFSET %4, 0, 4, 126, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 4) -# GFX9: %{{[0-9]+}}:vreg_96 = COPY %7.sub0_sub1_sub2 -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY killed %7.sub3 +# GFX9: %{{[0-9]+}}:vreg_96 = PRED_COPY %7.sub0_sub1_sub2 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY killed %7.sub3 name: gfx9_tbuffer_load_xyz_x body: | bb.0.entry: - %0:sgpr_32 = COPY $sgpr0 - %1:sgpr_32 = COPY $sgpr1 - %2:sgpr_32 = COPY $sgpr2 - %3:sgpr_32 = COPY $sgpr3 + %0:sgpr_32 = PRED_COPY $sgpr0 + %1:sgpr_32 = PRED_COPY $sgpr1 + %2:sgpr_32 = PRED_COPY $sgpr2 + %3:sgpr_32 = PRED_COPY $sgpr3 %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 %7:vreg_96 = TBUFFER_LOAD_FORMAT_XYZ_OFFSET %5:sgpr_128, 0, 4, 125, 0, 0, implicit $exec :: (dereferenceable load (s96), align 1, addrspace 4) %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 16, 116, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) @@ -42,15 +42,15 @@ # GFX9-LABEL: name: gfx9_tbuffer_load_xy_xy # GFX9: %{{[0-9]+}}:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_OFFSET %4, 0, 4, 126, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 4) -# GFX9: %{{[0-9]+}}:vreg_64 = COPY %7.sub0_sub1 -# GFX9: %{{[0-9]+}}:vreg_64 = COPY killed %7.sub2_sub3 +# GFX9: %{{[0-9]+}}:vreg_64 = PRED_COPY %7.sub0_sub1 +# GFX9: %{{[0-9]+}}:vreg_64 = PRED_COPY killed %7.sub2_sub3 name: gfx9_tbuffer_load_xy_xy body: | bb.0.entry: - %0:sgpr_32 = COPY $sgpr0 - %1:sgpr_32 = COPY $sgpr1 - %2:sgpr_32 = COPY $sgpr2 - %3:sgpr_32 = COPY $sgpr3 + %0:sgpr_32 = PRED_COPY $sgpr0 + %1:sgpr_32 = PRED_COPY $sgpr1 + %2:sgpr_32 = PRED_COPY $sgpr2 + %3:sgpr_32 = PRED_COPY $sgpr3 %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 %7:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFSET %5:sgpr_128, 0, 4, 123, 0, 0, implicit $exec :: (dereferenceable load (s64), align 1, addrspace 4) %8:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFSET %5:sgpr_128, 0, 12, 123, 0, 0, implicit $exec :: (dereferenceable load (s64), align 1, addrspace 4) @@ -59,15 +59,15 @@ # GFX9-LABEL: name: gfx9_tbuffer_load_x_xy # GFX9: %{{[0-9]+}}:vreg_96 = TBUFFER_LOAD_FORMAT_XYZ_OFFSET %4, 0, 4, 125, 0, 0, implicit $exec :: (dereferenceable load (s96), align 1, addrspace 
4) -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %7.sub0 -# GFX9: %{{[0-9]+}}:vreg_64 = COPY killed %7.sub1_sub2 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY %7.sub0 +# GFX9: %{{[0-9]+}}:vreg_64 = PRED_COPY killed %7.sub1_sub2 name: gfx9_tbuffer_load_x_xy body: | bb.0.entry: - %0:sgpr_32 = COPY $sgpr0 - %1:sgpr_32 = COPY $sgpr1 - %2:sgpr_32 = COPY $sgpr2 - %3:sgpr_32 = COPY $sgpr3 + %0:sgpr_32 = PRED_COPY $sgpr0 + %1:sgpr_32 = PRED_COPY $sgpr1 + %2:sgpr_32 = PRED_COPY $sgpr2 + %3:sgpr_32 = PRED_COPY $sgpr3 %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 116, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) %8:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFSET %5:sgpr_128, 0, 8, 123, 0, 0, implicit $exec :: (dereferenceable load (s64), align 1, addrspace 4) @@ -76,15 +76,15 @@ # GFX9-LABEL: name: gfx9_tbuffer_load_xy_x # GFX9: %{{[0-9]+}}:vreg_96 = TBUFFER_LOAD_FORMAT_XYZ_OFFSET %4, 0, 4, 125, 0, 0, implicit $exec :: (dereferenceable load (s96), align 1, addrspace 4) -# GFX9: %{{[0-9]+}}:vreg_64 = COPY %7.sub0_sub1 -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY killed %7.sub2 +# GFX9: %{{[0-9]+}}:vreg_64 = PRED_COPY %7.sub0_sub1 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY killed %7.sub2 name: gfx9_tbuffer_load_xy_x body: | bb.0.entry: - %0:sgpr_32 = COPY $sgpr0 - %1:sgpr_32 = COPY $sgpr1 - %2:sgpr_32 = COPY $sgpr2 - %3:sgpr_32 = COPY $sgpr3 + %0:sgpr_32 = PRED_COPY $sgpr0 + %1:sgpr_32 = PRED_COPY $sgpr1 + %2:sgpr_32 = PRED_COPY $sgpr2 + %3:sgpr_32 = PRED_COPY $sgpr3 %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 %7:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFSET %5:sgpr_128, 0, 4, 123, 0, 0, implicit $exec :: (dereferenceable load (s64), align 1, addrspace 4) %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 12, 116, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) @@ -94,16 +94,16 @@ # GFX9-LABEL: name: gfx9_tbuffer_load_x_x # GFX9: %{{[0-9]+}}:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFSET %4, 0, 4, 123, 0, 0, implicit $exec :: (dereferenceable load (s64), align 1, addrspace 4) -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %7.sub0 -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY killed %7.sub1 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY %7.sub0 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY killed %7.sub1 name: gfx9_tbuffer_load_x_x body: | bb.0.entry: - %0:sgpr_32 = COPY $sgpr0 - %1:sgpr_32 = COPY $sgpr1 - %2:sgpr_32 = COPY $sgpr2 - %3:sgpr_32 = COPY $sgpr3 + %0:sgpr_32 = PRED_COPY $sgpr0 + %1:sgpr_32 = PRED_COPY $sgpr1 + %2:sgpr_32 = PRED_COPY $sgpr2 + %3:sgpr_32 = PRED_COPY $sgpr3 %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 116, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 8, 116, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) @@ -112,16 +112,16 @@ # GFX9-LABEL: name: gfx9_tbuffer_load_x_x_format_32_32_32_32 # GFX9: %{{[0-9]+}}:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFSET %4, 0, 4, 123, 0, 0, implicit $exec :: (dereferenceable load (s64), align 1, addrspace 4) -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %7.sub0 -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY killed %7.sub1 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY %7.sub0 +# GFX9: %{{[0-9]+}}:vgpr_32 = 
PRED_COPY killed %7.sub1 name: gfx9_tbuffer_load_x_x_format_32_32_32_32 body: | bb.0.entry: - %0:sgpr_32 = COPY $sgpr0 - %1:sgpr_32 = COPY $sgpr1 - %2:sgpr_32 = COPY $sgpr2 - %3:sgpr_32 = COPY $sgpr3 + %0:sgpr_32 = PRED_COPY $sgpr0 + %1:sgpr_32 = PRED_COPY $sgpr1 + %2:sgpr_32 = PRED_COPY $sgpr2 + %3:sgpr_32 = PRED_COPY $sgpr3 %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 126, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 8, 126, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) @@ -131,28 +131,28 @@ # GFX9-LABEL: name: gfx9_tbuffer_load_float_32 # GFX9: %{{[0-9]+}}:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFSET %4, 0, 4, 123, 0, 0, implicit $exec :: (dereferenceable load (s64), align 1, addrspace 4) -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %14.sub0 -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY killed %14.sub1 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY %14.sub0 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY killed %14.sub1 # GFX9: %{{[0-9]+}}:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_OFFSET %4, 0, 16, 126, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 4) -# GFX9: %{{[0-9]+}}:vreg_96 = COPY %17.sub0_sub1_sub2 -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY killed %17.sub3 -# GFX9: %{{[0-9]+}}:vreg_64 = COPY %16.sub0_sub1 -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY killed %16.sub2 -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %15.sub0 -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY killed %15.sub1 +# GFX9: %{{[0-9]+}}:vreg_96 = PRED_COPY %17.sub0_sub1_sub2 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY killed %17.sub3 +# GFX9: %{{[0-9]+}}:vreg_64 = PRED_COPY %16.sub0_sub1 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY killed %16.sub2 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY %15.sub0 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY killed %15.sub1 # GFX9: %{{[0-9]+}}:vreg_96 = TBUFFER_LOAD_FORMAT_XYZ_OFFSET %4, 0, 36, 125, 0, 0, implicit $exec :: (dereferenceable load (s96), align 1, addrspace 4) -# GFX9: %{{[0-9]+}}:vreg_64 = COPY %19.sub0_sub1 -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY killed %19.sub2 -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %18.sub0 -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY killed %18.sub1 +# GFX9: %{{[0-9]+}}:vreg_64 = PRED_COPY %19.sub0_sub1 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY killed %19.sub2 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY %18.sub0 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY killed %18.sub1 name: gfx9_tbuffer_load_float_32 body: | bb.0.entry: - %0:sgpr_32 = COPY $sgpr0 - %1:sgpr_32 = COPY $sgpr1 - %2:sgpr_32 = COPY $sgpr2 - %3:sgpr_32 = COPY $sgpr3 + %0:sgpr_32 = PRED_COPY $sgpr0 + %1:sgpr_32 = PRED_COPY $sgpr1 + %2:sgpr_32 = PRED_COPY $sgpr2 + %3:sgpr_32 = PRED_COPY $sgpr3 %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 116, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 8, 116, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) @@ -168,28 +168,28 @@ # GFX9-LABEL: name: gfx9_tbuffer_load_sint_32 # GFX9: %{{[0-9]+}}:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFSET %4, 0, 4, 91, 0, 0, implicit $exec :: (dereferenceable load (s64), align 1, addrspace 4) -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %14.sub0 -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY killed %14.sub1 +# GFX9: 
%{{[0-9]+}}:vgpr_32 = PRED_COPY %14.sub0 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY killed %14.sub1 # GFX9: %{{[0-9]+}}:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_OFFSET %4, 0, 16, 94, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 4) -# GFX9: %{{[0-9]+}}:vreg_96 = COPY %17.sub0_sub1_sub2 -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY killed %17.sub3 -# GFX9: %{{[0-9]+}}:vreg_64 = COPY %16.sub0_sub1 -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY killed %16.sub2 -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %15.sub0 -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY killed %15.sub1 +# GFX9: %{{[0-9]+}}:vreg_96 = PRED_COPY %17.sub0_sub1_sub2 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY killed %17.sub3 +# GFX9: %{{[0-9]+}}:vreg_64 = PRED_COPY %16.sub0_sub1 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY killed %16.sub2 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY %15.sub0 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY killed %15.sub1 # GFX9: %{{[0-9]+}}:vreg_96 = TBUFFER_LOAD_FORMAT_XYZ_OFFSET %4, 0, 36, 93, 0, 0, implicit $exec :: (dereferenceable load (s96), align 1, addrspace 4) -# GFX9: %{{[0-9]+}}:vreg_64 = COPY %19.sub0_sub1 -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY killed %19.sub2 -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %18.sub0 -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY killed %18.sub1 +# GFX9: %{{[0-9]+}}:vreg_64 = PRED_COPY %19.sub0_sub1 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY killed %19.sub2 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY %18.sub0 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY killed %18.sub1 name: gfx9_tbuffer_load_sint_32 body: | bb.0.entry: - %0:sgpr_32 = COPY $sgpr0 - %1:sgpr_32 = COPY $sgpr1 - %2:sgpr_32 = COPY $sgpr2 - %3:sgpr_32 = COPY $sgpr3 + %0:sgpr_32 = PRED_COPY $sgpr0 + %1:sgpr_32 = PRED_COPY $sgpr1 + %2:sgpr_32 = PRED_COPY $sgpr2 + %3:sgpr_32 = PRED_COPY $sgpr3 %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 84, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 8, 84, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) @@ -205,28 +205,28 @@ # GFX9-LABEL: name: gfx9_tbuffer_load_uint_32 # GFX9: %{{[0-9]+}}:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFSET %4, 0, 4, 75, 0, 0, implicit $exec :: (dereferenceable load (s64), align 1, addrspace 4) -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %14.sub0 -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY killed %14.sub1 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY %14.sub0 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY killed %14.sub1 # GFX9: %{{[0-9]+}}:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_OFFSET %4, 0, 16, 78, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 4) -# GFX9: %{{[0-9]+}}:vreg_96 = COPY %17.sub0_sub1_sub2 -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY killed %17.sub3 -# GFX9: %{{[0-9]+}}:vreg_64 = COPY %16.sub0_sub1 -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY killed %16.sub2 -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %15.sub0 -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY killed %15.sub1 +# GFX9: %{{[0-9]+}}:vreg_96 = PRED_COPY %17.sub0_sub1_sub2 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY killed %17.sub3 +# GFX9: %{{[0-9]+}}:vreg_64 = PRED_COPY %16.sub0_sub1 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY killed %16.sub2 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY %15.sub0 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY killed %15.sub1 # GFX9: %{{[0-9]+}}:vreg_96 = TBUFFER_LOAD_FORMAT_XYZ_OFFSET %4, 0, 36, 77, 0, 0, implicit $exec :: (dereferenceable load (s96), align 1, addrspace 4) -# GFX9: 
%{{[0-9]+}}:vreg_64 = COPY %19.sub0_sub1 -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY killed %19.sub2 -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %18.sub0 -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY killed %18.sub1 +# GFX9: %{{[0-9]+}}:vreg_64 = PRED_COPY %19.sub0_sub1 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY killed %19.sub2 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY %18.sub0 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY killed %18.sub1 name: gfx9_tbuffer_load_uint_32 body: | bb.0.entry: - %0:sgpr_32 = COPY $sgpr0 - %1:sgpr_32 = COPY $sgpr1 - %2:sgpr_32 = COPY $sgpr2 - %3:sgpr_32 = COPY $sgpr3 + %0:sgpr_32 = PRED_COPY $sgpr0 + %1:sgpr_32 = PRED_COPY $sgpr1 + %2:sgpr_32 = PRED_COPY $sgpr2 + %3:sgpr_32 = PRED_COPY $sgpr3 %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 68, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 8, 68, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) @@ -241,10 +241,10 @@ --- # GFX9-LABEL: name: gfx9_tbuffer_load_not_merged_data_format_mismatch -# GFX9: %{{[0-9]+}}:sgpr_32 = COPY $sgpr0 -# GFX9: %{{[0-9]+}}:sgpr_32 = COPY $sgpr1 -# GFX9: %{{[0-9]+}}:sgpr_32 = COPY $sgpr2 -# GFX9: %{{[0-9]+}}:sgpr_32 = COPY $sgpr3 +# GFX9: %{{[0-9]+}}:sgpr_32 = PRED_COPY $sgpr0 +# GFX9: %{{[0-9]+}}:sgpr_32 = PRED_COPY $sgpr1 +# GFX9: %{{[0-9]+}}:sgpr_32 = PRED_COPY $sgpr2 +# GFX9: %{{[0-9]+}}:sgpr_32 = PRED_COPY $sgpr3 # GFX9: %{{[0-9]+}}:sgpr_128 = REG_SEQUENCE %0, %subreg.sub0, %1, %subreg.sub1, %2, %subreg.sub2, %3, %subreg.sub3 # GFX9: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 4, 116, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) # GFX9: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 8, 114, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) @@ -260,10 +260,10 @@ name: gfx9_tbuffer_load_not_merged_data_format_mismatch body: | bb.0.entry: - %0:sgpr_32 = COPY $sgpr0 - %1:sgpr_32 = COPY $sgpr1 - %2:sgpr_32 = COPY $sgpr2 - %3:sgpr_32 = COPY $sgpr3 + %0:sgpr_32 = PRED_COPY $sgpr0 + %1:sgpr_32 = PRED_COPY $sgpr1 + %2:sgpr_32 = PRED_COPY $sgpr2 + %3:sgpr_32 = PRED_COPY $sgpr3 %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 116, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 8, 114, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) @@ -278,10 +278,10 @@ --- # GFX9-LABEL: name: gfx9_tbuffer_load_not_merged_num_format_mismatch -# GFX9: %{{[0-9]+}}:sgpr_32 = COPY $sgpr0 -# GFX9: %{{[0-9]+}}:sgpr_32 = COPY $sgpr1 -# GFX9: %{{[0-9]+}}:sgpr_32 = COPY $sgpr2 -# GFX9: %{{[0-9]+}}:sgpr_32 = COPY $sgpr3 +# GFX9: %{{[0-9]+}}:sgpr_32 = PRED_COPY $sgpr0 +# GFX9: %{{[0-9]+}}:sgpr_32 = PRED_COPY $sgpr1 +# GFX9: %{{[0-9]+}}:sgpr_32 = PRED_COPY $sgpr2 +# GFX9: %{{[0-9]+}}:sgpr_32 = PRED_COPY $sgpr3 # GFX9: %{{[0-9]+}}:sgpr_128 = REG_SEQUENCE %0, %subreg.sub0, %1, %subreg.sub1, %2, %subreg.sub2, %3, %subreg.sub3 # GFX9: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 4, 116, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) # GFX9: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 8, 84, 0, 0, implicit $exec :: 
(dereferenceable load (s32), align 1, addrspace 4) @@ -295,10 +295,10 @@ name: gfx9_tbuffer_load_not_merged_num_format_mismatch body: | bb.0.entry: - %0:sgpr_32 = COPY $sgpr0 - %1:sgpr_32 = COPY $sgpr1 - %2:sgpr_32 = COPY $sgpr2 - %3:sgpr_32 = COPY $sgpr3 + %0:sgpr_32 = PRED_COPY $sgpr0 + %1:sgpr_32 = PRED_COPY $sgpr1 + %2:sgpr_32 = PRED_COPY $sgpr2 + %3:sgpr_32 = PRED_COPY $sgpr3 %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 116, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 8, 84, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) @@ -320,14 +320,14 @@ body: | bb.0.entry: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - %7:vgpr_32 = COPY $vgpr3 - %6:vgpr_32 = COPY $vgpr2 - %5:vgpr_32 = COPY $vgpr1 - %4:vgpr_32 = COPY $vgpr0 - %3:sgpr_32 = COPY $sgpr3 - %2:sgpr_32 = COPY $sgpr2 - %1:sgpr_32 = COPY $sgpr1 - %0:sgpr_32 = COPY $sgpr0 + %7:vgpr_32 = PRED_COPY $vgpr3 + %6:vgpr_32 = PRED_COPY $vgpr2 + %5:vgpr_32 = PRED_COPY $vgpr1 + %4:vgpr_32 = PRED_COPY $vgpr0 + %3:sgpr_32 = PRED_COPY $sgpr3 + %2:sgpr_32 = PRED_COPY $sgpr2 + %1:sgpr_32 = PRED_COPY $sgpr1 + %0:sgpr_32 = PRED_COPY $sgpr0 %13:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 %14:vreg_96 = REG_SEQUENCE %4:vgpr_32, %subreg.sub0, %5:vgpr_32, %subreg.sub1, %6:vgpr_32, %subreg.sub2 TBUFFER_STORE_FORMAT_X_OFFSET_exact %7:vgpr_32, %13:sgpr_128, 0, 4, 116, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) @@ -345,14 +345,14 @@ body: | bb.0.entry: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - %7:vgpr_32 = COPY $vgpr3 - %6:vgpr_32 = COPY $vgpr2 - %5:vgpr_32 = COPY $vgpr1 - %4:vgpr_32 = COPY $vgpr0 - %3:sgpr_32 = COPY $sgpr3 - %2:sgpr_32 = COPY $sgpr2 - %1:sgpr_32 = COPY $sgpr1 - %0:sgpr_32 = COPY $sgpr0 + %7:vgpr_32 = PRED_COPY $vgpr3 + %6:vgpr_32 = PRED_COPY $vgpr2 + %5:vgpr_32 = PRED_COPY $vgpr1 + %4:vgpr_32 = PRED_COPY $vgpr0 + %3:sgpr_32 = PRED_COPY $sgpr3 + %2:sgpr_32 = PRED_COPY $sgpr2 + %1:sgpr_32 = PRED_COPY $sgpr1 + %0:sgpr_32 = PRED_COPY $sgpr0 %13:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 %14:vreg_96 = REG_SEQUENCE %4:vgpr_32, %subreg.sub0, %5:vgpr_32, %subreg.sub1, %6:vgpr_32, %subreg.sub2 TBUFFER_STORE_FORMAT_XYZ_OFFSET_exact %14:vreg_96, %13:sgpr_128, 0, 4, 125, 0, 0, implicit $exec :: (dereferenceable store (s96), align 1, addrspace 4) @@ -369,14 +369,14 @@ body: | bb.0.entry: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - %7:vgpr_32 = COPY $vgpr3 - %6:vgpr_32 = COPY $vgpr2 - %5:vgpr_32 = COPY $vgpr1 - %4:vgpr_32 = COPY $vgpr0 - %3:sgpr_32 = COPY $sgpr3 - %2:sgpr_32 = COPY $sgpr2 - %1:sgpr_32 = COPY $sgpr1 - %0:sgpr_32 = COPY $sgpr0 + %7:vgpr_32 = PRED_COPY $vgpr3 + %6:vgpr_32 = PRED_COPY $vgpr2 + %5:vgpr_32 = PRED_COPY $vgpr1 + %4:vgpr_32 = PRED_COPY $vgpr0 + %3:sgpr_32 = PRED_COPY $sgpr3 + %2:sgpr_32 = PRED_COPY $sgpr2 + %1:sgpr_32 = PRED_COPY $sgpr1 + %0:sgpr_32 = PRED_COPY $sgpr0 %13:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 %14:vreg_64 = REG_SEQUENCE %4:vgpr_32, %subreg.sub0, %5:vgpr_32, %subreg.sub1 %15:vreg_64 = 
REG_SEQUENCE %6:vgpr_32, %subreg.sub0, %7:vgpr_32, %subreg.sub1 @@ -393,14 +393,14 @@ body: | bb.0.entry: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - %7:vgpr_32 = COPY $vgpr3 - %6:vgpr_32 = COPY $vgpr2 - %5:vgpr_32 = COPY $vgpr1 - %4:vgpr_32 = COPY $vgpr0 - %3:sgpr_32 = COPY $sgpr3 - %2:sgpr_32 = COPY $sgpr2 - %1:sgpr_32 = COPY $sgpr1 - %0:sgpr_32 = COPY $sgpr0 + %7:vgpr_32 = PRED_COPY $vgpr3 + %6:vgpr_32 = PRED_COPY $vgpr2 + %5:vgpr_32 = PRED_COPY $vgpr1 + %4:vgpr_32 = PRED_COPY $vgpr0 + %3:sgpr_32 = PRED_COPY $sgpr3 + %2:sgpr_32 = PRED_COPY $sgpr2 + %1:sgpr_32 = PRED_COPY $sgpr1 + %0:sgpr_32 = PRED_COPY $sgpr0 %13:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 %14:vreg_64 = REG_SEQUENCE %4:vgpr_32, %subreg.sub0, %5:vgpr_32, %subreg.sub1 TBUFFER_STORE_FORMAT_X_OFFSET_exact %7:vgpr_32, %13:sgpr_128, 0, 4, 116, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) @@ -417,14 +417,14 @@ bb.0.entry: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - %7:vgpr_32 = COPY $vgpr3 - %6:vgpr_32 = COPY $vgpr2 - %5:vgpr_32 = COPY $vgpr1 - %4:vgpr_32 = COPY $vgpr0 - %3:sgpr_32 = COPY $sgpr3 - %2:sgpr_32 = COPY $sgpr2 - %1:sgpr_32 = COPY $sgpr1 - %0:sgpr_32 = COPY $sgpr0 + %7:vgpr_32 = PRED_COPY $vgpr3 + %6:vgpr_32 = PRED_COPY $vgpr2 + %5:vgpr_32 = PRED_COPY $vgpr1 + %4:vgpr_32 = PRED_COPY $vgpr0 + %3:sgpr_32 = PRED_COPY $sgpr3 + %2:sgpr_32 = PRED_COPY $sgpr2 + %1:sgpr_32 = PRED_COPY $sgpr1 + %0:sgpr_32 = PRED_COPY $sgpr0 %13:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 %14:vreg_64 = REG_SEQUENCE %4:vgpr_32, %subreg.sub0, %5:vgpr_32, %subreg.sub1 TBUFFER_STORE_FORMAT_XY_OFFSET_exact %14:vreg_64, %13:sgpr_128, 0, 4, 123, 0, 0, implicit $exec :: (dereferenceable store (s64), align 1, addrspace 4) @@ -440,14 +440,14 @@ body: | bb.0.entry: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - %7:vgpr_32 = COPY $vgpr3 - %6:vgpr_32 = COPY $vgpr2 - %5:vgpr_32 = COPY $vgpr1 - %4:vgpr_32 = COPY $vgpr0 - %3:sgpr_32 = COPY $sgpr3 - %2:sgpr_32 = COPY $sgpr2 - %1:sgpr_32 = COPY $sgpr1 - %0:sgpr_32 = COPY $sgpr0 + %7:vgpr_32 = PRED_COPY $vgpr3 + %6:vgpr_32 = PRED_COPY $vgpr2 + %5:vgpr_32 = PRED_COPY $vgpr1 + %4:vgpr_32 = PRED_COPY $vgpr0 + %3:sgpr_32 = PRED_COPY $sgpr3 + %2:sgpr_32 = PRED_COPY $sgpr2 + %1:sgpr_32 = PRED_COPY $sgpr1 + %0:sgpr_32 = PRED_COPY $sgpr0 %13:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 TBUFFER_STORE_FORMAT_X_OFFSET_exact %6:vgpr_32, %13:sgpr_128, 0, 4, 116, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) TBUFFER_STORE_FORMAT_X_OFFSET_exact %7:vgpr_32, %13:sgpr_128, 0, 8, 116, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) @@ -461,14 +461,14 @@ body: | bb.0.entry: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - %7:vgpr_32 = COPY $vgpr3 - %6:vgpr_32 = COPY $vgpr2 - %5:vgpr_32 = COPY $vgpr1 - %4:vgpr_32 = COPY $vgpr0 - %3:sgpr_32 = COPY $sgpr3 - %2:sgpr_32 = COPY $sgpr2 - %1:sgpr_32 = COPY $sgpr1 - %0:sgpr_32 = COPY $sgpr0 + %7:vgpr_32 = PRED_COPY $vgpr3 + %6:vgpr_32 = PRED_COPY $vgpr2 + %5:vgpr_32 = PRED_COPY $vgpr1 + %4:vgpr_32 = PRED_COPY $vgpr0 + %3:sgpr_32 = PRED_COPY $sgpr3 + %2:sgpr_32 = PRED_COPY $sgpr2 
+ %1:sgpr_32 = PRED_COPY $sgpr1 + %0:sgpr_32 = PRED_COPY $sgpr0 %13:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 TBUFFER_STORE_FORMAT_X_OFFSET_exact %6:vgpr_32, %13:sgpr_128, 0, 4, 126, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) TBUFFER_STORE_FORMAT_X_OFFSET_exact %7:vgpr_32, %13:sgpr_128, 0, 8, 126, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) @@ -476,19 +476,19 @@ --- # GFX9-LABEL: name: gfx9_tbuffer_store_float32 -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr8 -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr7 -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr6 -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr5 -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr4 -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr3 -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr2 -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr1 -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr0 -# GFX9: %{{[0-9]+}}:sgpr_32 = COPY $sgpr3 -# GFX9: %{{[0-9]+}}:sgpr_32 = COPY $sgpr2 -# GFX9: %{{[0-9]+}}:sgpr_32 = COPY $sgpr1 -# GFX9: %{{[0-9]+}}:sgpr_32 = COPY $sgpr0 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr8 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr7 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr6 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr5 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr4 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr3 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr2 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr1 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr0 +# GFX9: %{{[0-9]+}}:sgpr_32 = PRED_COPY $sgpr3 +# GFX9: %{{[0-9]+}}:sgpr_32 = PRED_COPY $sgpr2 +# GFX9: %{{[0-9]+}}:sgpr_32 = PRED_COPY $sgpr1 +# GFX9: %{{[0-9]+}}:sgpr_32 = PRED_COPY $sgpr0 # GFX9: %{{[0-9]+}}:sgpr_128 = REG_SEQUENCE %12, %subreg.sub0, %11, %subreg.sub1, %10, %subreg.sub2, %9, %subreg.sub3 # GFX9: %{{[0-9]+}}:vreg_64 = REG_SEQUENCE %8, %subreg.sub0, %7, %subreg.sub1 # GFX9: TBUFFER_STORE_FORMAT_XY_OFFSET_exact killed %14, %13, 0, 4, 123, 0, 0, implicit $exec :: (dereferenceable store (s64), align 1, addrspace 4) @@ -503,19 +503,19 @@ body: | bb.0.entry: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 - %12:vgpr_32 = COPY $vgpr8 - %11:vgpr_32 = COPY $vgpr7 - %10:vgpr_32 = COPY $vgpr6 - %9:vgpr_32 = COPY $vgpr5 - %8:vgpr_32 = COPY $vgpr4 - %7:vgpr_32 = COPY $vgpr3 - %6:vgpr_32 = COPY $vgpr2 - %5:vgpr_32 = COPY $vgpr1 - %4:vgpr_32 = COPY $vgpr0 - %3:sgpr_32 = COPY $sgpr3 - %2:sgpr_32 = COPY $sgpr2 - %1:sgpr_32 = COPY $sgpr1 - %0:sgpr_32 = COPY $sgpr0 + %12:vgpr_32 = PRED_COPY $vgpr8 + %11:vgpr_32 = PRED_COPY $vgpr7 + %10:vgpr_32 = PRED_COPY $vgpr6 + %9:vgpr_32 = PRED_COPY $vgpr5 + %8:vgpr_32 = PRED_COPY $vgpr4 + %7:vgpr_32 = PRED_COPY $vgpr3 + %6:vgpr_32 = PRED_COPY $vgpr2 + %5:vgpr_32 = PRED_COPY $vgpr1 + %4:vgpr_32 = PRED_COPY $vgpr0 + %3:sgpr_32 = PRED_COPY $sgpr3 + %2:sgpr_32 = PRED_COPY $sgpr2 + %1:sgpr_32 = PRED_COPY $sgpr1 + %0:sgpr_32 = PRED_COPY $sgpr0 %13:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 TBUFFER_STORE_FORMAT_X_OFFSET_exact %4:vgpr_32, %13:sgpr_128, 0, 4, 116, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) TBUFFER_STORE_FORMAT_X_OFFSET_exact %5:vgpr_32, %13:sgpr_128, 0, 8, 116, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) @@ -530,19 +530,19 @@ --- # GFX9-LABEL: name: gfx9_tbuffer_store_sint32 -# GFX9: %{{[0-9]+}}:vgpr_32 = 
COPY $vgpr8 -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr7 -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr6 -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr5 -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr4 -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr3 -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr2 -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr1 -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr0 -# GFX9: %{{[0-9]+}}:sgpr_32 = COPY $sgpr3 -# GFX9: %{{[0-9]+}}:sgpr_32 = COPY $sgpr2 -# GFX9: %{{[0-9]+}}:sgpr_32 = COPY $sgpr1 -# GFX9: %{{[0-9]+}}:sgpr_32 = COPY $sgpr0 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr8 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr7 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr6 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr5 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr4 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr3 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr2 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr1 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr0 +# GFX9: %{{[0-9]+}}:sgpr_32 = PRED_COPY $sgpr3 +# GFX9: %{{[0-9]+}}:sgpr_32 = PRED_COPY $sgpr2 +# GFX9: %{{[0-9]+}}:sgpr_32 = PRED_COPY $sgpr1 +# GFX9: %{{[0-9]+}}:sgpr_32 = PRED_COPY $sgpr0 # GFX9: %{{[0-9]+}}:sgpr_128 = REG_SEQUENCE %12, %subreg.sub0, %11, %subreg.sub1, %10, %subreg.sub2, %9, %subreg.sub3 # GFX9: %{{[0-9]+}}:vreg_64 = REG_SEQUENCE %8, %subreg.sub0, %7, %subreg.sub1 # GFX9: TBUFFER_STORE_FORMAT_XY_OFFSET_exact killed %14, %13, 0, 4, 91, 0, 0, implicit $exec :: (dereferenceable store (s64), align 1, addrspace 4) @@ -557,19 +557,19 @@ body: | bb.0.entry: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 - %12:vgpr_32 = COPY $vgpr8 - %11:vgpr_32 = COPY $vgpr7 - %10:vgpr_32 = COPY $vgpr6 - %9:vgpr_32 = COPY $vgpr5 - %8:vgpr_32 = COPY $vgpr4 - %7:vgpr_32 = COPY $vgpr3 - %6:vgpr_32 = COPY $vgpr2 - %5:vgpr_32 = COPY $vgpr1 - %4:vgpr_32 = COPY $vgpr0 - %3:sgpr_32 = COPY $sgpr3 - %2:sgpr_32 = COPY $sgpr2 - %1:sgpr_32 = COPY $sgpr1 - %0:sgpr_32 = COPY $sgpr0 + %12:vgpr_32 = PRED_COPY $vgpr8 + %11:vgpr_32 = PRED_COPY $vgpr7 + %10:vgpr_32 = PRED_COPY $vgpr6 + %9:vgpr_32 = PRED_COPY $vgpr5 + %8:vgpr_32 = PRED_COPY $vgpr4 + %7:vgpr_32 = PRED_COPY $vgpr3 + %6:vgpr_32 = PRED_COPY $vgpr2 + %5:vgpr_32 = PRED_COPY $vgpr1 + %4:vgpr_32 = PRED_COPY $vgpr0 + %3:sgpr_32 = PRED_COPY $sgpr3 + %2:sgpr_32 = PRED_COPY $sgpr2 + %1:sgpr_32 = PRED_COPY $sgpr1 + %0:sgpr_32 = PRED_COPY $sgpr0 %13:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 TBUFFER_STORE_FORMAT_X_OFFSET_exact %4:vgpr_32, %13:sgpr_128, 0, 4, 84, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) TBUFFER_STORE_FORMAT_X_OFFSET_exact %5:vgpr_32, %13:sgpr_128, 0, 8, 84, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) @@ -584,19 +584,19 @@ --- # GFX9-LABEL: name: gfx9_tbuffer_store_uint32 -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr8 -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr7 -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr6 -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr5 -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr4 -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr3 -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr2 -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr1 -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr0 -# GFX9: %{{[0-9]+}}:sgpr_32 = COPY $sgpr3 -# GFX9: %{{[0-9]+}}:sgpr_32 = COPY $sgpr2 -# GFX9: %{{[0-9]+}}:sgpr_32 = COPY $sgpr1 -# GFX9: %{{[0-9]+}}:sgpr_32 = COPY $sgpr0 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr8 +# GFX9: 
%{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr7 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr6 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr5 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr4 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr3 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr2 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr1 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr0 +# GFX9: %{{[0-9]+}}:sgpr_32 = PRED_COPY $sgpr3 +# GFX9: %{{[0-9]+}}:sgpr_32 = PRED_COPY $sgpr2 +# GFX9: %{{[0-9]+}}:sgpr_32 = PRED_COPY $sgpr1 +# GFX9: %{{[0-9]+}}:sgpr_32 = PRED_COPY $sgpr0 # GFX9: %{{[0-9]+}}:sgpr_128 = REG_SEQUENCE %12, %subreg.sub0, %11, %subreg.sub1, %10, %subreg.sub2, %9, %subreg.sub3 # GFX9: %{{[0-9]+}}:vreg_64 = REG_SEQUENCE %8, %subreg.sub0, %7, %subreg.sub1 # GFX9: TBUFFER_STORE_FORMAT_XY_OFFSET_exact killed %14, %13, 0, 4, 75, 0, 0, implicit $exec :: (dereferenceable store (s64), align 1, addrspace 4) @@ -611,19 +611,19 @@ body: | bb.0.entry: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 - %12:vgpr_32 = COPY $vgpr8 - %11:vgpr_32 = COPY $vgpr7 - %10:vgpr_32 = COPY $vgpr6 - %9:vgpr_32 = COPY $vgpr5 - %8:vgpr_32 = COPY $vgpr4 - %7:vgpr_32 = COPY $vgpr3 - %6:vgpr_32 = COPY $vgpr2 - %5:vgpr_32 = COPY $vgpr1 - %4:vgpr_32 = COPY $vgpr0 - %3:sgpr_32 = COPY $sgpr3 - %2:sgpr_32 = COPY $sgpr2 - %1:sgpr_32 = COPY $sgpr1 - %0:sgpr_32 = COPY $sgpr0 + %12:vgpr_32 = PRED_COPY $vgpr8 + %11:vgpr_32 = PRED_COPY $vgpr7 + %10:vgpr_32 = PRED_COPY $vgpr6 + %9:vgpr_32 = PRED_COPY $vgpr5 + %8:vgpr_32 = PRED_COPY $vgpr4 + %7:vgpr_32 = PRED_COPY $vgpr3 + %6:vgpr_32 = PRED_COPY $vgpr2 + %5:vgpr_32 = PRED_COPY $vgpr1 + %4:vgpr_32 = PRED_COPY $vgpr0 + %3:sgpr_32 = PRED_COPY $sgpr3 + %2:sgpr_32 = PRED_COPY $sgpr2 + %1:sgpr_32 = PRED_COPY $sgpr1 + %0:sgpr_32 = PRED_COPY $sgpr0 %13:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 TBUFFER_STORE_FORMAT_X_OFFSET_exact %4:vgpr_32, %13:sgpr_128, 0, 4, 68, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) TBUFFER_STORE_FORMAT_X_OFFSET_exact %5:vgpr_32, %13:sgpr_128, 0, 8, 68, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) @@ -638,19 +638,19 @@ --- # GFX9-LABEL: name: gfx9_tbuffer_store_not_merged_data_format_mismatch -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr8 -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr7 -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr6 -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr5 -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr4 -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr3 -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr2 -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr1 -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr0 -# GFX9: %{{[0-9]+}}:sgpr_32 = COPY $sgpr3 -# GFX9: %{{[0-9]+}}:sgpr_32 = COPY $sgpr2 -# GFX9: %{{[0-9]+}}:sgpr_32 = COPY $sgpr1 -# GFX9: %{{[0-9]+}}:sgpr_32 = COPY $sgpr0 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr8 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr7 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr6 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr5 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr4 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr3 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr2 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr1 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr0 +# GFX9: %{{[0-9]+}}:sgpr_32 = PRED_COPY $sgpr3 +# GFX9: %{{[0-9]+}}:sgpr_32 = PRED_COPY $sgpr2 +# GFX9: %{{[0-9]+}}:sgpr_32 = PRED_COPY $sgpr1 +# GFX9: %{{[0-9]+}}:sgpr_32 = PRED_COPY $sgpr0 # GFX9: 
%{{[0-9]+}}:sgpr_128 = REG_SEQUENCE %12, %subreg.sub0, %11, %subreg.sub1, %10, %subreg.sub2, %9, %subreg.sub3 # GFX9: TBUFFER_STORE_FORMAT_X_OFFSET_exact %8, %13, 0, 4, 116, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) # GFX9: TBUFFER_STORE_FORMAT_X_OFFSET_exact %7, %13, 0, 8, 84, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) @@ -665,19 +665,19 @@ body: | bb.0.entry: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 - %12:vgpr_32 = COPY $vgpr8 - %11:vgpr_32 = COPY $vgpr7 - %10:vgpr_32 = COPY $vgpr6 - %9:vgpr_32 = COPY $vgpr5 - %8:vgpr_32 = COPY $vgpr4 - %7:vgpr_32 = COPY $vgpr3 - %6:vgpr_32 = COPY $vgpr2 - %5:vgpr_32 = COPY $vgpr1 - %4:vgpr_32 = COPY $vgpr0 - %3:sgpr_32 = COPY $sgpr3 - %2:sgpr_32 = COPY $sgpr2 - %1:sgpr_32 = COPY $sgpr1 - %0:sgpr_32 = COPY $sgpr0 + %12:vgpr_32 = PRED_COPY $vgpr8 + %11:vgpr_32 = PRED_COPY $vgpr7 + %10:vgpr_32 = PRED_COPY $vgpr6 + %9:vgpr_32 = PRED_COPY $vgpr5 + %8:vgpr_32 = PRED_COPY $vgpr4 + %7:vgpr_32 = PRED_COPY $vgpr3 + %6:vgpr_32 = PRED_COPY $vgpr2 + %5:vgpr_32 = PRED_COPY $vgpr1 + %4:vgpr_32 = PRED_COPY $vgpr0 + %3:sgpr_32 = PRED_COPY $sgpr3 + %2:sgpr_32 = PRED_COPY $sgpr2 + %1:sgpr_32 = PRED_COPY $sgpr1 + %0:sgpr_32 = PRED_COPY $sgpr0 %13:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 TBUFFER_STORE_FORMAT_X_OFFSET_exact %4:vgpr_32, %13:sgpr_128, 0, 4, 116, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) TBUFFER_STORE_FORMAT_X_OFFSET_exact %5:vgpr_32, %13:sgpr_128, 0, 8, 84, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) @@ -692,19 +692,19 @@ --- # GFX9-LABEL: name: gfx9_tbuffer_store_not_merged_num_format_mismatch -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr8 -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr7 -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr6 -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr5 -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr4 -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr3 -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr2 -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr1 -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr0 -# GFX9: %{{[0-9]+}}:sgpr_32 = COPY $sgpr3 -# GFX9: %{{[0-9]+}}:sgpr_32 = COPY $sgpr2 -# GFX9: %{{[0-9]+}}:sgpr_32 = COPY $sgpr1 -# GFX9: %{{[0-9]+}}:sgpr_32 = COPY $sgpr0 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr8 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr7 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr6 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr5 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr4 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr3 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr2 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr1 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr0 +# GFX9: %{{[0-9]+}}:sgpr_32 = PRED_COPY $sgpr3 +# GFX9: %{{[0-9]+}}:sgpr_32 = PRED_COPY $sgpr2 +# GFX9: %{{[0-9]+}}:sgpr_32 = PRED_COPY $sgpr1 +# GFX9: %{{[0-9]+}}:sgpr_32 = PRED_COPY $sgpr0 # GFX9: %{{[0-9]+}}:sgpr_128 = REG_SEQUENCE %12, %subreg.sub0, %11, %subreg.sub1, %10, %subreg.sub2, %9, %subreg.sub3 # GFX9: TBUFFER_STORE_FORMAT_X_OFFSET_exact %8, %13, 0, 4, 116, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) # GFX9: TBUFFER_STORE_FORMAT_X_OFFSET_exact %7, %13, 0, 8, 114, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) @@ -719,19 +719,19 @@ body: | bb.0.entry: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, 
$vgpr6, $vgpr7, $vgpr8 - %12:vgpr_32 = COPY $vgpr8 - %11:vgpr_32 = COPY $vgpr7 - %10:vgpr_32 = COPY $vgpr6 - %9:vgpr_32 = COPY $vgpr5 - %8:vgpr_32 = COPY $vgpr4 - %7:vgpr_32 = COPY $vgpr3 - %6:vgpr_32 = COPY $vgpr2 - %5:vgpr_32 = COPY $vgpr1 - %4:vgpr_32 = COPY $vgpr0 - %3:sgpr_32 = COPY $sgpr3 - %2:sgpr_32 = COPY $sgpr2 - %1:sgpr_32 = COPY $sgpr1 - %0:sgpr_32 = COPY $sgpr0 + %12:vgpr_32 = PRED_COPY $vgpr8 + %11:vgpr_32 = PRED_COPY $vgpr7 + %10:vgpr_32 = PRED_COPY $vgpr6 + %9:vgpr_32 = PRED_COPY $vgpr5 + %8:vgpr_32 = PRED_COPY $vgpr4 + %7:vgpr_32 = PRED_COPY $vgpr3 + %6:vgpr_32 = PRED_COPY $vgpr2 + %5:vgpr_32 = PRED_COPY $vgpr1 + %4:vgpr_32 = PRED_COPY $vgpr0 + %3:sgpr_32 = PRED_COPY $sgpr3 + %2:sgpr_32 = PRED_COPY $sgpr2 + %1:sgpr_32 = PRED_COPY $sgpr1 + %0:sgpr_32 = PRED_COPY $sgpr0 %13:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 TBUFFER_STORE_FORMAT_X_OFFSET_exact %4:vgpr_32, %13:sgpr_128, 0, 4, 116, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) TBUFFER_STORE_FORMAT_X_OFFSET_exact %5:vgpr_32, %13:sgpr_128, 0, 8, 114, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) @@ -752,10 +752,10 @@ name: gfx9_tbuffer_load_not_merged_swizzled_0 body: | bb.0.entry: - %0:sgpr_32 = COPY $sgpr0 - %1:sgpr_32 = COPY $sgpr1 - %2:sgpr_32 = COPY $sgpr2 - %3:sgpr_32 = COPY $sgpr3 + %0:sgpr_32 = PRED_COPY $sgpr0 + %1:sgpr_32 = PRED_COPY $sgpr1 + %2:sgpr_32 = PRED_COPY $sgpr2 + %3:sgpr_32 = PRED_COPY $sgpr3 %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 116, 0, 1, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 8, 116, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) @@ -769,10 +769,10 @@ name: gfx9_tbuffer_load_not_merged_swizzled_1 body: | bb.0.entry: - %0:sgpr_32 = COPY $sgpr0 - %1:sgpr_32 = COPY $sgpr1 - %2:sgpr_32 = COPY $sgpr2 - %3:sgpr_32 = COPY $sgpr3 + %0:sgpr_32 = PRED_COPY $sgpr0 + %1:sgpr_32 = PRED_COPY $sgpr1 + %2:sgpr_32 = PRED_COPY $sgpr2 + %3:sgpr_32 = PRED_COPY $sgpr3 %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 116, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 8, 116, 0, 1, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) @@ -786,10 +786,10 @@ name: gfx9_tbuffer_load_merge_across_swizzle body: | bb.0.entry: - %0:sgpr_32 = COPY $sgpr0 - %1:sgpr_32 = COPY $sgpr1 - %2:sgpr_32 = COPY $sgpr2 - %3:sgpr_32 = COPY $sgpr3 + %0:sgpr_32 = PRED_COPY $sgpr0 + %1:sgpr_32 = PRED_COPY $sgpr1 + %2:sgpr_32 = PRED_COPY $sgpr2 + %3:sgpr_32 = PRED_COPY $sgpr3 %4:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 %5:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4:sgpr_128, 0, 4, 116, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) %6:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4:sgpr_128, 0, 12, 116, 0, 1, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) @@ -805,11 +805,11 @@ name: gfx9_tbuffer_load_merge_across_swizzled_store body: | bb.0.entry: - %0:sgpr_32 = COPY $sgpr0 
- %1:sgpr_32 = COPY $sgpr1 - %2:sgpr_32 = COPY $sgpr2 - %3:sgpr_32 = COPY $sgpr3 - %4:vgpr_32 = COPY $vgpr0 + %0:sgpr_32 = PRED_COPY $sgpr0 + %1:sgpr_32 = PRED_COPY $sgpr1 + %2:sgpr_32 = PRED_COPY $sgpr2 + %3:sgpr_32 = PRED_COPY $sgpr3 + %4:vgpr_32 = PRED_COPY $vgpr0 %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 %6:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 116, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) TBUFFER_STORE_FORMAT_X_OFFSET_exact %4:vgpr_32, %5:sgpr_128, 0, 6, 116, 0, 1, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) @@ -824,15 +824,15 @@ # GFX10-LABEL: name: gfx10_tbuffer_load_x_xyz # GFX10: %{{[0-9]+}}:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_OFFSET %4, 0, 4, 77, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 4) -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %7.sub0 -# GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %7.sub1_sub2_sub3 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY %7.sub0 +# GFX10: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %7.sub1_sub2_sub3 name: gfx10_tbuffer_load_x_xyz body: | bb.0.entry: - %0:sgpr_32 = COPY $sgpr0 - %1:sgpr_32 = COPY $sgpr1 - %2:sgpr_32 = COPY $sgpr2 - %3:sgpr_32 = COPY $sgpr3 + %0:sgpr_32 = PRED_COPY $sgpr0 + %1:sgpr_32 = PRED_COPY $sgpr1 + %2:sgpr_32 = PRED_COPY $sgpr2 + %3:sgpr_32 = PRED_COPY $sgpr3 %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 22, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) %8:vreg_96 = TBUFFER_LOAD_FORMAT_XYZ_OFFSET %5:sgpr_128, 0, 8, 74, 0, 0, implicit $exec :: (dereferenceable load (s96), align 1, addrspace 4) @@ -841,15 +841,15 @@ # GFX10-LABEL: name: gfx10_tbuffer_load_xyz_x # GFX10: %{{[0-9]+}}:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_OFFSET %4, 0, 4, 77, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 4) -# GFX10: %{{[0-9]+}}:vreg_96 = COPY %7.sub0_sub1_sub2 -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY killed %7.sub3 +# GFX10: %{{[0-9]+}}:vreg_96 = PRED_COPY %7.sub0_sub1_sub2 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY killed %7.sub3 name: gfx10_tbuffer_load_xyz_x body: | bb.0.entry: - %0:sgpr_32 = COPY $sgpr0 - %1:sgpr_32 = COPY $sgpr1 - %2:sgpr_32 = COPY $sgpr2 - %3:sgpr_32 = COPY $sgpr3 + %0:sgpr_32 = PRED_COPY $sgpr0 + %1:sgpr_32 = PRED_COPY $sgpr1 + %2:sgpr_32 = PRED_COPY $sgpr2 + %3:sgpr_32 = PRED_COPY $sgpr3 %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 %7:vreg_96 = TBUFFER_LOAD_FORMAT_XYZ_OFFSET %5:sgpr_128, 0, 4, 74, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 16, 22, 0, 0, implicit $exec :: (dereferenceable load (s96), align 1, addrspace 4) @@ -858,15 +858,15 @@ # GFX10-LABEL: name: gfx10_tbuffer_load_xy_xy # GFX10: %{{[0-9]+}}:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_OFFSET %4, 0, 4, 77, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 4) -# GFX10: %{{[0-9]+}}:vreg_64 = COPY %7.sub0_sub1 -# GFX10: %{{[0-9]+}}:vreg_64 = COPY killed %7.sub2_sub3 +# GFX10: %{{[0-9]+}}:vreg_64 = PRED_COPY %7.sub0_sub1 +# GFX10: %{{[0-9]+}}:vreg_64 = PRED_COPY killed %7.sub2_sub3 name: gfx10_tbuffer_load_xy_xy body: | bb.0.entry: - %0:sgpr_32 = COPY $sgpr0 - %1:sgpr_32 = COPY $sgpr1 - %2:sgpr_32 = COPY 
$sgpr2 - %3:sgpr_32 = COPY $sgpr3 + %0:sgpr_32 = PRED_COPY $sgpr0 + %1:sgpr_32 = PRED_COPY $sgpr1 + %2:sgpr_32 = PRED_COPY $sgpr2 + %3:sgpr_32 = PRED_COPY $sgpr3 %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 %7:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFSET %5:sgpr_128, 0, 4, 64, 0, 0, implicit $exec :: (dereferenceable load (s64), align 1, addrspace 4) %8:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFSET %5:sgpr_128, 0, 12, 64, 0, 0, implicit $exec :: (dereferenceable load (s64), align 1, addrspace 4) @@ -875,15 +875,15 @@ # GFX10-LABEL: name: gfx10_tbuffer_load_x_xy # GFX10: %{{[0-9]+}}:vreg_96 = TBUFFER_LOAD_FORMAT_XYZ_OFFSET %4, 0, 4, 74, 0, 0, implicit $exec :: (dereferenceable load (s96), align 1, addrspace 4) -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %7.sub0 -# GFX10: %{{[0-9]+}}:vreg_64 = COPY killed %7.sub1_sub2 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY %7.sub0 +# GFX10: %{{[0-9]+}}:vreg_64 = PRED_COPY killed %7.sub1_sub2 name: gfx10_tbuffer_load_x_xy body: | bb.0.entry: - %0:sgpr_32 = COPY $sgpr0 - %1:sgpr_32 = COPY $sgpr1 - %2:sgpr_32 = COPY $sgpr2 - %3:sgpr_32 = COPY $sgpr3 + %0:sgpr_32 = PRED_COPY $sgpr0 + %1:sgpr_32 = PRED_COPY $sgpr1 + %2:sgpr_32 = PRED_COPY $sgpr2 + %3:sgpr_32 = PRED_COPY $sgpr3 %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 22, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) %8:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFSET %5:sgpr_128, 0, 8, 64, 0, 0, implicit $exec :: (dereferenceable load (s64), align 1, addrspace 4) @@ -892,15 +892,15 @@ # GFX10-LABEL: name: gfx10_tbuffer_load_xy_x # GFX10: %{{[0-9]+}}:vreg_96 = TBUFFER_LOAD_FORMAT_XYZ_OFFSET %4, 0, 4, 74, 0, 0, implicit $exec :: (dereferenceable load (s96), align 1, addrspace 4) -# GFX10: %{{[0-9]+}}:vreg_64 = COPY %7.sub0_sub1 -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY killed %7.sub2 +# GFX10: %{{[0-9]+}}:vreg_64 = PRED_COPY %7.sub0_sub1 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY killed %7.sub2 name: gfx10_tbuffer_load_xy_x body: | bb.0.entry: - %0:sgpr_32 = COPY $sgpr0 - %1:sgpr_32 = COPY $sgpr1 - %2:sgpr_32 = COPY $sgpr2 - %3:sgpr_32 = COPY $sgpr3 + %0:sgpr_32 = PRED_COPY $sgpr0 + %1:sgpr_32 = PRED_COPY $sgpr1 + %2:sgpr_32 = PRED_COPY $sgpr2 + %3:sgpr_32 = PRED_COPY $sgpr3 %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 %7:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFSET %5:sgpr_128, 0, 4, 64, 0, 0, implicit $exec :: (dereferenceable load (s64), align 1, addrspace 4) %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 12, 22, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) @@ -910,16 +910,16 @@ # GFX10-LABEL: name: gfx10_tbuffer_load_x_x # GFX10: %{{[0-9]+}}:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFSET %4, 0, 4, 64, 0, 0, implicit $exec :: (dereferenceable load (s64), align 1, addrspace 4) -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %7.sub0 -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY killed %7.sub1 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY %7.sub0 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY killed %7.sub1 name: gfx10_tbuffer_load_x_x body: | bb.0.entry: - %0:sgpr_32 = COPY $sgpr0 - %1:sgpr_32 = COPY $sgpr1 - %2:sgpr_32 = COPY $sgpr2 - %3:sgpr_32 = COPY $sgpr3 + %0:sgpr_32 = PRED_COPY $sgpr0 + %1:sgpr_32 = PRED_COPY $sgpr1 + %2:sgpr_32 = PRED_COPY $sgpr2 + %3:sgpr_32 = PRED_COPY $sgpr3 %5:sgpr_128 = 
REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 22, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 8, 22, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) @@ -928,16 +928,16 @@ # GFX10-LABEL: name: gfx10_tbuffer_load_x_x_format_32_32_32_32 # GFX10: %{{[0-9]+}}:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFSET %4, 0, 4, 64, 0, 0, implicit $exec :: (dereferenceable load (s64), align 1, addrspace 4) -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %7.sub0 -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY killed %7.sub1 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY %7.sub0 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY killed %7.sub1 name: gfx10_tbuffer_load_x_x_format_32_32_32_32 body: | bb.0.entry: - %0:sgpr_32 = COPY $sgpr0 - %1:sgpr_32 = COPY $sgpr1 - %2:sgpr_32 = COPY $sgpr2 - %3:sgpr_32 = COPY $sgpr3 + %0:sgpr_32 = PRED_COPY $sgpr0 + %1:sgpr_32 = PRED_COPY $sgpr1 + %2:sgpr_32 = PRED_COPY $sgpr2 + %3:sgpr_32 = PRED_COPY $sgpr3 %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 77, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 8, 77, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) @@ -947,28 +947,28 @@ # GFX10-LABEL: name: gfx10_tbuffer_load_float_32 # GFX10: %{{[0-9]+}}:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFSET %4, 0, 4, 64, 0, 0, implicit $exec :: (dereferenceable load (s64), align 1, addrspace 4) -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %14.sub0 -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY killed %14.sub1 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY %14.sub0 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY killed %14.sub1 # GFX10: %{{[0-9]+}}:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_OFFSET %4, 0, 16, 77, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 4) -# GFX10: %{{[0-9]+}}:vreg_96 = COPY %17.sub0_sub1_sub2 -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY killed %17.sub3 -# GFX10: %{{[0-9]+}}:vreg_64 = COPY %16.sub0_sub1 -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY killed %16.sub2 -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %15.sub0 -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY killed %15.sub1 +# GFX10: %{{[0-9]+}}:vreg_96 = PRED_COPY %17.sub0_sub1_sub2 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY killed %17.sub3 +# GFX10: %{{[0-9]+}}:vreg_64 = PRED_COPY %16.sub0_sub1 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY killed %16.sub2 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY %15.sub0 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY killed %15.sub1 # GFX10: %{{[0-9]+}}:vreg_96 = TBUFFER_LOAD_FORMAT_XYZ_OFFSET %4, 0, 36, 74, 0, 0, implicit $exec :: (dereferenceable load (s96), align 1, addrspace 4) -# GFX10: %{{[0-9]+}}:vreg_64 = COPY %19.sub0_sub1 -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY killed %19.sub2 -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %18.sub0 -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY killed %18.sub1 +# GFX10: %{{[0-9]+}}:vreg_64 = PRED_COPY %19.sub0_sub1 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY killed %19.sub2 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY %18.sub0 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY killed %18.sub1 name: gfx10_tbuffer_load_float_32 body: | bb.0.entry: - %0:sgpr_32 = COPY $sgpr0 - %1:sgpr_32 = COPY $sgpr1 - %2:sgpr_32 = COPY $sgpr2 - %3:sgpr_32 = COPY $sgpr3 + %0:sgpr_32 = PRED_COPY 
$sgpr0 + %1:sgpr_32 = PRED_COPY $sgpr1 + %2:sgpr_32 = PRED_COPY $sgpr2 + %3:sgpr_32 = PRED_COPY $sgpr3 %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 22, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 8, 22, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) @@ -984,28 +984,28 @@ # GFX10-LABEL: name: gfx10_tbuffer_load_sint_32 # GFX10: %{{[0-9]+}}:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFSET %4, 0, 4, 63, 0, 0, implicit $exec :: (dereferenceable load (s64), align 1, addrspace 4) -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %14.sub0 -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY killed %14.sub1 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY %14.sub0 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY killed %14.sub1 # GFX10: %{{[0-9]+}}:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_OFFSET %4, 0, 16, 76, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 4) -# GFX10: %{{[0-9]+}}:vreg_96 = COPY %17.sub0_sub1_sub2 -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY killed %17.sub3 -# GFX10: %{{[0-9]+}}:vreg_64 = COPY %16.sub0_sub1 -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY killed %16.sub2 -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %15.sub0 -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY killed %15.sub1 +# GFX10: %{{[0-9]+}}:vreg_96 = PRED_COPY %17.sub0_sub1_sub2 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY killed %17.sub3 +# GFX10: %{{[0-9]+}}:vreg_64 = PRED_COPY %16.sub0_sub1 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY killed %16.sub2 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY %15.sub0 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY killed %15.sub1 # GFX10: %{{[0-9]+}}:vreg_96 = TBUFFER_LOAD_FORMAT_XYZ_OFFSET %4, 0, 36, 73, 0, 0, implicit $exec :: (dereferenceable load (s96), align 1, addrspace 4) -# GFX10: %{{[0-9]+}}:vreg_64 = COPY %19.sub0_sub1 -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY killed %19.sub2 -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %18.sub0 -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY killed %18.sub1 +# GFX10: %{{[0-9]+}}:vreg_64 = PRED_COPY %19.sub0_sub1 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY killed %19.sub2 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY %18.sub0 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY killed %18.sub1 name: gfx10_tbuffer_load_sint_32 body: | bb.0.entry: - %0:sgpr_32 = COPY $sgpr0 - %1:sgpr_32 = COPY $sgpr1 - %2:sgpr_32 = COPY $sgpr2 - %3:sgpr_32 = COPY $sgpr3 + %0:sgpr_32 = PRED_COPY $sgpr0 + %1:sgpr_32 = PRED_COPY $sgpr1 + %2:sgpr_32 = PRED_COPY $sgpr2 + %3:sgpr_32 = PRED_COPY $sgpr3 %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 21, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 8, 21, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) @@ -1021,28 +1021,28 @@ # GFX10-LABEL: name: gfx10_tbuffer_load_uint_32 # GFX10: %{{[0-9]+}}:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFSET %4, 0, 4, 62, 0, 0, implicit $exec :: (dereferenceable load (s64), align 1, addrspace 4) -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %14.sub0 -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY killed %14.sub1 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY %14.sub0 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY killed %14.sub1 # GFX10: %{{[0-9]+}}:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_OFFSET %4, 0, 16, 75, 0, 0, implicit 
$exec :: (dereferenceable load (s128), align 1, addrspace 4) -# GFX10: %{{[0-9]+}}:vreg_96 = COPY %17.sub0_sub1_sub2 -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY killed %17.sub3 -# GFX10: %{{[0-9]+}}:vreg_64 = COPY %16.sub0_sub1 -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY killed %16.sub2 -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %15.sub0 -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY killed %15.sub1 +# GFX10: %{{[0-9]+}}:vreg_96 = PRED_COPY %17.sub0_sub1_sub2 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY killed %17.sub3 +# GFX10: %{{[0-9]+}}:vreg_64 = PRED_COPY %16.sub0_sub1 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY killed %16.sub2 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY %15.sub0 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY killed %15.sub1 # GFX10: %{{[0-9]+}}:vreg_96 = TBUFFER_LOAD_FORMAT_XYZ_OFFSET %4, 0, 36, 72, 0, 0, implicit $exec :: (dereferenceable load (s96), align 1, addrspace 4) -# GFX10: %{{[0-9]+}}:vreg_64 = COPY %19.sub0_sub1 -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY killed %19.sub2 -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %18.sub0 -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY killed %18.sub1 +# GFX10: %{{[0-9]+}}:vreg_64 = PRED_COPY %19.sub0_sub1 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY killed %19.sub2 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY %18.sub0 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY killed %18.sub1 name: gfx10_tbuffer_load_uint_32 body: | bb.0.entry: - %0:sgpr_32 = COPY $sgpr0 - %1:sgpr_32 = COPY $sgpr1 - %2:sgpr_32 = COPY $sgpr2 - %3:sgpr_32 = COPY $sgpr3 + %0:sgpr_32 = PRED_COPY $sgpr0 + %1:sgpr_32 = PRED_COPY $sgpr1 + %2:sgpr_32 = PRED_COPY $sgpr2 + %3:sgpr_32 = PRED_COPY $sgpr3 %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 20, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 8, 20, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) @@ -1057,10 +1057,10 @@ --- # GFX10-LABEL: name: gfx10_tbuffer_load_not_merged_data_format_mismatch -# GFX10: %{{[0-9]+}}:sgpr_32 = COPY $sgpr0 -# GFX10: %{{[0-9]+}}:sgpr_32 = COPY $sgpr1 -# GFX10: %{{[0-9]+}}:sgpr_32 = COPY $sgpr2 -# GFX10: %{{[0-9]+}}:sgpr_32 = COPY $sgpr3 +# GFX10: %{{[0-9]+}}:sgpr_32 = PRED_COPY $sgpr0 +# GFX10: %{{[0-9]+}}:sgpr_32 = PRED_COPY $sgpr1 +# GFX10: %{{[0-9]+}}:sgpr_32 = PRED_COPY $sgpr2 +# GFX10: %{{[0-9]+}}:sgpr_32 = PRED_COPY $sgpr3 # GFX10: %{{[0-9]+}}:sgpr_128 = REG_SEQUENCE %0, %subreg.sub0, %1, %subreg.sub1, %2, %subreg.sub2, %3, %subreg.sub3 # GFX10: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 4, 22, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) # GFX10: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 8, 13, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) @@ -1076,10 +1076,10 @@ name: gfx10_tbuffer_load_not_merged_data_format_mismatch body: | bb.0.entry: - %0:sgpr_32 = COPY $sgpr0 - %1:sgpr_32 = COPY $sgpr1 - %2:sgpr_32 = COPY $sgpr2 - %3:sgpr_32 = COPY $sgpr3 + %0:sgpr_32 = PRED_COPY $sgpr0 + %1:sgpr_32 = PRED_COPY $sgpr1 + %2:sgpr_32 = PRED_COPY $sgpr2 + %3:sgpr_32 = PRED_COPY $sgpr3 %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 22, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 8, 13, 0, 
0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) @@ -1094,10 +1094,10 @@ --- # GFX10-LABEL: name: gfx10_tbuffer_load_not_merged_num_format_mismatch -# GFX10: %{{[0-9]+}}:sgpr_32 = COPY $sgpr0 -# GFX10: %{{[0-9]+}}:sgpr_32 = COPY $sgpr1 -# GFX10: %{{[0-9]+}}:sgpr_32 = COPY $sgpr2 -# GFX10: %{{[0-9]+}}:sgpr_32 = COPY $sgpr3 +# GFX10: %{{[0-9]+}}:sgpr_32 = PRED_COPY $sgpr0 +# GFX10: %{{[0-9]+}}:sgpr_32 = PRED_COPY $sgpr1 +# GFX10: %{{[0-9]+}}:sgpr_32 = PRED_COPY $sgpr2 +# GFX10: %{{[0-9]+}}:sgpr_32 = PRED_COPY $sgpr3 # GFX10: %{{[0-9]+}}:sgpr_128 = REG_SEQUENCE %0, %subreg.sub0, %1, %subreg.sub1, %2, %subreg.sub2, %3, %subreg.sub3 # GFX10: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 4, 22, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) # GFX10: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 8, 21, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) @@ -1111,10 +1111,10 @@ name: gfx10_tbuffer_load_not_merged_num_format_mismatch body: | bb.0.entry: - %0:sgpr_32 = COPY $sgpr0 - %1:sgpr_32 = COPY $sgpr1 - %2:sgpr_32 = COPY $sgpr2 - %3:sgpr_32 = COPY $sgpr3 + %0:sgpr_32 = PRED_COPY $sgpr0 + %1:sgpr_32 = PRED_COPY $sgpr1 + %2:sgpr_32 = PRED_COPY $sgpr2 + %3:sgpr_32 = PRED_COPY $sgpr3 %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 22, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 8, 21, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) @@ -1138,14 +1138,14 @@ body: | bb.0.entry: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - %7:vgpr_32 = COPY $vgpr3 - %6:vgpr_32 = COPY $vgpr2 - %5:vgpr_32 = COPY $vgpr1 - %4:vgpr_32 = COPY $vgpr0 - %3:sgpr_32 = COPY $sgpr3 - %2:sgpr_32 = COPY $sgpr2 - %1:sgpr_32 = COPY $sgpr1 - %0:sgpr_32 = COPY $sgpr0 + %7:vgpr_32 = PRED_COPY $vgpr3 + %6:vgpr_32 = PRED_COPY $vgpr2 + %5:vgpr_32 = PRED_COPY $vgpr1 + %4:vgpr_32 = PRED_COPY $vgpr0 + %3:sgpr_32 = PRED_COPY $sgpr3 + %2:sgpr_32 = PRED_COPY $sgpr2 + %1:sgpr_32 = PRED_COPY $sgpr1 + %0:sgpr_32 = PRED_COPY $sgpr0 %13:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 %14:vreg_96 = REG_SEQUENCE %4:vgpr_32, %subreg.sub0, %5:vgpr_32, %subreg.sub1, %6:vgpr_32, %subreg.sub2 TBUFFER_STORE_FORMAT_X_OFFSET_exact %7:vgpr_32, %13:sgpr_128, 0, 4, 22, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) @@ -1162,14 +1162,14 @@ body: | bb.0.entry: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - %7:vgpr_32 = COPY $vgpr3 - %6:vgpr_32 = COPY $vgpr2 - %5:vgpr_32 = COPY $vgpr1 - %4:vgpr_32 = COPY $vgpr0 - %3:sgpr_32 = COPY $sgpr3 - %2:sgpr_32 = COPY $sgpr2 - %1:sgpr_32 = COPY $sgpr1 - %0:sgpr_32 = COPY $sgpr0 + %7:vgpr_32 = PRED_COPY $vgpr3 + %6:vgpr_32 = PRED_COPY $vgpr2 + %5:vgpr_32 = PRED_COPY $vgpr1 + %4:vgpr_32 = PRED_COPY $vgpr0 + %3:sgpr_32 = PRED_COPY $sgpr3 + %2:sgpr_32 = PRED_COPY $sgpr2 + %1:sgpr_32 = PRED_COPY $sgpr1 + %0:sgpr_32 = PRED_COPY $sgpr0 %13:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 %14:vreg_96 = REG_SEQUENCE %4:vgpr_32, %subreg.sub0, %5:vgpr_32, %subreg.sub1, %6:vgpr_32, %subreg.sub2 TBUFFER_STORE_FORMAT_XYZ_OFFSET_exact %14:vreg_96, 
%13:sgpr_128, 0, 4, 74, 0, 0, implicit $exec :: (dereferenceable store (s96), align 1, addrspace 4) @@ -1186,14 +1186,14 @@ body: | bb.0.entry: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - %7:vgpr_32 = COPY $vgpr3 - %6:vgpr_32 = COPY $vgpr2 - %5:vgpr_32 = COPY $vgpr1 - %4:vgpr_32 = COPY $vgpr0 - %3:sgpr_32 = COPY $sgpr3 - %2:sgpr_32 = COPY $sgpr2 - %1:sgpr_32 = COPY $sgpr1 - %0:sgpr_32 = COPY $sgpr0 + %7:vgpr_32 = PRED_COPY $vgpr3 + %6:vgpr_32 = PRED_COPY $vgpr2 + %5:vgpr_32 = PRED_COPY $vgpr1 + %4:vgpr_32 = PRED_COPY $vgpr0 + %3:sgpr_32 = PRED_COPY $sgpr3 + %2:sgpr_32 = PRED_COPY $sgpr2 + %1:sgpr_32 = PRED_COPY $sgpr1 + %0:sgpr_32 = PRED_COPY $sgpr0 %13:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 %14:vreg_64 = REG_SEQUENCE %4:vgpr_32, %subreg.sub0, %5:vgpr_32, %subreg.sub1 %15:vreg_64 = REG_SEQUENCE %6:vgpr_32, %subreg.sub0, %7:vgpr_32, %subreg.sub1 @@ -1210,14 +1210,14 @@ body: | bb.0.entry: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - %7:vgpr_32 = COPY $vgpr3 - %6:vgpr_32 = COPY $vgpr2 - %5:vgpr_32 = COPY $vgpr1 - %4:vgpr_32 = COPY $vgpr0 - %3:sgpr_32 = COPY $sgpr3 - %2:sgpr_32 = COPY $sgpr2 - %1:sgpr_32 = COPY $sgpr1 - %0:sgpr_32 = COPY $sgpr0 + %7:vgpr_32 = PRED_COPY $vgpr3 + %6:vgpr_32 = PRED_COPY $vgpr2 + %5:vgpr_32 = PRED_COPY $vgpr1 + %4:vgpr_32 = PRED_COPY $vgpr0 + %3:sgpr_32 = PRED_COPY $sgpr3 + %2:sgpr_32 = PRED_COPY $sgpr2 + %1:sgpr_32 = PRED_COPY $sgpr1 + %0:sgpr_32 = PRED_COPY $sgpr0 %13:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 %14:vreg_64 = REG_SEQUENCE %4:vgpr_32, %subreg.sub0, %5:vgpr_32, %subreg.sub1 TBUFFER_STORE_FORMAT_X_OFFSET_exact %7:vgpr_32, %13:sgpr_128, 0, 4, 22, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) @@ -1234,14 +1234,14 @@ bb.0.entry: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - %7:vgpr_32 = COPY $vgpr3 - %6:vgpr_32 = COPY $vgpr2 - %5:vgpr_32 = COPY $vgpr1 - %4:vgpr_32 = COPY $vgpr0 - %3:sgpr_32 = COPY $sgpr3 - %2:sgpr_32 = COPY $sgpr2 - %1:sgpr_32 = COPY $sgpr1 - %0:sgpr_32 = COPY $sgpr0 + %7:vgpr_32 = PRED_COPY $vgpr3 + %6:vgpr_32 = PRED_COPY $vgpr2 + %5:vgpr_32 = PRED_COPY $vgpr1 + %4:vgpr_32 = PRED_COPY $vgpr0 + %3:sgpr_32 = PRED_COPY $sgpr3 + %2:sgpr_32 = PRED_COPY $sgpr2 + %1:sgpr_32 = PRED_COPY $sgpr1 + %0:sgpr_32 = PRED_COPY $sgpr0 %13:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 %14:vreg_64 = REG_SEQUENCE %4:vgpr_32, %subreg.sub0, %5:vgpr_32, %subreg.sub1 TBUFFER_STORE_FORMAT_XY_OFFSET_exact %14:vreg_64, %13:sgpr_128, 0, 4, 64, 0, 0, implicit $exec :: (dereferenceable store (s64), align 1, addrspace 4) @@ -1257,14 +1257,14 @@ body: | bb.0.entry: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - %7:vgpr_32 = COPY $vgpr3 - %6:vgpr_32 = COPY $vgpr2 - %5:vgpr_32 = COPY $vgpr1 - %4:vgpr_32 = COPY $vgpr0 - %3:sgpr_32 = COPY $sgpr3 - %2:sgpr_32 = COPY $sgpr2 - %1:sgpr_32 = COPY $sgpr1 - %0:sgpr_32 = COPY $sgpr0 + %7:vgpr_32 = PRED_COPY $vgpr3 + %6:vgpr_32 = PRED_COPY $vgpr2 + %5:vgpr_32 = PRED_COPY $vgpr1 + %4:vgpr_32 = PRED_COPY $vgpr0 + %3:sgpr_32 = PRED_COPY $sgpr3 + %2:sgpr_32 = PRED_COPY $sgpr2 + %1:sgpr_32 = PRED_COPY $sgpr1 + %0:sgpr_32 = PRED_COPY $sgpr0 %13:sgpr_128 = REG_SEQUENCE 
%0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 TBUFFER_STORE_FORMAT_X_OFFSET_exact %6:vgpr_32, %13:sgpr_128, 0, 4, 22, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) TBUFFER_STORE_FORMAT_X_OFFSET_exact %7:vgpr_32, %13:sgpr_128, 0, 8, 22, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) @@ -1278,14 +1278,14 @@ body: | bb.0.entry: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - %7:vgpr_32 = COPY $vgpr3 - %6:vgpr_32 = COPY $vgpr2 - %5:vgpr_32 = COPY $vgpr1 - %4:vgpr_32 = COPY $vgpr0 - %3:sgpr_32 = COPY $sgpr3 - %2:sgpr_32 = COPY $sgpr2 - %1:sgpr_32 = COPY $sgpr1 - %0:sgpr_32 = COPY $sgpr0 + %7:vgpr_32 = PRED_COPY $vgpr3 + %6:vgpr_32 = PRED_COPY $vgpr2 + %5:vgpr_32 = PRED_COPY $vgpr1 + %4:vgpr_32 = PRED_COPY $vgpr0 + %3:sgpr_32 = PRED_COPY $sgpr3 + %2:sgpr_32 = PRED_COPY $sgpr2 + %1:sgpr_32 = PRED_COPY $sgpr1 + %0:sgpr_32 = PRED_COPY $sgpr0 %13:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 TBUFFER_STORE_FORMAT_X_OFFSET_exact %6:vgpr_32, %13:sgpr_128, 0, 4, 77, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) TBUFFER_STORE_FORMAT_X_OFFSET_exact %7:vgpr_32, %13:sgpr_128, 0, 8, 77, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) @@ -1293,19 +1293,19 @@ --- # GFX10-LABEL: name: gfx10_tbuffer_store_float32 -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr8 -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr7 -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr6 -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr5 -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr4 -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr3 -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr2 -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr1 -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr0 -# GFX10: %{{[0-9]+}}:sgpr_32 = COPY $sgpr3 -# GFX10: %{{[0-9]+}}:sgpr_32 = COPY $sgpr2 -# GFX10: %{{[0-9]+}}:sgpr_32 = COPY $sgpr1 -# GFX10: %{{[0-9]+}}:sgpr_32 = COPY $sgpr0 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr8 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr7 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr6 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr5 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr4 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr3 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr2 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr1 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr0 +# GFX10: %{{[0-9]+}}:sgpr_32 = PRED_COPY $sgpr3 +# GFX10: %{{[0-9]+}}:sgpr_32 = PRED_COPY $sgpr2 +# GFX10: %{{[0-9]+}}:sgpr_32 = PRED_COPY $sgpr1 +# GFX10: %{{[0-9]+}}:sgpr_32 = PRED_COPY $sgpr0 # GFX10: %{{[0-9]+}}:sgpr_128 = REG_SEQUENCE %12, %subreg.sub0, %11, %subreg.sub1, %10, %subreg.sub2, %9, %subreg.sub3 # GFX10: %{{[0-9]+}}:vreg_64 = REG_SEQUENCE %8, %subreg.sub0, %7, %subreg.sub1 # GFX10: TBUFFER_STORE_FORMAT_XY_OFFSET_exact killed %14, %13, 0, 4, 64, 0, 0, implicit $exec :: (dereferenceable store (s64), align 1, addrspace 4) @@ -1320,19 +1320,19 @@ body: | bb.0.entry: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 - %12:vgpr_32 = COPY $vgpr8 - %11:vgpr_32 = COPY $vgpr7 - %10:vgpr_32 = COPY $vgpr6 - %9:vgpr_32 = COPY $vgpr5 - %8:vgpr_32 = COPY $vgpr4 - %7:vgpr_32 = COPY $vgpr3 - %6:vgpr_32 = COPY $vgpr2 - %5:vgpr_32 = COPY $vgpr1 - %4:vgpr_32 = COPY $vgpr0 - %3:sgpr_32 = COPY $sgpr3 - %2:sgpr_32 = COPY $sgpr2 - %1:sgpr_32 = COPY $sgpr1 - %0:sgpr_32 = COPY 
$sgpr0 + %12:vgpr_32 = PRED_COPY $vgpr8 + %11:vgpr_32 = PRED_COPY $vgpr7 + %10:vgpr_32 = PRED_COPY $vgpr6 + %9:vgpr_32 = PRED_COPY $vgpr5 + %8:vgpr_32 = PRED_COPY $vgpr4 + %7:vgpr_32 = PRED_COPY $vgpr3 + %6:vgpr_32 = PRED_COPY $vgpr2 + %5:vgpr_32 = PRED_COPY $vgpr1 + %4:vgpr_32 = PRED_COPY $vgpr0 + %3:sgpr_32 = PRED_COPY $sgpr3 + %2:sgpr_32 = PRED_COPY $sgpr2 + %1:sgpr_32 = PRED_COPY $sgpr1 + %0:sgpr_32 = PRED_COPY $sgpr0 %13:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 TBUFFER_STORE_FORMAT_X_OFFSET_exact %4:vgpr_32, %13:sgpr_128, 0, 4, 22, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) TBUFFER_STORE_FORMAT_X_OFFSET_exact %5:vgpr_32, %13:sgpr_128, 0, 8, 22, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) @@ -1347,19 +1347,19 @@ --- # GFX10-LABEL: name: gfx10_tbuffer_store_sint32 -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr8 -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr7 -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr6 -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr5 -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr4 -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr3 -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr2 -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr1 -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr0 -# GFX10: %{{[0-9]+}}:sgpr_32 = COPY $sgpr3 -# GFX10: %{{[0-9]+}}:sgpr_32 = COPY $sgpr2 -# GFX10: %{{[0-9]+}}:sgpr_32 = COPY $sgpr1 -# GFX10: %{{[0-9]+}}:sgpr_32 = COPY $sgpr0 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr8 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr7 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr6 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr5 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr4 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr3 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr2 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr1 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr0 +# GFX10: %{{[0-9]+}}:sgpr_32 = PRED_COPY $sgpr3 +# GFX10: %{{[0-9]+}}:sgpr_32 = PRED_COPY $sgpr2 +# GFX10: %{{[0-9]+}}:sgpr_32 = PRED_COPY $sgpr1 +# GFX10: %{{[0-9]+}}:sgpr_32 = PRED_COPY $sgpr0 # GFX10: %{{[0-9]+}}:sgpr_128 = REG_SEQUENCE %12, %subreg.sub0, %11, %subreg.sub1, %10, %subreg.sub2, %9, %subreg.sub3 # GFX10: %{{[0-9]+}}:vreg_64 = REG_SEQUENCE %8, %subreg.sub0, %7, %subreg.sub1 # GFX10: TBUFFER_STORE_FORMAT_XY_OFFSET_exact killed %14, %13, 0, 4, 63, 0, 0, implicit $exec :: (dereferenceable store (s64), align 1, addrspace 4) @@ -1374,19 +1374,19 @@ body: | bb.0.entry: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 - %12:vgpr_32 = COPY $vgpr8 - %11:vgpr_32 = COPY $vgpr7 - %10:vgpr_32 = COPY $vgpr6 - %9:vgpr_32 = COPY $vgpr5 - %8:vgpr_32 = COPY $vgpr4 - %7:vgpr_32 = COPY $vgpr3 - %6:vgpr_32 = COPY $vgpr2 - %5:vgpr_32 = COPY $vgpr1 - %4:vgpr_32 = COPY $vgpr0 - %3:sgpr_32 = COPY $sgpr3 - %2:sgpr_32 = COPY $sgpr2 - %1:sgpr_32 = COPY $sgpr1 - %0:sgpr_32 = COPY $sgpr0 + %12:vgpr_32 = PRED_COPY $vgpr8 + %11:vgpr_32 = PRED_COPY $vgpr7 + %10:vgpr_32 = PRED_COPY $vgpr6 + %9:vgpr_32 = PRED_COPY $vgpr5 + %8:vgpr_32 = PRED_COPY $vgpr4 + %7:vgpr_32 = PRED_COPY $vgpr3 + %6:vgpr_32 = PRED_COPY $vgpr2 + %5:vgpr_32 = PRED_COPY $vgpr1 + %4:vgpr_32 = PRED_COPY $vgpr0 + %3:sgpr_32 = PRED_COPY $sgpr3 + %2:sgpr_32 = PRED_COPY $sgpr2 + %1:sgpr_32 = PRED_COPY $sgpr1 + %0:sgpr_32 = PRED_COPY $sgpr0 %13:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 
TBUFFER_STORE_FORMAT_X_OFFSET_exact %4:vgpr_32, %13:sgpr_128, 0, 4, 21, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) TBUFFER_STORE_FORMAT_X_OFFSET_exact %5:vgpr_32, %13:sgpr_128, 0, 8, 21, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) @@ -1401,19 +1401,19 @@ --- # GFX10-LABEL: name: gfx10_tbuffer_store_uint32 -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr8 -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr7 -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr6 -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr5 -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr4 -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr3 -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr2 -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr1 -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr0 -# GFX10: %{{[0-9]+}}:sgpr_32 = COPY $sgpr3 -# GFX10: %{{[0-9]+}}:sgpr_32 = COPY $sgpr2 -# GFX10: %{{[0-9]+}}:sgpr_32 = COPY $sgpr1 -# GFX10: %{{[0-9]+}}:sgpr_32 = COPY $sgpr0 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr8 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr7 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr6 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr5 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr4 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr3 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr2 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr1 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr0 +# GFX10: %{{[0-9]+}}:sgpr_32 = PRED_COPY $sgpr3 +# GFX10: %{{[0-9]+}}:sgpr_32 = PRED_COPY $sgpr2 +# GFX10: %{{[0-9]+}}:sgpr_32 = PRED_COPY $sgpr1 +# GFX10: %{{[0-9]+}}:sgpr_32 = PRED_COPY $sgpr0 # GFX10: %{{[0-9]+}}:sgpr_128 = REG_SEQUENCE %12, %subreg.sub0, %11, %subreg.sub1, %10, %subreg.sub2, %9, %subreg.sub3 # GFX10: %{{[0-9]+}}:vreg_64 = REG_SEQUENCE %8, %subreg.sub0, %7, %subreg.sub1 # GFX10: TBUFFER_STORE_FORMAT_XY_OFFSET_exact killed %14, %13, 0, 4, 62, 0, 0, implicit $exec :: (dereferenceable store (s64), align 1, addrspace 4) @@ -1428,19 +1428,19 @@ body: | bb.0.entry: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 - %12:vgpr_32 = COPY $vgpr8 - %11:vgpr_32 = COPY $vgpr7 - %10:vgpr_32 = COPY $vgpr6 - %9:vgpr_32 = COPY $vgpr5 - %8:vgpr_32 = COPY $vgpr4 - %7:vgpr_32 = COPY $vgpr3 - %6:vgpr_32 = COPY $vgpr2 - %5:vgpr_32 = COPY $vgpr1 - %4:vgpr_32 = COPY $vgpr0 - %3:sgpr_32 = COPY $sgpr3 - %2:sgpr_32 = COPY $sgpr2 - %1:sgpr_32 = COPY $sgpr1 - %0:sgpr_32 = COPY $sgpr0 + %12:vgpr_32 = PRED_COPY $vgpr8 + %11:vgpr_32 = PRED_COPY $vgpr7 + %10:vgpr_32 = PRED_COPY $vgpr6 + %9:vgpr_32 = PRED_COPY $vgpr5 + %8:vgpr_32 = PRED_COPY $vgpr4 + %7:vgpr_32 = PRED_COPY $vgpr3 + %6:vgpr_32 = PRED_COPY $vgpr2 + %5:vgpr_32 = PRED_COPY $vgpr1 + %4:vgpr_32 = PRED_COPY $vgpr0 + %3:sgpr_32 = PRED_COPY $sgpr3 + %2:sgpr_32 = PRED_COPY $sgpr2 + %1:sgpr_32 = PRED_COPY $sgpr1 + %0:sgpr_32 = PRED_COPY $sgpr0 %13:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 TBUFFER_STORE_FORMAT_X_OFFSET_exact %4:vgpr_32, %13:sgpr_128, 0, 4, 20, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) TBUFFER_STORE_FORMAT_X_OFFSET_exact %5:vgpr_32, %13:sgpr_128, 0, 8, 20, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) @@ -1455,19 +1455,19 @@ --- # GFX10-LABEL: name: gfx10_tbuffer_store_not_merged_data_format_mismatch -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr8 -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr7 -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr6 -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY 
$vgpr5 -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr4 -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr3 -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr2 -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr1 -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr0 -# GFX10: %{{[0-9]+}}:sgpr_32 = COPY $sgpr3 -# GFX10: %{{[0-9]+}}:sgpr_32 = COPY $sgpr2 -# GFX10: %{{[0-9]+}}:sgpr_32 = COPY $sgpr1 -# GFX10: %{{[0-9]+}}:sgpr_32 = COPY $sgpr0 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr8 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr7 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr6 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr5 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr4 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr3 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr2 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr1 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr0 +# GFX10: %{{[0-9]+}}:sgpr_32 = PRED_COPY $sgpr3 +# GFX10: %{{[0-9]+}}:sgpr_32 = PRED_COPY $sgpr2 +# GFX10: %{{[0-9]+}}:sgpr_32 = PRED_COPY $sgpr1 +# GFX10: %{{[0-9]+}}:sgpr_32 = PRED_COPY $sgpr0 # GFX10: %{{[0-9]+}}:sgpr_128 = REG_SEQUENCE %12, %subreg.sub0, %11, %subreg.sub1, %10, %subreg.sub2, %9, %subreg.sub3 # GFX10: TBUFFER_STORE_FORMAT_X_OFFSET_exact %8, %13, 0, 4, 22, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) # GFX10: TBUFFER_STORE_FORMAT_X_OFFSET_exact %7, %13, 0, 8, 21, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) @@ -1482,19 +1482,19 @@ body: | bb.0.entry: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 - %12:vgpr_32 = COPY $vgpr8 - %11:vgpr_32 = COPY $vgpr7 - %10:vgpr_32 = COPY $vgpr6 - %9:vgpr_32 = COPY $vgpr5 - %8:vgpr_32 = COPY $vgpr4 - %7:vgpr_32 = COPY $vgpr3 - %6:vgpr_32 = COPY $vgpr2 - %5:vgpr_32 = COPY $vgpr1 - %4:vgpr_32 = COPY $vgpr0 - %3:sgpr_32 = COPY $sgpr3 - %2:sgpr_32 = COPY $sgpr2 - %1:sgpr_32 = COPY $sgpr1 - %0:sgpr_32 = COPY $sgpr0 + %12:vgpr_32 = PRED_COPY $vgpr8 + %11:vgpr_32 = PRED_COPY $vgpr7 + %10:vgpr_32 = PRED_COPY $vgpr6 + %9:vgpr_32 = PRED_COPY $vgpr5 + %8:vgpr_32 = PRED_COPY $vgpr4 + %7:vgpr_32 = PRED_COPY $vgpr3 + %6:vgpr_32 = PRED_COPY $vgpr2 + %5:vgpr_32 = PRED_COPY $vgpr1 + %4:vgpr_32 = PRED_COPY $vgpr0 + %3:sgpr_32 = PRED_COPY $sgpr3 + %2:sgpr_32 = PRED_COPY $sgpr2 + %1:sgpr_32 = PRED_COPY $sgpr1 + %0:sgpr_32 = PRED_COPY $sgpr0 %13:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 TBUFFER_STORE_FORMAT_X_OFFSET_exact %4:vgpr_32, %13:sgpr_128, 0, 4, 22, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) TBUFFER_STORE_FORMAT_X_OFFSET_exact %5:vgpr_32, %13:sgpr_128, 0, 8, 21, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) @@ -1509,19 +1509,19 @@ --- # GFX10-LABEL: name: gfx10_tbuffer_store_not_merged_num_format_mismatch -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr8 -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr7 -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr6 -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr5 -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr4 -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr3 -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr2 -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr1 -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr0 -# GFX10: %{{[0-9]+}}:sgpr_32 = COPY $sgpr3 -# GFX10: %{{[0-9]+}}:sgpr_32 = COPY $sgpr2 -# GFX10: %{{[0-9]+}}:sgpr_32 = COPY $sgpr1 -# GFX10: %{{[0-9]+}}:sgpr_32 = COPY $sgpr0 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr8 +# GFX10: %{{[0-9]+}}:vgpr_32 = 
PRED_COPY $vgpr7 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr6 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr5 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr4 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr3 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr2 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr1 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr0 +# GFX10: %{{[0-9]+}}:sgpr_32 = PRED_COPY $sgpr3 +# GFX10: %{{[0-9]+}}:sgpr_32 = PRED_COPY $sgpr2 +# GFX10: %{{[0-9]+}}:sgpr_32 = PRED_COPY $sgpr1 +# GFX10: %{{[0-9]+}}:sgpr_32 = PRED_COPY $sgpr0 # GFX10: %{{[0-9]+}}:sgpr_128 = REG_SEQUENCE %12, %subreg.sub0, %11, %subreg.sub1, %10, %subreg.sub2, %9, %subreg.sub3 # GFX10: TBUFFER_STORE_FORMAT_X_OFFSET_exact %8, %13, 0, 4, 22, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) # GFX10: TBUFFER_STORE_FORMAT_X_OFFSET_exact %7, %13, 0, 8, 13, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) @@ -1536,19 +1536,19 @@ body: | bb.0.entry: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 - %12:vgpr_32 = COPY $vgpr8 - %11:vgpr_32 = COPY $vgpr7 - %10:vgpr_32 = COPY $vgpr6 - %9:vgpr_32 = COPY $vgpr5 - %8:vgpr_32 = COPY $vgpr4 - %7:vgpr_32 = COPY $vgpr3 - %6:vgpr_32 = COPY $vgpr2 - %5:vgpr_32 = COPY $vgpr1 - %4:vgpr_32 = COPY $vgpr0 - %3:sgpr_32 = COPY $sgpr3 - %2:sgpr_32 = COPY $sgpr2 - %1:sgpr_32 = COPY $sgpr1 - %0:sgpr_32 = COPY $sgpr0 + %12:vgpr_32 = PRED_COPY $vgpr8 + %11:vgpr_32 = PRED_COPY $vgpr7 + %10:vgpr_32 = PRED_COPY $vgpr6 + %9:vgpr_32 = PRED_COPY $vgpr5 + %8:vgpr_32 = PRED_COPY $vgpr4 + %7:vgpr_32 = PRED_COPY $vgpr3 + %6:vgpr_32 = PRED_COPY $vgpr2 + %5:vgpr_32 = PRED_COPY $vgpr1 + %4:vgpr_32 = PRED_COPY $vgpr0 + %3:sgpr_32 = PRED_COPY $sgpr3 + %2:sgpr_32 = PRED_COPY $sgpr2 + %1:sgpr_32 = PRED_COPY $sgpr1 + %0:sgpr_32 = PRED_COPY $sgpr0 %13:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 TBUFFER_STORE_FORMAT_X_OFFSET_exact %4:vgpr_32, %13:sgpr_128, 0, 4, 22, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) TBUFFER_STORE_FORMAT_X_OFFSET_exact %5:vgpr_32, %13:sgpr_128, 0, 8, 13, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) @@ -1569,10 +1569,10 @@ name: gfx10_tbuffer_load_not_merged_swizzled_0 body: | bb.0.entry: - %0:sgpr_32 = COPY $sgpr0 - %1:sgpr_32 = COPY $sgpr1 - %2:sgpr_32 = COPY $sgpr2 - %3:sgpr_32 = COPY $sgpr3 + %0:sgpr_32 = PRED_COPY $sgpr0 + %1:sgpr_32 = PRED_COPY $sgpr1 + %2:sgpr_32 = PRED_COPY $sgpr2 + %3:sgpr_32 = PRED_COPY $sgpr3 %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 22, 0, 1, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 8, 22, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) @@ -1586,10 +1586,10 @@ name: gfx10_tbuffer_load_not_merged_swizzled_1 body: | bb.0.entry: - %0:sgpr_32 = COPY $sgpr0 - %1:sgpr_32 = COPY $sgpr1 - %2:sgpr_32 = COPY $sgpr2 - %3:sgpr_32 = COPY $sgpr3 + %0:sgpr_32 = PRED_COPY $sgpr0 + %1:sgpr_32 = PRED_COPY $sgpr1 + %2:sgpr_32 = PRED_COPY $sgpr2 + %3:sgpr_32 = PRED_COPY $sgpr3 %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET 
%5:sgpr_128, 0, 4, 22, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 8, 22, 0, 1, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) @@ -1603,10 +1603,10 @@ name: gfx10_tbuffer_load_merge_across_swizzle body: | bb.0.entry: - %0:sgpr_32 = COPY $sgpr0 - %1:sgpr_32 = COPY $sgpr1 - %2:sgpr_32 = COPY $sgpr2 - %3:sgpr_32 = COPY $sgpr3 + %0:sgpr_32 = PRED_COPY $sgpr0 + %1:sgpr_32 = PRED_COPY $sgpr1 + %2:sgpr_32 = PRED_COPY $sgpr2 + %3:sgpr_32 = PRED_COPY $sgpr3 %4:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 %5:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4:sgpr_128, 0, 4, 22, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) %6:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4:sgpr_128, 0, 12, 22, 0, 1, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) @@ -1621,15 +1621,15 @@ # GFX11-LABEL: name: gfx11_tbuffer_load_x_xyz # GFX11: %{{[0-9]+}}:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_OFFSET %4, 0, 4, 63, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 4) -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY %7.sub0 -# GFX11: %{{[0-9]+}}:vreg_96 = COPY killed %7.sub1_sub2_sub3 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY %7.sub0 +# GFX11: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %7.sub1_sub2_sub3 name: gfx11_tbuffer_load_x_xyz body: | bb.0.entry: - %0:sgpr_32 = COPY $sgpr0 - %1:sgpr_32 = COPY $sgpr1 - %2:sgpr_32 = COPY $sgpr2 - %3:sgpr_32 = COPY $sgpr3 + %0:sgpr_32 = PRED_COPY $sgpr0 + %1:sgpr_32 = PRED_COPY $sgpr1 + %2:sgpr_32 = PRED_COPY $sgpr2 + %3:sgpr_32 = PRED_COPY $sgpr3 %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 22, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) %8:vreg_96 = TBUFFER_LOAD_FORMAT_XYZ_OFFSET %5:sgpr_128, 0, 8, 60, 0, 0, implicit $exec :: (dereferenceable load 12, align 1, addrspace 4) @@ -1638,15 +1638,15 @@ # GFX11-LABEL: name: gfx11_tbuffer_load_xyz_x # GFX11: %{{[0-9]+}}:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_OFFSET %4, 0, 4, 63, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 4) -# GFX11: %{{[0-9]+}}:vreg_96 = COPY %7.sub0_sub1_sub2 -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY killed %7.sub3 +# GFX11: %{{[0-9]+}}:vreg_96 = PRED_COPY %7.sub0_sub1_sub2 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY killed %7.sub3 name: gfx11_tbuffer_load_xyz_x body: | bb.0.entry: - %0:sgpr_32 = COPY $sgpr0 - %1:sgpr_32 = COPY $sgpr1 - %2:sgpr_32 = COPY $sgpr2 - %3:sgpr_32 = COPY $sgpr3 + %0:sgpr_32 = PRED_COPY $sgpr0 + %1:sgpr_32 = PRED_COPY $sgpr1 + %2:sgpr_32 = PRED_COPY $sgpr2 + %3:sgpr_32 = PRED_COPY $sgpr3 %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 %7:vreg_96 = TBUFFER_LOAD_FORMAT_XYZ_OFFSET %5:sgpr_128, 0, 4, 60, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 16, 22, 0, 0, implicit $exec :: (dereferenceable load 12, align 1, addrspace 4) @@ -1655,15 +1655,15 @@ # GFX11-LABEL: name: gfx11_tbuffer_load_xy_xy # GFX11: %{{[0-9]+}}:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_OFFSET %4, 0, 4, 63, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 4) -# GFX11: %{{[0-9]+}}:vreg_64 = COPY %7.sub0_sub1 -# GFX11: 
%{{[0-9]+}}:vreg_64 = COPY killed %7.sub2_sub3 +# GFX11: %{{[0-9]+}}:vreg_64 = PRED_COPY %7.sub0_sub1 +# GFX11: %{{[0-9]+}}:vreg_64 = PRED_COPY killed %7.sub2_sub3 name: gfx11_tbuffer_load_xy_xy body: | bb.0.entry: - %0:sgpr_32 = COPY $sgpr0 - %1:sgpr_32 = COPY $sgpr1 - %2:sgpr_32 = COPY $sgpr2 - %3:sgpr_32 = COPY $sgpr3 + %0:sgpr_32 = PRED_COPY $sgpr0 + %1:sgpr_32 = PRED_COPY $sgpr1 + %2:sgpr_32 = PRED_COPY $sgpr2 + %3:sgpr_32 = PRED_COPY $sgpr3 %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 %7:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFSET %5:sgpr_128, 0, 4, 50, 0, 0, implicit $exec :: (dereferenceable load 8, align 1, addrspace 4) %8:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFSET %5:sgpr_128, 0, 12, 50, 0, 0, implicit $exec :: (dereferenceable load 8, align 1, addrspace 4) @@ -1672,15 +1672,15 @@ # GFX11-LABEL: name: gfx11_tbuffer_load_x_xy # GFX11: %{{[0-9]+}}:vreg_96 = TBUFFER_LOAD_FORMAT_XYZ_OFFSET %4, 0, 4, 60, 0, 0, implicit $exec :: (dereferenceable load (s96), align 1, addrspace 4) -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY %7.sub0 -# GFX11: %{{[0-9]+}}:vreg_64 = COPY killed %7.sub1_sub2 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY %7.sub0 +# GFX11: %{{[0-9]+}}:vreg_64 = PRED_COPY killed %7.sub1_sub2 name: gfx11_tbuffer_load_x_xy body: | bb.0.entry: - %0:sgpr_32 = COPY $sgpr0 - %1:sgpr_32 = COPY $sgpr1 - %2:sgpr_32 = COPY $sgpr2 - %3:sgpr_32 = COPY $sgpr3 + %0:sgpr_32 = PRED_COPY $sgpr0 + %1:sgpr_32 = PRED_COPY $sgpr1 + %2:sgpr_32 = PRED_COPY $sgpr2 + %3:sgpr_32 = PRED_COPY $sgpr3 %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 22, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) %8:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFSET %5:sgpr_128, 0, 8, 50, 0, 0, implicit $exec :: (dereferenceable load 8, align 1, addrspace 4) @@ -1689,15 +1689,15 @@ # GFX11-LABEL: name: gfx11_tbuffer_load_xy_x # GFX11: %{{[0-9]+}}:vreg_96 = TBUFFER_LOAD_FORMAT_XYZ_OFFSET %4, 0, 4, 60, 0, 0, implicit $exec :: (dereferenceable load (s96), align 1, addrspace 4) -# GFX11: %{{[0-9]+}}:vreg_64 = COPY %7.sub0_sub1 -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY killed %7.sub2 +# GFX11: %{{[0-9]+}}:vreg_64 = PRED_COPY %7.sub0_sub1 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY killed %7.sub2 name: gfx11_tbuffer_load_xy_x body: | bb.0.entry: - %0:sgpr_32 = COPY $sgpr0 - %1:sgpr_32 = COPY $sgpr1 - %2:sgpr_32 = COPY $sgpr2 - %3:sgpr_32 = COPY $sgpr3 + %0:sgpr_32 = PRED_COPY $sgpr0 + %1:sgpr_32 = PRED_COPY $sgpr1 + %2:sgpr_32 = PRED_COPY $sgpr2 + %3:sgpr_32 = PRED_COPY $sgpr3 %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 %7:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFSET %5:sgpr_128, 0, 4, 50, 0, 0, implicit $exec :: (dereferenceable load 8, align 1, addrspace 4) %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 12, 22, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) @@ -1707,16 +1707,16 @@ # GFX11-LABEL: name: gfx11_tbuffer_load_x_x # GFX11: %{{[0-9]+}}:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFSET %4, 0, 4, 50, 0, 0, implicit $exec :: (dereferenceable load (s64), align 1, addrspace 4) -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY %7.sub0 -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY killed %7.sub1 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY %7.sub0 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY killed %7.sub1 name: gfx11_tbuffer_load_x_x 
body: | bb.0.entry: - %0:sgpr_32 = COPY $sgpr0 - %1:sgpr_32 = COPY $sgpr1 - %2:sgpr_32 = COPY $sgpr2 - %3:sgpr_32 = COPY $sgpr3 + %0:sgpr_32 = PRED_COPY $sgpr0 + %1:sgpr_32 = PRED_COPY $sgpr1 + %2:sgpr_32 = PRED_COPY $sgpr2 + %3:sgpr_32 = PRED_COPY $sgpr3 %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 22, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 8, 22, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) @@ -1725,16 +1725,16 @@ # GFX11-LABEL: name: gfx11_tbuffer_load_x_x_format_32_32_32_32 # GFX11: %{{[0-9]+}}:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFSET %4, 0, 4, 50, 0, 0, implicit $exec :: (dereferenceable load (s64), align 1, addrspace 4) -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY %7.sub0 -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY killed %7.sub1 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY %7.sub0 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY killed %7.sub1 name: gfx11_tbuffer_load_x_x_format_32_32_32_32 body: | bb.0.entry: - %0:sgpr_32 = COPY $sgpr0 - %1:sgpr_32 = COPY $sgpr1 - %2:sgpr_32 = COPY $sgpr2 - %3:sgpr_32 = COPY $sgpr3 + %0:sgpr_32 = PRED_COPY $sgpr0 + %1:sgpr_32 = PRED_COPY $sgpr1 + %2:sgpr_32 = PRED_COPY $sgpr2 + %3:sgpr_32 = PRED_COPY $sgpr3 %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 63, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 8, 63, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) @@ -1744,28 +1744,28 @@ # GFX11-LABEL: name: gfx11_tbuffer_load_float_32 # GFX11: %{{[0-9]+}}:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFSET %4, 0, 4, 50, 0, 0, implicit $exec :: (dereferenceable load (s64), align 1, addrspace 4) -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY %14.sub0 -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY killed %14.sub1 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY %14.sub0 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY killed %14.sub1 # GFX11: %{{[0-9]+}}:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_OFFSET %4, 0, 16, 63, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 4) -# GFX11: %{{[0-9]+}}:vreg_96 = COPY %17.sub0_sub1_sub2 -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY killed %17.sub3 -# GFX11: %{{[0-9]+}}:vreg_64 = COPY %16.sub0_sub1 -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY killed %16.sub2 -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY %15.sub0 -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY killed %15.sub1 +# GFX11: %{{[0-9]+}}:vreg_96 = PRED_COPY %17.sub0_sub1_sub2 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY killed %17.sub3 +# GFX11: %{{[0-9]+}}:vreg_64 = PRED_COPY %16.sub0_sub1 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY killed %16.sub2 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY %15.sub0 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY killed %15.sub1 # GFX11: %{{[0-9]+}}:vreg_96 = TBUFFER_LOAD_FORMAT_XYZ_OFFSET %4, 0, 36, 60, 0, 0, implicit $exec :: (dereferenceable load (s96), align 1, addrspace 4) -# GFX11: %{{[0-9]+}}:vreg_64 = COPY %19.sub0_sub1 -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY killed %19.sub2 -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY %18.sub0 -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY killed %18.sub1 +# GFX11: %{{[0-9]+}}:vreg_64 = PRED_COPY %19.sub0_sub1 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY killed %19.sub2 +# GFX11: %{{[0-9]+}}:vgpr_32 = 
PRED_COPY %18.sub0 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY killed %18.sub1 name: gfx11_tbuffer_load_float_32 body: | bb.0.entry: - %0:sgpr_32 = COPY $sgpr0 - %1:sgpr_32 = COPY $sgpr1 - %2:sgpr_32 = COPY $sgpr2 - %3:sgpr_32 = COPY $sgpr3 + %0:sgpr_32 = PRED_COPY $sgpr0 + %1:sgpr_32 = PRED_COPY $sgpr1 + %2:sgpr_32 = PRED_COPY $sgpr2 + %3:sgpr_32 = PRED_COPY $sgpr3 %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 22, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 8, 22, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) @@ -1781,28 +1781,28 @@ # GFX11-LABEL: name: gfx11_tbuffer_load_sint_32 # GFX11: %{{[0-9]+}}:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFSET %4, 0, 4, 49, 0, 0, implicit $exec :: (dereferenceable load (s64), align 1, addrspace 4) -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY %14.sub0 -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY killed %14.sub1 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY %14.sub0 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY killed %14.sub1 # GFX11: %{{[0-9]+}}:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_OFFSET %4, 0, 16, 62, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 4) -# GFX11: %{{[0-9]+}}:vreg_96 = COPY %17.sub0_sub1_sub2 -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY killed %17.sub3 -# GFX11: %{{[0-9]+}}:vreg_64 = COPY %16.sub0_sub1 -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY killed %16.sub2 -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY %15.sub0 -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY killed %15.sub1 +# GFX11: %{{[0-9]+}}:vreg_96 = PRED_COPY %17.sub0_sub1_sub2 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY killed %17.sub3 +# GFX11: %{{[0-9]+}}:vreg_64 = PRED_COPY %16.sub0_sub1 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY killed %16.sub2 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY %15.sub0 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY killed %15.sub1 # GFX11: %{{[0-9]+}}:vreg_96 = TBUFFER_LOAD_FORMAT_XYZ_OFFSET %4, 0, 36, 59, 0, 0, implicit $exec :: (dereferenceable load (s96), align 1, addrspace 4) -# GFX11: %{{[0-9]+}}:vreg_64 = COPY %19.sub0_sub1 -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY killed %19.sub2 -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY %18.sub0 -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY killed %18.sub1 +# GFX11: %{{[0-9]+}}:vreg_64 = PRED_COPY %19.sub0_sub1 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY killed %19.sub2 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY %18.sub0 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY killed %18.sub1 name: gfx11_tbuffer_load_sint_32 body: | bb.0.entry: - %0:sgpr_32 = COPY $sgpr0 - %1:sgpr_32 = COPY $sgpr1 - %2:sgpr_32 = COPY $sgpr2 - %3:sgpr_32 = COPY $sgpr3 + %0:sgpr_32 = PRED_COPY $sgpr0 + %1:sgpr_32 = PRED_COPY $sgpr1 + %2:sgpr_32 = PRED_COPY $sgpr2 + %3:sgpr_32 = PRED_COPY $sgpr3 %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 21, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 8, 21, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) @@ -1818,28 +1818,28 @@ # GFX11-LABEL: name: gfx11_tbuffer_load_uint_32 # GFX11: %{{[0-9]+}}:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFSET %4, 0, 4, 48, 0, 0, implicit $exec :: (dereferenceable load (s64), align 1, addrspace 4) -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY %14.sub0 -# GFX11: 
%{{[0-9]+}}:vgpr_32 = COPY killed %14.sub1 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY %14.sub0 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY killed %14.sub1 # GFX11: %{{[0-9]+}}:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_OFFSET %4, 0, 16, 61, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 4) -# GFX11: %{{[0-9]+}}:vreg_96 = COPY %17.sub0_sub1_sub2 -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY killed %17.sub3 -# GFX11: %{{[0-9]+}}:vreg_64 = COPY %16.sub0_sub1 -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY killed %16.sub2 -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY %15.sub0 -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY killed %15.sub1 +# GFX11: %{{[0-9]+}}:vreg_96 = PRED_COPY %17.sub0_sub1_sub2 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY killed %17.sub3 +# GFX11: %{{[0-9]+}}:vreg_64 = PRED_COPY %16.sub0_sub1 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY killed %16.sub2 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY %15.sub0 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY killed %15.sub1 # GFX11: %{{[0-9]+}}:vreg_96 = TBUFFER_LOAD_FORMAT_XYZ_OFFSET %4, 0, 36, 58, 0, 0, implicit $exec :: (dereferenceable load (s96), align 1, addrspace 4) -# GFX11: %{{[0-9]+}}:vreg_64 = COPY %19.sub0_sub1 -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY killed %19.sub2 -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY %18.sub0 -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY killed %18.sub1 +# GFX11: %{{[0-9]+}}:vreg_64 = PRED_COPY %19.sub0_sub1 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY killed %19.sub2 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY %18.sub0 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY killed %18.sub1 name: gfx11_tbuffer_load_uint_32 body: | bb.0.entry: - %0:sgpr_32 = COPY $sgpr0 - %1:sgpr_32 = COPY $sgpr1 - %2:sgpr_32 = COPY $sgpr2 - %3:sgpr_32 = COPY $sgpr3 + %0:sgpr_32 = PRED_COPY $sgpr0 + %1:sgpr_32 = PRED_COPY $sgpr1 + %2:sgpr_32 = PRED_COPY $sgpr2 + %3:sgpr_32 = PRED_COPY $sgpr3 %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 20, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 8, 20, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) @@ -1854,10 +1854,10 @@ --- # GFX11-LABEL: name: gfx11_tbuffer_load_not_merged_data_format_mismatch -# GFX11: %{{[0-9]+}}:sgpr_32 = COPY $sgpr0 -# GFX11: %{{[0-9]+}}:sgpr_32 = COPY $sgpr1 -# GFX11: %{{[0-9]+}}:sgpr_32 = COPY $sgpr2 -# GFX11: %{{[0-9]+}}:sgpr_32 = COPY $sgpr3 +# GFX11: %{{[0-9]+}}:sgpr_32 = PRED_COPY $sgpr0 +# GFX11: %{{[0-9]+}}:sgpr_32 = PRED_COPY $sgpr1 +# GFX11: %{{[0-9]+}}:sgpr_32 = PRED_COPY $sgpr2 +# GFX11: %{{[0-9]+}}:sgpr_32 = PRED_COPY $sgpr3 # GFX11: %{{[0-9]+}}:sgpr_128 = REG_SEQUENCE %0, %subreg.sub0, %1, %subreg.sub1, %2, %subreg.sub2, %3, %subreg.sub3 # GFX11: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 4, 22, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) # GFX11: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 8, 13, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) @@ -1873,10 +1873,10 @@ name: gfx11_tbuffer_load_not_merged_data_format_mismatch body: | bb.0.entry: - %0:sgpr_32 = COPY $sgpr0 - %1:sgpr_32 = COPY $sgpr1 - %2:sgpr_32 = COPY $sgpr2 - %3:sgpr_32 = COPY $sgpr3 + %0:sgpr_32 = PRED_COPY $sgpr0 + %1:sgpr_32 = PRED_COPY $sgpr1 + %2:sgpr_32 = PRED_COPY $sgpr2 + %3:sgpr_32 = PRED_COPY $sgpr3 %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, 
%subreg.sub2, %3:sgpr_32, %subreg.sub3 %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 22, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 8, 13, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) @@ -1891,10 +1891,10 @@ --- # GFX11-LABEL: name: gfx11_tbuffer_load_not_merged_num_format_mismatch -# GFX11: %{{[0-9]+}}:sgpr_32 = COPY $sgpr0 -# GFX11: %{{[0-9]+}}:sgpr_32 = COPY $sgpr1 -# GFX11: %{{[0-9]+}}:sgpr_32 = COPY $sgpr2 -# GFX11: %{{[0-9]+}}:sgpr_32 = COPY $sgpr3 +# GFX11: %{{[0-9]+}}:sgpr_32 = PRED_COPY $sgpr0 +# GFX11: %{{[0-9]+}}:sgpr_32 = PRED_COPY $sgpr1 +# GFX11: %{{[0-9]+}}:sgpr_32 = PRED_COPY $sgpr2 +# GFX11: %{{[0-9]+}}:sgpr_32 = PRED_COPY $sgpr3 # GFX11: %{{[0-9]+}}:sgpr_128 = REG_SEQUENCE %0, %subreg.sub0, %1, %subreg.sub1, %2, %subreg.sub2, %3, %subreg.sub3 # GFX11: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 4, 22, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) # GFX11: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 8, 21, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) @@ -1908,10 +1908,10 @@ name: gfx11_tbuffer_load_not_merged_num_format_mismatch body: | bb.0.entry: - %0:sgpr_32 = COPY $sgpr0 - %1:sgpr_32 = COPY $sgpr1 - %2:sgpr_32 = COPY $sgpr2 - %3:sgpr_32 = COPY $sgpr3 + %0:sgpr_32 = PRED_COPY $sgpr0 + %1:sgpr_32 = PRED_COPY $sgpr1 + %2:sgpr_32 = PRED_COPY $sgpr2 + %3:sgpr_32 = PRED_COPY $sgpr3 %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 22, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 8, 21, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) @@ -1935,14 +1935,14 @@ body: | bb.0.entry: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - %7:vgpr_32 = COPY $vgpr3 - %6:vgpr_32 = COPY $vgpr2 - %5:vgpr_32 = COPY $vgpr1 - %4:vgpr_32 = COPY $vgpr0 - %3:sgpr_32 = COPY $sgpr3 - %2:sgpr_32 = COPY $sgpr2 - %1:sgpr_32 = COPY $sgpr1 - %0:sgpr_32 = COPY $sgpr0 + %7:vgpr_32 = PRED_COPY $vgpr3 + %6:vgpr_32 = PRED_COPY $vgpr2 + %5:vgpr_32 = PRED_COPY $vgpr1 + %4:vgpr_32 = PRED_COPY $vgpr0 + %3:sgpr_32 = PRED_COPY $sgpr3 + %2:sgpr_32 = PRED_COPY $sgpr2 + %1:sgpr_32 = PRED_COPY $sgpr1 + %0:sgpr_32 = PRED_COPY $sgpr0 %13:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 %14:vreg_96 = REG_SEQUENCE %4:vgpr_32, %subreg.sub0, %5:vgpr_32, %subreg.sub1, %6:vgpr_32, %subreg.sub2 TBUFFER_STORE_FORMAT_X_OFFSET_exact %7:vgpr_32, %13:sgpr_128, 0, 4, 22, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) @@ -1959,14 +1959,14 @@ body: | bb.0.entry: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - %7:vgpr_32 = COPY $vgpr3 - %6:vgpr_32 = COPY $vgpr2 - %5:vgpr_32 = COPY $vgpr1 - %4:vgpr_32 = COPY $vgpr0 - %3:sgpr_32 = COPY $sgpr3 - %2:sgpr_32 = COPY $sgpr2 - %1:sgpr_32 = COPY $sgpr1 - %0:sgpr_32 = COPY $sgpr0 + %7:vgpr_32 = PRED_COPY $vgpr3 + %6:vgpr_32 = PRED_COPY $vgpr2 + %5:vgpr_32 = PRED_COPY $vgpr1 + %4:vgpr_32 = PRED_COPY $vgpr0 + %3:sgpr_32 = PRED_COPY $sgpr3 + %2:sgpr_32 = PRED_COPY $sgpr2 + %1:sgpr_32 = PRED_COPY $sgpr1 + %0:sgpr_32 = PRED_COPY $sgpr0 %13:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, 
%subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 %14:vreg_96 = REG_SEQUENCE %4:vgpr_32, %subreg.sub0, %5:vgpr_32, %subreg.sub1, %6:vgpr_32, %subreg.sub2 TBUFFER_STORE_FORMAT_XYZ_OFFSET_exact %14:vreg_96, %13:sgpr_128, 0, 4, 60, 0, 0, implicit $exec :: (dereferenceable store 12, align 1, addrspace 4) @@ -1983,14 +1983,14 @@ body: | bb.0.entry: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - %7:vgpr_32 = COPY $vgpr3 - %6:vgpr_32 = COPY $vgpr2 - %5:vgpr_32 = COPY $vgpr1 - %4:vgpr_32 = COPY $vgpr0 - %3:sgpr_32 = COPY $sgpr3 - %2:sgpr_32 = COPY $sgpr2 - %1:sgpr_32 = COPY $sgpr1 - %0:sgpr_32 = COPY $sgpr0 + %7:vgpr_32 = PRED_COPY $vgpr3 + %6:vgpr_32 = PRED_COPY $vgpr2 + %5:vgpr_32 = PRED_COPY $vgpr1 + %4:vgpr_32 = PRED_COPY $vgpr0 + %3:sgpr_32 = PRED_COPY $sgpr3 + %2:sgpr_32 = PRED_COPY $sgpr2 + %1:sgpr_32 = PRED_COPY $sgpr1 + %0:sgpr_32 = PRED_COPY $sgpr0 %13:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 %14:vreg_64 = REG_SEQUENCE %4:vgpr_32, %subreg.sub0, %5:vgpr_32, %subreg.sub1 %15:vreg_64 = REG_SEQUENCE %6:vgpr_32, %subreg.sub0, %7:vgpr_32, %subreg.sub1 @@ -2007,14 +2007,14 @@ body: | bb.0.entry: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - %7:vgpr_32 = COPY $vgpr3 - %6:vgpr_32 = COPY $vgpr2 - %5:vgpr_32 = COPY $vgpr1 - %4:vgpr_32 = COPY $vgpr0 - %3:sgpr_32 = COPY $sgpr3 - %2:sgpr_32 = COPY $sgpr2 - %1:sgpr_32 = COPY $sgpr1 - %0:sgpr_32 = COPY $sgpr0 + %7:vgpr_32 = PRED_COPY $vgpr3 + %6:vgpr_32 = PRED_COPY $vgpr2 + %5:vgpr_32 = PRED_COPY $vgpr1 + %4:vgpr_32 = PRED_COPY $vgpr0 + %3:sgpr_32 = PRED_COPY $sgpr3 + %2:sgpr_32 = PRED_COPY $sgpr2 + %1:sgpr_32 = PRED_COPY $sgpr1 + %0:sgpr_32 = PRED_COPY $sgpr0 %13:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 %14:vreg_64 = REG_SEQUENCE %4:vgpr_32, %subreg.sub0, %5:vgpr_32, %subreg.sub1 TBUFFER_STORE_FORMAT_X_OFFSET_exact %7:vgpr_32, %13:sgpr_128, 0, 4, 22, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) @@ -2031,14 +2031,14 @@ bb.0.entry: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - %7:vgpr_32 = COPY $vgpr3 - %6:vgpr_32 = COPY $vgpr2 - %5:vgpr_32 = COPY $vgpr1 - %4:vgpr_32 = COPY $vgpr0 - %3:sgpr_32 = COPY $sgpr3 - %2:sgpr_32 = COPY $sgpr2 - %1:sgpr_32 = COPY $sgpr1 - %0:sgpr_32 = COPY $sgpr0 + %7:vgpr_32 = PRED_COPY $vgpr3 + %6:vgpr_32 = PRED_COPY $vgpr2 + %5:vgpr_32 = PRED_COPY $vgpr1 + %4:vgpr_32 = PRED_COPY $vgpr0 + %3:sgpr_32 = PRED_COPY $sgpr3 + %2:sgpr_32 = PRED_COPY $sgpr2 + %1:sgpr_32 = PRED_COPY $sgpr1 + %0:sgpr_32 = PRED_COPY $sgpr0 %13:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 %14:vreg_64 = REG_SEQUENCE %4:vgpr_32, %subreg.sub0, %5:vgpr_32, %subreg.sub1 TBUFFER_STORE_FORMAT_XY_OFFSET_exact %14:vreg_64, %13:sgpr_128, 0, 4, 50, 0, 0, implicit $exec :: (dereferenceable store 8, align 1, addrspace 4) @@ -2054,14 +2054,14 @@ body: | bb.0.entry: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - %7:vgpr_32 = COPY $vgpr3 - %6:vgpr_32 = COPY $vgpr2 - %5:vgpr_32 = COPY $vgpr1 - %4:vgpr_32 = COPY $vgpr0 - %3:sgpr_32 = COPY $sgpr3 - %2:sgpr_32 = COPY $sgpr2 - %1:sgpr_32 = COPY $sgpr1 - %0:sgpr_32 = COPY $sgpr0 + %7:vgpr_32 = PRED_COPY $vgpr3 + %6:vgpr_32 = PRED_COPY $vgpr2 + %5:vgpr_32 = 
PRED_COPY $vgpr1 + %4:vgpr_32 = PRED_COPY $vgpr0 + %3:sgpr_32 = PRED_COPY $sgpr3 + %2:sgpr_32 = PRED_COPY $sgpr2 + %1:sgpr_32 = PRED_COPY $sgpr1 + %0:sgpr_32 = PRED_COPY $sgpr0 %13:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 TBUFFER_STORE_FORMAT_X_OFFSET_exact %6:vgpr_32, %13:sgpr_128, 0, 4, 22, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) TBUFFER_STORE_FORMAT_X_OFFSET_exact %7:vgpr_32, %13:sgpr_128, 0, 8, 22, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) @@ -2075,14 +2075,14 @@ body: | bb.0.entry: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - %7:vgpr_32 = COPY $vgpr3 - %6:vgpr_32 = COPY $vgpr2 - %5:vgpr_32 = COPY $vgpr1 - %4:vgpr_32 = COPY $vgpr0 - %3:sgpr_32 = COPY $sgpr3 - %2:sgpr_32 = COPY $sgpr2 - %1:sgpr_32 = COPY $sgpr1 - %0:sgpr_32 = COPY $sgpr0 + %7:vgpr_32 = PRED_COPY $vgpr3 + %6:vgpr_32 = PRED_COPY $vgpr2 + %5:vgpr_32 = PRED_COPY $vgpr1 + %4:vgpr_32 = PRED_COPY $vgpr0 + %3:sgpr_32 = PRED_COPY $sgpr3 + %2:sgpr_32 = PRED_COPY $sgpr2 + %1:sgpr_32 = PRED_COPY $sgpr1 + %0:sgpr_32 = PRED_COPY $sgpr0 %13:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 TBUFFER_STORE_FORMAT_X_OFFSET_exact %6:vgpr_32, %13:sgpr_128, 0, 4, 63, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) TBUFFER_STORE_FORMAT_X_OFFSET_exact %7:vgpr_32, %13:sgpr_128, 0, 8, 63, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) @@ -2090,19 +2090,19 @@ --- # GFX11-LABEL: name: gfx11_tbuffer_store_float32 -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY $vgpr8 -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY $vgpr7 -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY $vgpr6 -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY $vgpr5 -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY $vgpr4 -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY $vgpr3 -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY $vgpr2 -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY $vgpr1 -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY $vgpr0 -# GFX11: %{{[0-9]+}}:sgpr_32 = COPY $sgpr3 -# GFX11: %{{[0-9]+}}:sgpr_32 = COPY $sgpr2 -# GFX11: %{{[0-9]+}}:sgpr_32 = COPY $sgpr1 -# GFX11: %{{[0-9]+}}:sgpr_32 = COPY $sgpr0 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr8 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr7 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr6 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr5 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr4 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr3 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr2 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr1 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr0 +# GFX11: %{{[0-9]+}}:sgpr_32 = PRED_COPY $sgpr3 +# GFX11: %{{[0-9]+}}:sgpr_32 = PRED_COPY $sgpr2 +# GFX11: %{{[0-9]+}}:sgpr_32 = PRED_COPY $sgpr1 +# GFX11: %{{[0-9]+}}:sgpr_32 = PRED_COPY $sgpr0 # GFX11: %{{[0-9]+}}:sgpr_128 = REG_SEQUENCE %12, %subreg.sub0, %11, %subreg.sub1, %10, %subreg.sub2, %9, %subreg.sub3 # GFX11: %{{[0-9]+}}:vreg_64 = REG_SEQUENCE %8, %subreg.sub0, %7, %subreg.sub1 # GFX11: TBUFFER_STORE_FORMAT_XY_OFFSET_exact killed %14, %13, 0, 4, 50, 0, 0, implicit $exec :: (dereferenceable store (s64), align 1, addrspace 4) @@ -2117,19 +2117,19 @@ body: | bb.0.entry: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 - %12:vgpr_32 = COPY $vgpr8 - %11:vgpr_32 = COPY $vgpr7 - %10:vgpr_32 = COPY $vgpr6 - %9:vgpr_32 = COPY $vgpr5 - %8:vgpr_32 = COPY $vgpr4 - %7:vgpr_32 = COPY 
$vgpr3 - %6:vgpr_32 = COPY $vgpr2 - %5:vgpr_32 = COPY $vgpr1 - %4:vgpr_32 = COPY $vgpr0 - %3:sgpr_32 = COPY $sgpr3 - %2:sgpr_32 = COPY $sgpr2 - %1:sgpr_32 = COPY $sgpr1 - %0:sgpr_32 = COPY $sgpr0 + %12:vgpr_32 = PRED_COPY $vgpr8 + %11:vgpr_32 = PRED_COPY $vgpr7 + %10:vgpr_32 = PRED_COPY $vgpr6 + %9:vgpr_32 = PRED_COPY $vgpr5 + %8:vgpr_32 = PRED_COPY $vgpr4 + %7:vgpr_32 = PRED_COPY $vgpr3 + %6:vgpr_32 = PRED_COPY $vgpr2 + %5:vgpr_32 = PRED_COPY $vgpr1 + %4:vgpr_32 = PRED_COPY $vgpr0 + %3:sgpr_32 = PRED_COPY $sgpr3 + %2:sgpr_32 = PRED_COPY $sgpr2 + %1:sgpr_32 = PRED_COPY $sgpr1 + %0:sgpr_32 = PRED_COPY $sgpr0 %13:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 TBUFFER_STORE_FORMAT_X_OFFSET_exact %4:vgpr_32, %13:sgpr_128, 0, 4, 22, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) TBUFFER_STORE_FORMAT_X_OFFSET_exact %5:vgpr_32, %13:sgpr_128, 0, 8, 22, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) @@ -2144,19 +2144,19 @@ --- # GFX11-LABEL: name: gfx11_tbuffer_store_sint32 -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY $vgpr8 -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY $vgpr7 -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY $vgpr6 -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY $vgpr5 -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY $vgpr4 -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY $vgpr3 -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY $vgpr2 -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY $vgpr1 -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY $vgpr0 -# GFX11: %{{[0-9]+}}:sgpr_32 = COPY $sgpr3 -# GFX11: %{{[0-9]+}}:sgpr_32 = COPY $sgpr2 -# GFX11: %{{[0-9]+}}:sgpr_32 = COPY $sgpr1 -# GFX11: %{{[0-9]+}}:sgpr_32 = COPY $sgpr0 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr8 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr7 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr6 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr5 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr4 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr3 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr2 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr1 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr0 +# GFX11: %{{[0-9]+}}:sgpr_32 = PRED_COPY $sgpr3 +# GFX11: %{{[0-9]+}}:sgpr_32 = PRED_COPY $sgpr2 +# GFX11: %{{[0-9]+}}:sgpr_32 = PRED_COPY $sgpr1 +# GFX11: %{{[0-9]+}}:sgpr_32 = PRED_COPY $sgpr0 # GFX11: %{{[0-9]+}}:sgpr_128 = REG_SEQUENCE %12, %subreg.sub0, %11, %subreg.sub1, %10, %subreg.sub2, %9, %subreg.sub3 # GFX11: %{{[0-9]+}}:vreg_64 = REG_SEQUENCE %8, %subreg.sub0, %7, %subreg.sub1 # GFX11: TBUFFER_STORE_FORMAT_XY_OFFSET_exact killed %14, %13, 0, 4, 49, 0, 0, implicit $exec :: (dereferenceable store (s64), align 1, addrspace 4) @@ -2171,19 +2171,19 @@ body: | bb.0.entry: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 - %12:vgpr_32 = COPY $vgpr8 - %11:vgpr_32 = COPY $vgpr7 - %10:vgpr_32 = COPY $vgpr6 - %9:vgpr_32 = COPY $vgpr5 - %8:vgpr_32 = COPY $vgpr4 - %7:vgpr_32 = COPY $vgpr3 - %6:vgpr_32 = COPY $vgpr2 - %5:vgpr_32 = COPY $vgpr1 - %4:vgpr_32 = COPY $vgpr0 - %3:sgpr_32 = COPY $sgpr3 - %2:sgpr_32 = COPY $sgpr2 - %1:sgpr_32 = COPY $sgpr1 - %0:sgpr_32 = COPY $sgpr0 + %12:vgpr_32 = PRED_COPY $vgpr8 + %11:vgpr_32 = PRED_COPY $vgpr7 + %10:vgpr_32 = PRED_COPY $vgpr6 + %9:vgpr_32 = PRED_COPY $vgpr5 + %8:vgpr_32 = PRED_COPY $vgpr4 + %7:vgpr_32 = PRED_COPY $vgpr3 + %6:vgpr_32 = PRED_COPY $vgpr2 + %5:vgpr_32 = PRED_COPY $vgpr1 + %4:vgpr_32 = PRED_COPY $vgpr0 + %3:sgpr_32 = PRED_COPY $sgpr3 + %2:sgpr_32 = PRED_COPY $sgpr2 + %1:sgpr_32 = 
PRED_COPY $sgpr1 + %0:sgpr_32 = PRED_COPY $sgpr0 %13:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 TBUFFER_STORE_FORMAT_X_OFFSET_exact %4:vgpr_32, %13:sgpr_128, 0, 4, 21, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) TBUFFER_STORE_FORMAT_X_OFFSET_exact %5:vgpr_32, %13:sgpr_128, 0, 8, 21, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) @@ -2198,19 +2198,19 @@ --- # GFX11-LABEL: name: gfx11_tbuffer_store_uint32 -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY $vgpr8 -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY $vgpr7 -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY $vgpr6 -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY $vgpr5 -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY $vgpr4 -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY $vgpr3 -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY $vgpr2 -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY $vgpr1 -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY $vgpr0 -# GFX11: %{{[0-9]+}}:sgpr_32 = COPY $sgpr3 -# GFX11: %{{[0-9]+}}:sgpr_32 = COPY $sgpr2 -# GFX11: %{{[0-9]+}}:sgpr_32 = COPY $sgpr1 -# GFX11: %{{[0-9]+}}:sgpr_32 = COPY $sgpr0 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr8 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr7 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr6 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr5 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr4 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr3 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr2 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr1 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr0 +# GFX11: %{{[0-9]+}}:sgpr_32 = PRED_COPY $sgpr3 +# GFX11: %{{[0-9]+}}:sgpr_32 = PRED_COPY $sgpr2 +# GFX11: %{{[0-9]+}}:sgpr_32 = PRED_COPY $sgpr1 +# GFX11: %{{[0-9]+}}:sgpr_32 = PRED_COPY $sgpr0 # GFX11: %{{[0-9]+}}:sgpr_128 = REG_SEQUENCE %12, %subreg.sub0, %11, %subreg.sub1, %10, %subreg.sub2, %9, %subreg.sub3 # GFX11: %{{[0-9]+}}:vreg_64 = REG_SEQUENCE %8, %subreg.sub0, %7, %subreg.sub1 # GFX11: TBUFFER_STORE_FORMAT_XY_OFFSET_exact killed %14, %13, 0, 4, 48, 0, 0, implicit $exec :: (dereferenceable store (s64), align 1, addrspace 4) @@ -2225,19 +2225,19 @@ body: | bb.0.entry: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 - %12:vgpr_32 = COPY $vgpr8 - %11:vgpr_32 = COPY $vgpr7 - %10:vgpr_32 = COPY $vgpr6 - %9:vgpr_32 = COPY $vgpr5 - %8:vgpr_32 = COPY $vgpr4 - %7:vgpr_32 = COPY $vgpr3 - %6:vgpr_32 = COPY $vgpr2 - %5:vgpr_32 = COPY $vgpr1 - %4:vgpr_32 = COPY $vgpr0 - %3:sgpr_32 = COPY $sgpr3 - %2:sgpr_32 = COPY $sgpr2 - %1:sgpr_32 = COPY $sgpr1 - %0:sgpr_32 = COPY $sgpr0 + %12:vgpr_32 = PRED_COPY $vgpr8 + %11:vgpr_32 = PRED_COPY $vgpr7 + %10:vgpr_32 = PRED_COPY $vgpr6 + %9:vgpr_32 = PRED_COPY $vgpr5 + %8:vgpr_32 = PRED_COPY $vgpr4 + %7:vgpr_32 = PRED_COPY $vgpr3 + %6:vgpr_32 = PRED_COPY $vgpr2 + %5:vgpr_32 = PRED_COPY $vgpr1 + %4:vgpr_32 = PRED_COPY $vgpr0 + %3:sgpr_32 = PRED_COPY $sgpr3 + %2:sgpr_32 = PRED_COPY $sgpr2 + %1:sgpr_32 = PRED_COPY $sgpr1 + %0:sgpr_32 = PRED_COPY $sgpr0 %13:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 TBUFFER_STORE_FORMAT_X_OFFSET_exact %4:vgpr_32, %13:sgpr_128, 0, 4, 20, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) TBUFFER_STORE_FORMAT_X_OFFSET_exact %5:vgpr_32, %13:sgpr_128, 0, 8, 20, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) @@ -2252,19 +2252,19 @@ --- # GFX11-LABEL: name: gfx11_tbuffer_store_not_merged_data_format_mismatch -# 
GFX11: %{{[0-9]+}}:vgpr_32 = COPY $vgpr8 -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY $vgpr7 -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY $vgpr6 -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY $vgpr5 -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY $vgpr4 -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY $vgpr3 -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY $vgpr2 -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY $vgpr1 -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY $vgpr0 -# GFX11: %{{[0-9]+}}:sgpr_32 = COPY $sgpr3 -# GFX11: %{{[0-9]+}}:sgpr_32 = COPY $sgpr2 -# GFX11: %{{[0-9]+}}:sgpr_32 = COPY $sgpr1 -# GFX11: %{{[0-9]+}}:sgpr_32 = COPY $sgpr0 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr8 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr7 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr6 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr5 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr4 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr3 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr2 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr1 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr0 +# GFX11: %{{[0-9]+}}:sgpr_32 = PRED_COPY $sgpr3 +# GFX11: %{{[0-9]+}}:sgpr_32 = PRED_COPY $sgpr2 +# GFX11: %{{[0-9]+}}:sgpr_32 = PRED_COPY $sgpr1 +# GFX11: %{{[0-9]+}}:sgpr_32 = PRED_COPY $sgpr0 # GFX11: %{{[0-9]+}}:sgpr_128 = REG_SEQUENCE %12, %subreg.sub0, %11, %subreg.sub1, %10, %subreg.sub2, %9, %subreg.sub3 # GFX11: TBUFFER_STORE_FORMAT_X_OFFSET_exact %8, %13, 0, 4, 22, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) # GFX11: TBUFFER_STORE_FORMAT_X_OFFSET_exact %7, %13, 0, 8, 21, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) @@ -2279,19 +2279,19 @@ body: | bb.0.entry: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 - %12:vgpr_32 = COPY $vgpr8 - %11:vgpr_32 = COPY $vgpr7 - %10:vgpr_32 = COPY $vgpr6 - %9:vgpr_32 = COPY $vgpr5 - %8:vgpr_32 = COPY $vgpr4 - %7:vgpr_32 = COPY $vgpr3 - %6:vgpr_32 = COPY $vgpr2 - %5:vgpr_32 = COPY $vgpr1 - %4:vgpr_32 = COPY $vgpr0 - %3:sgpr_32 = COPY $sgpr3 - %2:sgpr_32 = COPY $sgpr2 - %1:sgpr_32 = COPY $sgpr1 - %0:sgpr_32 = COPY $sgpr0 + %12:vgpr_32 = PRED_COPY $vgpr8 + %11:vgpr_32 = PRED_COPY $vgpr7 + %10:vgpr_32 = PRED_COPY $vgpr6 + %9:vgpr_32 = PRED_COPY $vgpr5 + %8:vgpr_32 = PRED_COPY $vgpr4 + %7:vgpr_32 = PRED_COPY $vgpr3 + %6:vgpr_32 = PRED_COPY $vgpr2 + %5:vgpr_32 = PRED_COPY $vgpr1 + %4:vgpr_32 = PRED_COPY $vgpr0 + %3:sgpr_32 = PRED_COPY $sgpr3 + %2:sgpr_32 = PRED_COPY $sgpr2 + %1:sgpr_32 = PRED_COPY $sgpr1 + %0:sgpr_32 = PRED_COPY $sgpr0 %13:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 TBUFFER_STORE_FORMAT_X_OFFSET_exact %4:vgpr_32, %13:sgpr_128, 0, 4, 22, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) TBUFFER_STORE_FORMAT_X_OFFSET_exact %5:vgpr_32, %13:sgpr_128, 0, 8, 21, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) @@ -2306,19 +2306,19 @@ --- # GFX11-LABEL: name: gfx11_tbuffer_store_not_merged_num_format_mismatch -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY $vgpr8 -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY $vgpr7 -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY $vgpr6 -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY $vgpr5 -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY $vgpr4 -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY $vgpr3 -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY $vgpr2 -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY $vgpr1 -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY $vgpr0 -# GFX11: %{{[0-9]+}}:sgpr_32 = COPY $sgpr3 -# GFX11: %{{[0-9]+}}:sgpr_32 = COPY $sgpr2 -# GFX11: 
%{{[0-9]+}}:sgpr_32 = COPY $sgpr1 -# GFX11: %{{[0-9]+}}:sgpr_32 = COPY $sgpr0 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr8 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr7 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr6 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr5 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr4 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr3 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr2 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr1 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr0 +# GFX11: %{{[0-9]+}}:sgpr_32 = PRED_COPY $sgpr3 +# GFX11: %{{[0-9]+}}:sgpr_32 = PRED_COPY $sgpr2 +# GFX11: %{{[0-9]+}}:sgpr_32 = PRED_COPY $sgpr1 +# GFX11: %{{[0-9]+}}:sgpr_32 = PRED_COPY $sgpr0 # GFX11: %{{[0-9]+}}:sgpr_128 = REG_SEQUENCE %12, %subreg.sub0, %11, %subreg.sub1, %10, %subreg.sub2, %9, %subreg.sub3 # GFX11: TBUFFER_STORE_FORMAT_X_OFFSET_exact %8, %13, 0, 4, 22, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) # GFX11: TBUFFER_STORE_FORMAT_X_OFFSET_exact %7, %13, 0, 8, 13, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) @@ -2333,19 +2333,19 @@ body: | bb.0.entry: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 - %12:vgpr_32 = COPY $vgpr8 - %11:vgpr_32 = COPY $vgpr7 - %10:vgpr_32 = COPY $vgpr6 - %9:vgpr_32 = COPY $vgpr5 - %8:vgpr_32 = COPY $vgpr4 - %7:vgpr_32 = COPY $vgpr3 - %6:vgpr_32 = COPY $vgpr2 - %5:vgpr_32 = COPY $vgpr1 - %4:vgpr_32 = COPY $vgpr0 - %3:sgpr_32 = COPY $sgpr3 - %2:sgpr_32 = COPY $sgpr2 - %1:sgpr_32 = COPY $sgpr1 - %0:sgpr_32 = COPY $sgpr0 + %12:vgpr_32 = PRED_COPY $vgpr8 + %11:vgpr_32 = PRED_COPY $vgpr7 + %10:vgpr_32 = PRED_COPY $vgpr6 + %9:vgpr_32 = PRED_COPY $vgpr5 + %8:vgpr_32 = PRED_COPY $vgpr4 + %7:vgpr_32 = PRED_COPY $vgpr3 + %6:vgpr_32 = PRED_COPY $vgpr2 + %5:vgpr_32 = PRED_COPY $vgpr1 + %4:vgpr_32 = PRED_COPY $vgpr0 + %3:sgpr_32 = PRED_COPY $sgpr3 + %2:sgpr_32 = PRED_COPY $sgpr2 + %1:sgpr_32 = PRED_COPY $sgpr1 + %0:sgpr_32 = PRED_COPY $sgpr0 %13:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 TBUFFER_STORE_FORMAT_X_OFFSET_exact %4:vgpr_32, %13:sgpr_128, 0, 4, 22, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) TBUFFER_STORE_FORMAT_X_OFFSET_exact %5:vgpr_32, %13:sgpr_128, 0, 8, 13, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) @@ -2366,10 +2366,10 @@ name: gfx11_tbuffer_load_not_merged_swizzled_0 body: | bb.0.entry: - %0:sgpr_32 = COPY $sgpr0 - %1:sgpr_32 = COPY $sgpr1 - %2:sgpr_32 = COPY $sgpr2 - %3:sgpr_32 = COPY $sgpr3 + %0:sgpr_32 = PRED_COPY $sgpr0 + %1:sgpr_32 = PRED_COPY $sgpr1 + %2:sgpr_32 = PRED_COPY $sgpr2 + %3:sgpr_32 = PRED_COPY $sgpr3 %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 22, 0, 1, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 8, 22, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) @@ -2383,10 +2383,10 @@ name: gfx11_tbuffer_load_not_merged_swizzled_1 body: | bb.0.entry: - %0:sgpr_32 = COPY $sgpr0 - %1:sgpr_32 = COPY $sgpr1 - %2:sgpr_32 = COPY $sgpr2 - %3:sgpr_32 = COPY $sgpr3 + %0:sgpr_32 = PRED_COPY $sgpr0 + %1:sgpr_32 = PRED_COPY $sgpr1 + %2:sgpr_32 = PRED_COPY $sgpr2 + %3:sgpr_32 = PRED_COPY $sgpr3 %5:sgpr_128 = REG_SEQUENCE 
%0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 22, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 8, 22, 0, 1, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) @@ -2400,10 +2400,10 @@ name: gfx11_tbuffer_load_merge_across_swizzle body: | bb.0.entry: - %0:sgpr_32 = COPY $sgpr0 - %1:sgpr_32 = COPY $sgpr1 - %2:sgpr_32 = COPY $sgpr2 - %3:sgpr_32 = COPY $sgpr3 + %0:sgpr_32 = PRED_COPY $sgpr0 + %1:sgpr_32 = PRED_COPY $sgpr1 + %2:sgpr_32 = PRED_COPY $sgpr2 + %3:sgpr_32 = PRED_COPY $sgpr3 %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 22, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 12, 22, 0, 1, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) diff --git a/llvm/test/CodeGen/AMDGPU/move-load-addr-to-valu.mir b/llvm/test/CodeGen/AMDGPU/move-load-addr-to-valu.mir --- a/llvm/test/CodeGen/AMDGPU/move-load-addr-to-valu.mir +++ b/llvm/test/CodeGen/AMDGPU/move-load-addr-to-valu.mir @@ -17,13 +17,13 @@ ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[PHI:%[0-9]+]]:vreg_64 = PHI [[COPY]], %bb.0, %7, %bb.1 ; GCN-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[PHI]], 0, 0, implicit $exec - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[PHI]].sub0 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[PHI]].sub1 - ; GCN-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY1]], 1, implicit $exec - ; GCN-NEXT: [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY2]], 0, implicit $exec + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY [[PHI]].sub0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PHI]].sub1 + ; GCN-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PRED_COPY]], 1, implicit $exec + ; GCN-NEXT: [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PRED_COPY1]], 0, implicit $exec ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[V_AND_B32_e64_1]], %subreg.sub1 ; GCN-NEXT: [[V_CMP_NE_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U64_e64 [[REG_SEQUENCE]], 0, implicit $exec - ; GCN-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]], implicit $exec + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]], implicit $exec ; GCN-NEXT: $vcc = S_AND_B64 $exec, [[V_CMP_NE_U64_e64_]], implicit-def $scc ; GCN-NEXT: S_CBRANCH_VCCNZ %bb.1, implicit $vcc ; GCN-NEXT: {{ $}} @@ -65,13 +65,13 @@ ; GCN-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[PHI]].sub1, implicit $exec ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 ; GCN-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[REG_SEQUENCE]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[PHI]].sub0 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[PHI]].sub1 - ; GCN-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY1]], 1, implicit $exec - ; GCN-NEXT: [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY2]], 0, implicit $exec + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY [[PHI]].sub0 + ; GCN-NEXT: 
[[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PHI]].sub1 + ; GCN-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PRED_COPY]], 1, implicit $exec + ; GCN-NEXT: [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PRED_COPY1]], 0, implicit $exec ; GCN-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[V_AND_B32_e64_1]], %subreg.sub1 ; GCN-NEXT: [[V_CMP_NE_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U64_e64 [[REG_SEQUENCE1]], 0, implicit $exec - ; GCN-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]], implicit $exec + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE1]], implicit $exec ; GCN-NEXT: $vcc = S_AND_B64 $exec, [[V_CMP_NE_U64_e64_]], implicit-def $scc ; GCN-NEXT: S_CBRANCH_VCCNZ %bb.1, implicit $vcc ; GCN-NEXT: {{ $}} @@ -113,13 +113,13 @@ ; GCN-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[PHI]].sub1, implicit $exec ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 ; GCN-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[REG_SEQUENCE]], undef %4:vgpr_32, 0, 0, implicit $exec - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[PHI]].sub0 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[PHI]].sub1 - ; GCN-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY1]], 1, implicit $exec - ; GCN-NEXT: [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY2]], 0, implicit $exec + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY [[PHI]].sub0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PHI]].sub1 + ; GCN-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PRED_COPY]], 1, implicit $exec + ; GCN-NEXT: [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PRED_COPY1]], 0, implicit $exec ; GCN-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[V_AND_B32_e64_1]], %subreg.sub1 ; GCN-NEXT: [[V_CMP_NE_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U64_e64 [[REG_SEQUENCE1]], 0, implicit $exec - ; GCN-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]], implicit $exec + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE1]], implicit $exec ; GCN-NEXT: $vcc = S_AND_B64 $exec, [[V_CMP_NE_U64_e64_]], implicit-def $scc ; GCN-NEXT: S_CBRANCH_VCCNZ %bb.1, implicit $vcc ; GCN-NEXT: {{ $}} @@ -157,13 +157,13 @@ ; GCN-NEXT: [[PHI:%[0-9]+]]:vreg_64 = PHI [[COPY]], %bb.0, %7, %bb.1 ; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; GCN-NEXT: GLOBAL_STORE_DWORD [[PHI]], [[DEF]], 0, 0, implicit $exec - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[PHI]].sub0 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[PHI]].sub1 - ; GCN-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY1]], 1, implicit $exec - ; GCN-NEXT: [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY2]], 0, implicit $exec + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY [[PHI]].sub0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PHI]].sub1 + ; GCN-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PRED_COPY]], 1, implicit $exec + ; GCN-NEXT: [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PRED_COPY1]], 0, implicit $exec ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[V_AND_B32_e64_1]], %subreg.sub1 ; GCN-NEXT: [[V_CMP_NE_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U64_e64 [[REG_SEQUENCE]], 0, implicit $exec - ; GCN-NEXT: 
[[COPY3:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]], implicit $exec + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]], implicit $exec ; GCN-NEXT: $vcc = S_AND_B64 $exec, [[V_CMP_NE_U64_e64_]], implicit-def $scc ; GCN-NEXT: S_CBRANCH_VCCNZ %bb.1, implicit $vcc ; GCN-NEXT: {{ $}} @@ -205,13 +205,13 @@ ; GCN-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[PHI]].sub1, implicit $exec ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 ; GCN-NEXT: [[GLOBAL_LOAD_DWORD_ADDTID_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_ADDTID_SADDR [[REG_SEQUENCE]], 0, 0, implicit $exec - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[PHI]].sub0 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[PHI]].sub1 - ; GCN-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY1]], 1, implicit $exec - ; GCN-NEXT: [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY2]], 0, implicit $exec + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY [[PHI]].sub0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PHI]].sub1 + ; GCN-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PRED_COPY]], 1, implicit $exec + ; GCN-NEXT: [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PRED_COPY1]], 0, implicit $exec ; GCN-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[V_AND_B32_e64_1]], %subreg.sub1 ; GCN-NEXT: [[V_CMP_NE_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U64_e64 [[REG_SEQUENCE1]], 0, implicit $exec - ; GCN-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]], implicit $exec + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE1]], implicit $exec ; GCN-NEXT: $vcc = S_AND_B64 $exec, [[V_CMP_NE_U64_e64_]], implicit-def $scc ; GCN-NEXT: S_CBRANCH_VCCNZ %bb.1, implicit $vcc ; GCN-NEXT: {{ $}} @@ -252,13 +252,13 @@ ; GCN-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[PHI]].sub1, implicit $exec ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 ; GCN-NEXT: GLOBAL_STORE_DWORD_ADDTID_SADDR [[DEF]], [[REG_SEQUENCE]], 0, 0, implicit $exec - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[PHI]].sub0 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[PHI]].sub1 - ; GCN-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY1]], 1, implicit $exec - ; GCN-NEXT: [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY2]], 0, implicit $exec + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY [[PHI]].sub0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PHI]].sub1 + ; GCN-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PRED_COPY]], 1, implicit $exec + ; GCN-NEXT: [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PRED_COPY1]], 0, implicit $exec ; GCN-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[V_AND_B32_e64_1]], %subreg.sub1 ; GCN-NEXT: [[V_CMP_NE_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U64_e64 [[REG_SEQUENCE1]], 0, implicit $exec - ; GCN-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]], implicit $exec + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE1]], implicit $exec ; GCN-NEXT: $vcc = S_AND_B64 $exec, [[V_CMP_NE_U64_e64_]], implicit-def $scc ; GCN-NEXT: S_CBRANCH_VCCNZ %bb.1, implicit $vcc ; GCN-NEXT: {{ $}} @@ -297,13 +297,13 @@ ; GCN-NEXT: [[PHI:%[0-9]+]]:vreg_64 = PHI [[COPY]], %bb.0, %6, 
%bb.1 ; GCN-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GCN-NEXT: GLOBAL_ATOMIC_ADD [[PHI]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[PHI]].sub0 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[PHI]].sub1 - ; GCN-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY1]], 1, implicit $exec - ; GCN-NEXT: [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY2]], 0, implicit $exec + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY [[PHI]].sub0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PHI]].sub1 + ; GCN-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PRED_COPY]], 1, implicit $exec + ; GCN-NEXT: [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PRED_COPY1]], 0, implicit $exec ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[V_AND_B32_e64_1]], %subreg.sub1 ; GCN-NEXT: [[V_CMP_NE_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U64_e64 [[REG_SEQUENCE]], 0, implicit $exec - ; GCN-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]], implicit $exec + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]], implicit $exec ; GCN-NEXT: $vcc = S_AND_B64 $exec, [[V_CMP_NE_U64_e64_]], implicit-def $scc ; GCN-NEXT: S_CBRANCH_VCCNZ %bb.1, implicit $vcc ; GCN-NEXT: {{ $}} @@ -342,13 +342,13 @@ ; GCN-NEXT: [[PHI:%[0-9]+]]:vreg_64 = PHI [[COPY]], %bb.0, %7, %bb.1 ; GCN-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GCN-NEXT: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[PHI]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[PHI]].sub0 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[PHI]].sub1 - ; GCN-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY1]], 1, implicit $exec - ; GCN-NEXT: [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY2]], 0, implicit $exec + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY [[PHI]].sub0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PHI]].sub1 + ; GCN-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PRED_COPY]], 1, implicit $exec + ; GCN-NEXT: [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PRED_COPY1]], 0, implicit $exec ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[V_AND_B32_e64_1]], %subreg.sub1 ; GCN-NEXT: [[V_CMP_NE_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U64_e64 [[REG_SEQUENCE]], 0, implicit $exec - ; GCN-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]], implicit $exec + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]], implicit $exec ; GCN-NEXT: $vcc = S_AND_B64 $exec, [[V_CMP_NE_U64_e64_]], implicit-def $scc ; GCN-NEXT: S_CBRANCH_VCCNZ %bb.1, implicit $vcc ; GCN-NEXT: {{ $}} @@ -388,7 +388,7 @@ ; GCN-NEXT: [[SCRATCH_LOAD_DWORD:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_DWORD [[PHI]], 0, 0, implicit $exec, implicit $flat_scr ; GCN-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PHI]], 1, implicit $exec ; GCN-NEXT: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 [[V_AND_B32_e64_]], 0, implicit $exec - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[V_AND_B32_e64_]], implicit $exec + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY [[V_AND_B32_e64_]], implicit $exec ; GCN-NEXT: $vcc = S_AND_B64 $exec, [[V_CMP_NE_U32_e64_]], implicit-def $scc ; GCN-NEXT: S_CBRANCH_VCCNZ %bb.1, implicit $vcc ; GCN-NEXT: {{ $}} @@ -428,7 +428,7 @@ ; 
GCN-NEXT: SCRATCH_STORE_DWORD [[DEF]], [[PHI]], 0, 0, implicit $exec, implicit $flat_scr ; GCN-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PHI]], 1, implicit $exec ; GCN-NEXT: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 [[V_AND_B32_e64_]], 0, implicit $exec - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[V_AND_B32_e64_]], implicit $exec + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY [[V_AND_B32_e64_]], implicit $exec ; GCN-NEXT: $vcc = S_AND_B64 $exec, [[V_CMP_NE_U32_e64_]], implicit-def $scc ; GCN-NEXT: S_CBRANCH_VCCNZ %bb.1, implicit $vcc ; GCN-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.mir b/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.mir --- a/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.mir +++ b/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.mir @@ -40,10 +40,10 @@ ; W64-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF ; W64-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF ; W64-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF - ; W64-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]], implicit $exec - ; W64-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY4]], implicit $exec - ; W64-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY2]], implicit $exec - ; W64-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE killed [[COPY6]], %subreg.sub0, killed [[COPY7]], %subreg.sub1, [[COPY3]], %subreg.sub2, killed [[COPY8]], %subreg.sub3 + ; W64-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY [[COPY5]], implicit $exec + ; W64-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[COPY4]], implicit $exec + ; W64-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[COPY2]], implicit $exec + ; W64-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE killed [[PRED_COPY]], %subreg.sub0, killed [[PRED_COPY1]], %subreg.sub1, [[COPY3]], %subreg.sub2, killed [[PRED_COPY2]], %subreg.sub3 ; W64-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; W64-NEXT: {{ $}} ; W64-NEXT: .1: @@ -87,10 +87,10 @@ ; W32-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF ; W32-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF ; W32-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF - ; W32-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]], implicit $exec - ; W32-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY4]], implicit $exec - ; W32-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY2]], implicit $exec - ; W32-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE killed [[COPY6]], %subreg.sub0, killed [[COPY7]], %subreg.sub1, [[COPY3]], %subreg.sub2, killed [[COPY8]], %subreg.sub3 + ; W32-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY [[COPY5]], implicit $exec + ; W32-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[COPY4]], implicit $exec + ; W32-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[COPY2]], implicit $exec + ; W32-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE killed [[PRED_COPY]], %subreg.sub0, killed [[PRED_COPY1]], %subreg.sub1, [[COPY3]], %subreg.sub2, killed [[PRED_COPY2]], %subreg.sub3 ; W32-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_MOV_B32 $exec_lo ; W32-NEXT: {{ $}} ; W32-NEXT: .1: @@ -160,10 +160,10 @@ ; W64-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF ; W64-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF ; W64-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF - ; W64-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY4]], implicit $exec - ; W64-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY3]], implicit $exec - ; W64-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY2]], implicit $exec - ; W64-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = 
REG_SEQUENCE [[COPY5]], %subreg.sub0, killed [[COPY6]], %subreg.sub1, killed [[COPY7]], %subreg.sub2, killed [[COPY8]], %subreg.sub3 + ; W64-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY [[COPY4]], implicit $exec + ; W64-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[COPY3]], implicit $exec + ; W64-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[COPY2]], implicit $exec + ; W64-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, killed [[PRED_COPY]], %subreg.sub1, killed [[PRED_COPY1]], %subreg.sub2, killed [[PRED_COPY2]], %subreg.sub3 ; W64-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; W64-NEXT: {{ $}} ; W64-NEXT: .1: @@ -207,10 +207,10 @@ ; W32-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF ; W32-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF ; W32-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF - ; W32-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY4]], implicit $exec - ; W32-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY3]], implicit $exec - ; W32-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY2]], implicit $exec - ; W32-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, killed [[COPY6]], %subreg.sub1, killed [[COPY7]], %subreg.sub2, killed [[COPY8]], %subreg.sub3 + ; W32-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY [[COPY4]], implicit $exec + ; W32-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[COPY3]], implicit $exec + ; W32-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[COPY2]], implicit $exec + ; W32-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, killed [[PRED_COPY]], %subreg.sub1, killed [[PRED_COPY1]], %subreg.sub2, killed [[PRED_COPY2]], %subreg.sub3 ; W32-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_MOV_B32 $exec_lo ; W32-NEXT: {{ $}} ; W32-NEXT: .1: @@ -280,10 +280,10 @@ ; W64-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF ; W64-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF ; W64-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF - ; W64-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]], implicit $exec - ; W64-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY4]], implicit $exec - ; W64-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY3]], implicit $exec - ; W64-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE killed [[COPY6]], %subreg.sub0, killed [[COPY7]], %subreg.sub1, killed [[COPY8]], %subreg.sub2, [[COPY2]], %subreg.sub3 + ; W64-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY [[COPY5]], implicit $exec + ; W64-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[COPY4]], implicit $exec + ; W64-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[COPY3]], implicit $exec + ; W64-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE killed [[PRED_COPY]], %subreg.sub0, killed [[PRED_COPY1]], %subreg.sub1, killed [[PRED_COPY2]], %subreg.sub2, [[COPY2]], %subreg.sub3 ; W64-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; W64-NEXT: {{ $}} ; W64-NEXT: .1: @@ -327,10 +327,10 @@ ; W32-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF ; W32-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF ; W32-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF - ; W32-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]], implicit $exec - ; W32-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY4]], implicit $exec - ; W32-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY3]], implicit $exec - ; W32-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE killed [[COPY6]], %subreg.sub0, killed [[COPY7]], %subreg.sub1, killed [[COPY8]], %subreg.sub2, [[COPY2]], %subreg.sub3 + ; W32-NEXT: 
[[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY [[COPY5]], implicit $exec + ; W32-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[COPY4]], implicit $exec + ; W32-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[COPY3]], implicit $exec + ; W32-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE killed [[PRED_COPY]], %subreg.sub0, killed [[PRED_COPY1]], %subreg.sub1, killed [[PRED_COPY2]], %subreg.sub2, [[COPY2]], %subreg.sub3 ; W32-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_MOV_B32 $exec_lo ; W32-NEXT: {{ $}} ; W32-NEXT: .1: @@ -399,18 +399,18 @@ ; ADDR64-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF ; ADDR64-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF ; ADDR64-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF - ; ADDR64-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]], implicit $exec - ; ADDR64-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY4]], implicit $exec - ; ADDR64-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY2]], implicit $exec - ; ADDR64-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE killed [[COPY6]], %subreg.sub0, killed [[COPY7]], %subreg.sub1, [[COPY3]], %subreg.sub2, killed [[COPY8]], %subreg.sub3 - ; ADDR64-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; ADDR64-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY [[COPY5]], implicit $exec + ; ADDR64-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[COPY4]], implicit $exec + ; ADDR64-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[COPY2]], implicit $exec + ; ADDR64-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE killed [[PRED_COPY]], %subreg.sub0, killed [[PRED_COPY1]], %subreg.sub1, [[COPY3]], %subreg.sub2, killed [[PRED_COPY2]], %subreg.sub3 + ; ADDR64-NEXT: [[PRED_COPY3:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 ; ADDR64-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; ADDR64-NEXT: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 0 ; ADDR64-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 61440 ; ADDR64-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[S_MOV_B32_]], %subreg.sub2, [[S_MOV_B32_1]], %subreg.sub3 - ; ADDR64-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY9]].sub0, [[COPY1]].sub0, 0, implicit $exec - ; ADDR64-NEXT: %17:vgpr_32, dead %20:sreg_64_xexec = V_ADDC_U32_e64 [[COPY9]].sub1, [[COPY1]].sub1, killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; ADDR64-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %17, %subreg.sub1 + ; ADDR64-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY3]].sub0, [[COPY1]].sub0, 0, implicit $exec + ; ADDR64-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY3]].sub1, [[COPY1]].sub1, killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; ADDR64-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; ADDR64-NEXT: [[BUFFER_LOAD_FORMAT_X_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_ADDR64 [[REG_SEQUENCE2]], killed [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec ; ADDR64-NEXT: $sgpr30_sgpr31 = COPY [[COPY]] ; ADDR64-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_FORMAT_X_ADDR64_]] @@ -428,18 +428,18 @@ ; W32-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF ; W32-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF ; W32-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF - ; W32-NEXT: 
[[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]], implicit $exec - ; W32-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY4]], implicit $exec - ; W32-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY2]], implicit $exec - ; W32-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE killed [[COPY6]], %subreg.sub0, killed [[COPY7]], %subreg.sub1, [[COPY3]], %subreg.sub2, killed [[COPY8]], %subreg.sub3 - ; W32-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; W32-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY [[COPY5]], implicit $exec + ; W32-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[COPY4]], implicit $exec + ; W32-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[COPY2]], implicit $exec + ; W32-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE killed [[PRED_COPY]], %subreg.sub0, killed [[PRED_COPY1]], %subreg.sub1, [[COPY3]], %subreg.sub2, killed [[PRED_COPY2]], %subreg.sub3 + ; W32-NEXT: [[PRED_COPY3:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 ; W32-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; W32-NEXT: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 0 ; W32-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 822173696 ; W32-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[S_MOV_B32_]], %subreg.sub2, [[S_MOV_B32_1]], %subreg.sub3 - ; W32-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY9]].sub0, [[COPY1]].sub0, 0, implicit $exec - ; W32-NEXT: %17:vgpr_32, dead %20:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY9]].sub1, [[COPY1]].sub1, killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; W32-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %17, %subreg.sub1 + ; W32-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[PRED_COPY3]].sub0, [[COPY1]].sub0, 0, implicit $exec + ; W32-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[PRED_COPY3]].sub1, [[COPY1]].sub1, killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; W32-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; W32-NEXT: [[BUFFER_LOAD_FORMAT_X_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_ADDR64 [[REG_SEQUENCE2]], killed [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec ; W32-NEXT: $sgpr30_sgpr31 = COPY [[COPY]] ; W32-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_FORMAT_X_ADDR64_]] @@ -484,16 +484,16 @@ ; ADDR64-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF ; ADDR64-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF ; ADDR64-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF - ; ADDR64-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]], implicit $exec - ; ADDR64-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY4]], implicit $exec - ; ADDR64-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY2]], implicit $exec - ; ADDR64-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE killed [[COPY6]], %subreg.sub0, killed [[COPY7]], %subreg.sub1, [[COPY3]], %subreg.sub2, killed [[COPY8]], %subreg.sub3 - ; ADDR64-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; ADDR64-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY [[COPY5]], implicit $exec + ; ADDR64-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[COPY4]], implicit $exec + ; ADDR64-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[COPY2]], implicit $exec + ; ADDR64-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE 
killed [[PRED_COPY]], %subreg.sub0, killed [[PRED_COPY1]], %subreg.sub1, [[COPY3]], %subreg.sub2, killed [[PRED_COPY2]], %subreg.sub3 + ; ADDR64-NEXT: [[PRED_COPY3:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 ; ADDR64-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; ADDR64-NEXT: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 0 ; ADDR64-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 61440 ; ADDR64-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[S_MOV_B32_]], %subreg.sub2, [[S_MOV_B32_1]], %subreg.sub3 - ; ADDR64-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY9]].sub0, %subreg.sub0, [[COPY9]].sub1, %subreg.sub1 + ; ADDR64-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY3]].sub0, %subreg.sub0, [[PRED_COPY3]].sub1, %subreg.sub1 ; ADDR64-NEXT: [[BUFFER_LOAD_FORMAT_X_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_ADDR64 [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec ; ADDR64-NEXT: $sgpr30_sgpr31 = COPY [[COPY]] ; ADDR64-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_FORMAT_X_ADDR64_]] @@ -512,10 +512,10 @@ ; W64-NO-ADDR64-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF ; W64-NO-ADDR64-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF ; W64-NO-ADDR64-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF - ; W64-NO-ADDR64-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]], implicit $exec - ; W64-NO-ADDR64-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY4]], implicit $exec - ; W64-NO-ADDR64-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY2]], implicit $exec - ; W64-NO-ADDR64-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE killed [[COPY6]], %subreg.sub0, killed [[COPY7]], %subreg.sub1, [[COPY3]], %subreg.sub2, killed [[COPY8]], %subreg.sub3 + ; W64-NO-ADDR64-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY [[COPY5]], implicit $exec + ; W64-NO-ADDR64-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[COPY4]], implicit $exec + ; W64-NO-ADDR64-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[COPY2]], implicit $exec + ; W64-NO-ADDR64-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE killed [[PRED_COPY]], %subreg.sub0, killed [[PRED_COPY1]], %subreg.sub1, [[COPY3]], %subreg.sub2, killed [[PRED_COPY2]], %subreg.sub3 ; W64-NO-ADDR64-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; W64-NO-ADDR64-NEXT: {{ $}} ; W64-NO-ADDR64-NEXT: .1: @@ -559,10 +559,10 @@ ; W32-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF ; W32-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF ; W32-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF - ; W32-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]], implicit $exec - ; W32-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY4]], implicit $exec - ; W32-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY2]], implicit $exec - ; W32-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE killed [[COPY6]], %subreg.sub0, killed [[COPY7]], %subreg.sub1, [[COPY3]], %subreg.sub2, killed [[COPY8]], %subreg.sub3 + ; W32-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY [[COPY5]], implicit $exec + ; W32-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[COPY4]], implicit $exec + ; W32-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[COPY2]], implicit $exec + ; W32-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE killed [[PRED_COPY]], %subreg.sub0, killed [[PRED_COPY1]], %subreg.sub1, [[COPY3]], %subreg.sub2, killed [[PRED_COPY2]], %subreg.sub3 ; W32-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_MOV_B32 $exec_lo ; W32-NEXT: {{ $}} ; W32-NEXT: .1: diff --git 
a/llvm/test/CodeGen/AMDGPU/opt-sgpr-to-vgpr-copy.mir b/llvm/test/CodeGen/AMDGPU/opt-sgpr-to-vgpr-copy.mir --- a/llvm/test/CodeGen/AMDGPU/opt-sgpr-to-vgpr-copy.mir +++ b/llvm/test/CodeGen/AMDGPU/opt-sgpr-to-vgpr-copy.mir @@ -95,10 +95,10 @@ ; GCN-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, killed [[V_MOV_B32_e32_]], %subreg.sub1 ; GCN-NEXT: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 [[S_LOAD_DWORDX2_IMM1]].sub0, [[REG_SEQUENCE]].sub0, implicit-def $vcc, implicit $exec - ; GCN-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[S_LOAD_DWORDX2_IMM1]].sub1 - ; GCN-NEXT: [[V_ADDC_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADDC_U32_e32 0, [[COPY3]], implicit-def $vcc, implicit $vcc, implicit $exec - ; GCN-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY killed [[V_ADD_CO_U32_e32_]], implicit $exec - ; GCN-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[COPY4]], %subreg.sub0, killed [[V_ADDC_U32_e32_]], %subreg.sub1 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_LOAD_DWORDX2_IMM1]].sub1 + ; GCN-NEXT: [[V_ADDC_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADDC_U32_e32 0, [[PRED_COPY]], implicit-def $vcc, implicit $vcc, implicit $exec + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY killed [[V_ADD_CO_U32_e32_]], implicit $exec + ; GCN-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[PRED_COPY1]], %subreg.sub0, killed [[V_ADDC_U32_e32_]], %subreg.sub1 ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 0 ; GCN-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 1048576 ; GCN-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE killed [[S_MOV_B32_1]], %subreg.sub0, killed [[S_MOV_B32_]], %subreg.sub1 @@ -372,10 +372,10 @@ ; GCN-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, killed [[V_MOV_B32_e32_]], %subreg.sub1 ; GCN-NEXT: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 [[S_LOAD_DWORDX2_IMM1]].sub0, [[REG_SEQUENCE]].sub0, implicit-def $vcc, implicit $exec - ; GCN-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[S_LOAD_DWORDX2_IMM1]].sub1 - ; GCN-NEXT: [[V_ADDC_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADDC_U32_e32 0, [[COPY3]], implicit-def $vcc, implicit $vcc, implicit $exec - ; GCN-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY killed [[V_ADD_CO_U32_e32_]], implicit $exec - ; GCN-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[COPY4]], %subreg.sub0, killed [[V_ADDC_U32_e32_]], %subreg.sub1 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_LOAD_DWORDX2_IMM1]].sub1 + ; GCN-NEXT: [[V_ADDC_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADDC_U32_e32 0, [[PRED_COPY]], implicit-def $vcc, implicit $vcc, implicit $exec + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY killed [[V_ADD_CO_U32_e32_]], implicit $exec + ; GCN-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[PRED_COPY1]], %subreg.sub0, killed [[V_ADDC_U32_e32_]], %subreg.sub1 ; GCN-NEXT: [[V_CMP_LT_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_LT_U32_e64 killed [[REG_SEQUENCE1]].sub0, 12, implicit $exec ; GCN-NEXT: [[SI_IF:%[0-9]+]]:sreg_64 = SI_IF killed [[V_CMP_LT_U32_e64_]], %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec ; GCN-NEXT: S_BRANCH %bb.1 diff --git a/llvm/test/CodeGen/AMDGPU/optimize-exec-copies-extra-insts-after-copy.mir b/llvm/test/CodeGen/AMDGPU/optimize-exec-copies-extra-insts-after-copy.mir --- 
a/llvm/test/CodeGen/AMDGPU/optimize-exec-copies-extra-insts-after-copy.mir +++ b/llvm/test/CodeGen/AMDGPU/optimize-exec-copies-extra-insts-after-copy.mir @@ -18,7 +18,7 @@ ; CHECK-NEXT: renamable $vcc = V_CMP_EQ_U32_e64 0, killed $vgpr0, implicit $exec ; CHECK-NEXT: $sgpr0_sgpr1 = S_AND_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: renamable $sgpr0_sgpr1 = S_XOR_B64 $exec, killed renamable $sgpr0_sgpr1, implicit-def dead $scc - ; CHECK-NEXT: renamable $sgpr0_sgpr1 = COPY killed renamable $sgpr0_sgpr1, implicit $exec + ; CHECK-NEXT: renamable $sgpr0_sgpr1 = PRED_COPY killed renamable $sgpr0_sgpr1, implicit $exec ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: diff --git a/llvm/test/CodeGen/AMDGPU/optimize-exec-masking-strip-terminator-bits.mir b/llvm/test/CodeGen/AMDGPU/optimize-exec-masking-strip-terminator-bits.mir --- a/llvm/test/CodeGen/AMDGPU/optimize-exec-masking-strip-terminator-bits.mir +++ b/llvm/test/CodeGen/AMDGPU/optimize-exec-masking-strip-terminator-bits.mir @@ -14,9 +14,9 @@ ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; CHECK-NEXT: liveins: $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: $exec = COPY killed renamable $sgpr4_sgpr5 - ; CHECK-NEXT: renamable $sgpr10_sgpr11 = COPY killed renamable $sgpr6_sgpr7, implicit $exec - ; CHECK-NEXT: renamable $sgpr12_sgpr13 = COPY killed renamable $sgpr8_sgpr9, implicit $exec + ; CHECK-NEXT: $exec = PRED_COPY killed renamable $sgpr4_sgpr5 + ; CHECK-NEXT: renamable $sgpr10_sgpr11 = PRED_COPY killed renamable $sgpr6_sgpr7, implicit $exec + ; CHECK-NEXT: renamable $sgpr12_sgpr13 = PRED_COPY killed renamable $sgpr8_sgpr9, implicit $exec ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.1, implicit $exec ; CHECK-NEXT: S_BRANCH %bb.2 ; CHECK-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/optimize-if-exec-masking.mir b/llvm/test/CodeGen/AMDGPU/optimize-if-exec-masking.mir --- a/llvm/test/CodeGen/AMDGPU/optimize-if-exec-masking.mir +++ b/llvm/test/CodeGen/AMDGPU/optimize-if-exec-masking.mir @@ -137,7 +137,7 @@ bb.0.main_body: liveins: $vgpr0 - $sgpr0_sgpr1 = COPY $exec + $sgpr0_sgpr1 = PRED_COPY $exec $vcc = V_CMP_EQ_I32_e64 0, killed $vgpr0, implicit $exec $vgpr0 = V_MOV_B32_e32 4, implicit $exec $sgpr2_sgpr3 = S_AND_B64 $sgpr0_sgpr1, killed $vcc, implicit-def $scc @@ -175,7 +175,7 @@ bb.0.main_body: liveins: $vgpr0 - $sgpr0_sgpr1 = COPY $exec + $sgpr0_sgpr1 = PRED_COPY $exec $vcc = V_CMP_EQ_I32_e64 0, killed $vgpr0, implicit $exec $vgpr0 = V_MOV_B32_e32 4, implicit $exec $sgpr2_sgpr3 = S_AND_B64 $sgpr0_sgpr1, killed $vcc, implicit-def $scc @@ -212,7 +212,7 @@ bb.0.main_body: liveins: $vgpr0 - $sgpr0_sgpr1 = COPY $exec + $sgpr0_sgpr1 = PRED_COPY $exec $vcc = V_CMP_EQ_I32_e64 0, killed $vgpr0, implicit $exec $vgpr0 = V_MOV_B32_e32 4, implicit $exec $sgpr2_sgpr3 = S_OR_B64 $sgpr0_sgpr1, killed $vcc, implicit-def $scc @@ -242,7 +242,7 @@ # CHECK: $sgpr2_sgpr3 = S_AND_B64 $sgpr0_sgpr1, killed $vcc, implicit-def $scc # CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec # CHECK-NEXT: $sgpr0_sgpr1 = S_XOR_B64 $sgpr2_sgpr3, killed $sgpr0_sgpr1, implicit-def $scc -# CHECK-NEXT: $exec = COPY killed $sgpr2_sgpr3 +# CHECK-NEXT: $exec = PRED_COPY killed $sgpr2_sgpr3 # CHECK-NEXT: S_CBRANCH_EXECZ name: optimize_if_and_saveexec_xor_valu_middle liveins: @@ -251,7 +251,7 @@ bb.0.main_body: liveins: $vgpr0 - $sgpr0_sgpr1 = COPY $exec + $sgpr0_sgpr1 = PRED_COPY $exec $vcc = 
V_CMP_EQ_I32_e64 0, killed $vgpr0, implicit $exec $vgpr0 = V_MOV_B32_e32 4, implicit $exec $sgpr2_sgpr3 = S_AND_B64 $sgpr0_sgpr1, killed $vcc, implicit-def $scc @@ -282,7 +282,7 @@ # CHECK-LABEL: name: optimize_if_and_saveexec_xor_wrong_reg{{$}} # CHECK: $sgpr0_sgpr1 = S_AND_B64 $sgpr0_sgpr1, killed $vcc, implicit-def $scc # CHECK-NEXT: $sgpr0_sgpr1 = S_XOR_B64 undef $sgpr2_sgpr3, killed $sgpr0_sgpr1, implicit-def $scc -# CHECK-NEXT: $exec = COPY $sgpr0_sgpr1 +# CHECK-NEXT: $exec = PRED_COPY $sgpr0_sgpr1 # CHECK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec name: optimize_if_and_saveexec_xor_wrong_reg liveins: @@ -293,7 +293,7 @@ $sgpr6 = S_MOV_B32 -1 $sgpr7 = S_MOV_B32 61440 - $sgpr0_sgpr1 = COPY $exec + $sgpr0_sgpr1 = PRED_COPY $exec $vcc = V_CMP_EQ_I32_e64 0, killed $vgpr0, implicit $exec $vgpr0 = V_MOV_B32_e32 4, implicit $exec $sgpr0_sgpr1 = S_AND_B64 $sgpr0_sgpr1, killed $vcc, implicit-def $scc @@ -321,7 +321,7 @@ # CHECK: $sgpr2_sgpr3 = S_AND_B64 $sgpr0_sgpr1, killed $vcc, implicit-def $scc # CHECK-NEXT: $sgpr2_sgpr3 = S_OR_B64 killed $sgpr2_sgpr3, 1, implicit-def $scc # CHECK-NEXT: $sgpr0_sgpr1 = S_XOR_B64 $sgpr2_sgpr3, killed $sgpr0_sgpr1, implicit-def $scc -# CHECK-NEXT: $exec = COPY killed $sgpr2_sgpr3 +# CHECK-NEXT: $exec = PRED_COPY killed $sgpr2_sgpr3 # CHECK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec name: optimize_if_and_saveexec_xor_modify_copy_to_exec @@ -331,7 +331,7 @@ bb.0.main_body: liveins: $vgpr0 - $sgpr0_sgpr1 = COPY $exec + $sgpr0_sgpr1 = PRED_COPY $exec $vcc = V_CMP_EQ_I32_e64 0, killed $vgpr0, implicit $exec $vgpr0 = V_MOV_B32_e32 4, implicit $exec $sgpr2_sgpr3 = S_AND_B64 $sgpr0_sgpr1, killed $vcc, implicit-def $scc @@ -364,7 +364,7 @@ # CHECK-LABEL: name: optimize_if_and_saveexec_xor_live_out_setexec{{$}} # CHECK: $sgpr2_sgpr3 = S_AND_B64 $sgpr0_sgpr1, killed $vcc, implicit-def $scc # CHECK-NEXT: $sgpr0_sgpr1 = S_XOR_B64 $sgpr2_sgpr3, killed $sgpr0_sgpr1, implicit-def $scc -# CHECK-NEXT: $exec = COPY $sgpr2_sgpr3 +# CHECK-NEXT: $exec = PRED_COPY $sgpr2_sgpr3 # CHECK-NEXT: S_CBRANCH_EXECZ name: optimize_if_and_saveexec_xor_live_out_setexec liveins: @@ -373,7 +373,7 @@ bb.0.main_body: liveins: $vgpr0 - $sgpr0_sgpr1 = COPY $exec + $sgpr0_sgpr1 = PRED_COPY $exec $vcc = V_CMP_EQ_I32_e64 0, killed $vgpr0, implicit $exec $vgpr0 = V_MOV_B32_e32 4, implicit $exec $sgpr2_sgpr3 = S_AND_B64 $sgpr0_sgpr1, killed $vcc, implicit-def $scc @@ -401,9 +401,9 @@ ... 
# CHECK-LABEL: name: optimize_if_unknown_saveexec{{$}} -# CHECK: $sgpr0_sgpr1 = COPY $exec +# CHECK: $sgpr0_sgpr1 = PRED_COPY $exec # CHECK: $sgpr2_sgpr3 = S_LSHR_B64 $sgpr0_sgpr1, killed $vcc_lo, implicit-def $scc -# CHECK-NEXT: $exec = COPY killed $sgpr2_sgpr3 +# CHECK-NEXT: $exec = PRED_COPY killed $sgpr2_sgpr3 # CHECK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec name: optimize_if_unknown_saveexec @@ -413,7 +413,7 @@ bb.0.main_body: liveins: $vgpr0 - $sgpr0_sgpr1 = COPY $exec + $sgpr0_sgpr1 = PRED_COPY $exec $vcc = V_CMP_EQ_I32_e64 0, killed $vgpr0, implicit $exec $vgpr0 = V_MOV_B32_e32 4, implicit $exec $sgpr2_sgpr3 = S_LSHR_B64 $sgpr0_sgpr1, killed $vcc_lo, implicit-def $scc @@ -450,7 +450,7 @@ bb.0.main_body: liveins: $vgpr0 - $sgpr0_sgpr1 = COPY $exec + $sgpr0_sgpr1 = PRED_COPY $exec $vcc = V_CMP_EQ_I32_e64 0, killed $vgpr0, implicit $exec $vgpr0 = V_MOV_B32_e32 4, implicit $exec $sgpr2_sgpr3 = S_ANDN2_B64 $sgpr0_sgpr1, killed $vcc, implicit-def $scc @@ -478,7 +478,7 @@ --- # CHECK-LABEL: name: optimize_if_andn2_saveexec_no_commute{{$}} # CHECK: $sgpr2_sgpr3 = S_ANDN2_B64 killed $vcc, $sgpr0_sgpr1, implicit-def $scc -# CHECK-NEXT: $exec = COPY killed $sgpr2_sgpr3 +# CHECK-NEXT: $exec = PRED_COPY killed $sgpr2_sgpr3 # CHECK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec name: optimize_if_andn2_saveexec_no_commute liveins: @@ -487,7 +487,7 @@ bb.0.main_body: liveins: $vgpr0 - $sgpr0_sgpr1 = COPY $exec + $sgpr0_sgpr1 = PRED_COPY $exec $vcc = V_CMP_EQ_I32_e64 0, killed $vgpr0, implicit $exec $vgpr0 = V_MOV_B32_e32 4, implicit $exec $sgpr2_sgpr3 = S_ANDN2_B64 killed $vcc, $sgpr0_sgpr1, implicit-def $scc @@ -515,7 +515,7 @@ --- # A read from exec copy subreg prevents optimization # CHECK-LABEL: name: if_and_xor_read_exec_copy_subreg{{$}} -# CHECK: $sgpr0_sgpr1 = COPY $exec +# CHECK: $sgpr0_sgpr1 = PRED_COPY $exec # CHECK-NEXT: $sgpr4 = S_MOV_B32 $sgpr1 name: if_and_xor_read_exec_copy_subreg liveins: @@ -524,7 +524,7 @@ bb.0.main_body: liveins: $vgpr0 - $sgpr0_sgpr1 = COPY $exec + $sgpr0_sgpr1 = PRED_COPY $exec $sgpr4 = S_MOV_B32 $sgpr1 $vcc = V_CMP_EQ_I32_e64 0, killed $vgpr0, implicit $exec $vgpr0 = V_MOV_B32_e32 4, implicit $exec diff --git a/llvm/test/CodeGen/AMDGPU/partial-regcopy-and-spill-missed-at-regalloc.ll b/llvm/test/CodeGen/AMDGPU/partial-regcopy-and-spill-missed-at-regalloc.ll --- a/llvm/test/CodeGen/AMDGPU/partial-regcopy-and-spill-missed-at-regalloc.ll +++ b/llvm/test/CodeGen/AMDGPU/partial-regcopy-and-spill-missed-at-regalloc.ll @@ -12,20 +12,20 @@ ; REGALLOC-GFX908-NEXT: {{ $}} ; REGALLOC-GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1703945 /* reguse:AGPR_32 */, undef %5:agpr_32 ; REGALLOC-GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 5701642 /* regdef:VReg_128 */, def %26 - ; REGALLOC-GFX908-NEXT: [[COPY:%[0-9]+]]:av_128 = COPY %26 + ; REGALLOC-GFX908-NEXT: [[PRED_COPY:%[0-9]+]]:av_128 = PRED_COPY %26 ; REGALLOC-GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 3080202 /* regdef:VReg_64 */, def %23 ; REGALLOC-GFX908-NEXT: SI_SPILL_V64_SAVE %23, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, align 4, addrspace 5) - ; REGALLOC-GFX908-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY [[COPY]] - ; REGALLOC-GFX908-NEXT: GLOBAL_STORE_DWORDX4 undef %14:vreg_64, [[COPY1]], 0, 0, implicit $exec :: (volatile store (s128) into `ptr addrspace(1) undef`, addrspace 1) + ; REGALLOC-GFX908-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_128 = PRED_COPY [[PRED_COPY]] + ; REGALLOC-GFX908-NEXT: GLOBAL_STORE_DWORDX4 undef %14:vreg_64, 
[[PRED_COPY1]], 0, 0, implicit $exec :: (volatile store (s128) into `ptr addrspace(1) undef`, addrspace 1) ; REGALLOC-GFX908-NEXT: renamable $sgpr0_sgpr1_sgpr2_sgpr3 = S_LOAD_DWORDX4_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (s128) from %ir.arg.kernarg.offset1, addrspace 4) - ; REGALLOC-GFX908-NEXT: [[COPY2:%[0-9]+]]:areg_128 = COPY killed renamable $sgpr0_sgpr1_sgpr2_sgpr3 + ; REGALLOC-GFX908-NEXT: [[PRED_COPY2:%[0-9]+]]:areg_128 = PRED_COPY killed renamable $sgpr0_sgpr1_sgpr2_sgpr3 ; REGALLOC-GFX908-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec ; REGALLOC-GFX908-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2, implicit $exec - ; REGALLOC-GFX908-NEXT: [[V_MFMA_I32_4X4X4I8_e64_:%[0-9]+]]:areg_128 = V_MFMA_I32_4X4X4I8_e64 [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], [[COPY2]], 0, 0, 0, implicit $mode, implicit $exec + ; REGALLOC-GFX908-NEXT: [[V_MFMA_I32_4X4X4I8_e64_:%[0-9]+]]:areg_128 = V_MFMA_I32_4X4X4I8_e64 [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], [[PRED_COPY2]], 0, 0, 0, implicit $mode, implicit $exec ; REGALLOC-GFX908-NEXT: [[SI_SPILL_V64_RESTORE:%[0-9]+]]:vreg_64 = SI_SPILL_V64_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s64) from %stack.0, align 4, addrspace 5) ; REGALLOC-GFX908-NEXT: GLOBAL_STORE_DWORDX2 undef %16:vreg_64, [[SI_SPILL_V64_RESTORE]], 0, 0, implicit $exec :: (volatile store (s64) into `ptr addrspace(1) undef`, addrspace 1) - ; REGALLOC-GFX908-NEXT: [[COPY3:%[0-9]+]]:vreg_128 = COPY [[V_MFMA_I32_4X4X4I8_e64_]] - ; REGALLOC-GFX908-NEXT: GLOBAL_STORE_DWORDX4 undef %18:vreg_64, [[COPY3]], 0, 0, implicit $exec :: (volatile store (s128) into `ptr addrspace(1) undef`, addrspace 1) + ; REGALLOC-GFX908-NEXT: [[PRED_COPY3:%[0-9]+]]:vreg_128 = PRED_COPY [[V_MFMA_I32_4X4X4I8_e64_]] + ; REGALLOC-GFX908-NEXT: GLOBAL_STORE_DWORDX4 undef %18:vreg_64, [[PRED_COPY3]], 0, 0, implicit $exec :: (volatile store (s128) into `ptr addrspace(1) undef`, addrspace 1) ; REGALLOC-GFX908-NEXT: S_ENDPGM 0 ; PEI-GFX908-LABEL: name: partial_copy ; PEI-GFX908: bb.0 (%ir-block.0): @@ -36,21 +36,21 @@ ; PEI-GFX908-NEXT: $sgpr9 = S_ADDC_U32 $sgpr9, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11 ; PEI-GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1703945 /* reguse:AGPR_32 */, undef renamable $agpr0 ; PEI-GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 5701642 /* regdef:VReg_128 */, def renamable $vgpr0_vgpr1_vgpr2_vgpr3 - ; PEI-GFX908-NEXT: renamable $agpr0_agpr1_agpr2_agpr3 = COPY killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, implicit $exec + ; PEI-GFX908-NEXT: renamable $agpr0_agpr1_agpr2_agpr3 = PRED_COPY killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, implicit $exec ; PEI-GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 3080202 /* regdef:VReg_64 */, def renamable $vgpr0_vgpr1 ; PEI-GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr8_sgpr9_sgpr10_sgpr11, 0, 4, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 :: (store (s32) into %stack.0, addrspace 5) ; PEI-GFX908-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit killed $vgpr0_vgpr1 - ; PEI-GFX908-NEXT: renamable $vgpr0_vgpr1_vgpr2_vgpr3 = COPY killed renamable $agpr0_agpr1_agpr2_agpr3, implicit $exec + ; PEI-GFX908-NEXT: renamable $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY killed renamable $agpr0_agpr1_agpr2_agpr3, implicit $exec ; PEI-GFX908-NEXT: GLOBAL_STORE_DWORDX4 undef renamable $vgpr0_vgpr1, killed renamable 
$vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, implicit $exec :: (volatile store (s128) into `ptr addrspace(1) undef`, addrspace 1) ; PEI-GFX908-NEXT: renamable $sgpr0_sgpr1_sgpr2_sgpr3 = S_LOAD_DWORDX4_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (s128) from %ir.arg.kernarg.offset1, addrspace 4) - ; PEI-GFX908-NEXT: renamable $agpr0_agpr1_agpr2_agpr3 = COPY killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec + ; PEI-GFX908-NEXT: renamable $agpr0_agpr1_agpr2_agpr3 = PRED_COPY killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec ; PEI-GFX908-NEXT: renamable $vgpr0 = V_MOV_B32_e32 1, implicit $exec ; PEI-GFX908-NEXT: renamable $vgpr1 = V_MOV_B32_e32 2, implicit $exec ; PEI-GFX908-NEXT: renamable $agpr0_agpr1_agpr2_agpr3 = V_MFMA_I32_4X4X4I8_e64 killed $vgpr0, killed $vgpr1, killed $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec ; PEI-GFX908-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr8_sgpr9_sgpr10_sgpr11, 0, 4, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1 :: (load (s32) from %stack.0, addrspace 5) ; PEI-GFX908-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr4, implicit $exec, implicit $vgpr0_vgpr1 ; PEI-GFX908-NEXT: GLOBAL_STORE_DWORDX2 undef renamable $vgpr0_vgpr1, killed renamable $vgpr0_vgpr1, 0, 0, implicit $exec :: (volatile store (s64) into `ptr addrspace(1) undef`, addrspace 1) - ; PEI-GFX908-NEXT: renamable $vgpr0_vgpr1_vgpr2_vgpr3 = COPY killed renamable $agpr0_agpr1_agpr2_agpr3, implicit $exec + ; PEI-GFX908-NEXT: renamable $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY killed renamable $agpr0_agpr1_agpr2_agpr3, implicit $exec ; PEI-GFX908-NEXT: GLOBAL_STORE_DWORDX4 undef renamable $vgpr0_vgpr1, killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, implicit $exec :: (volatile store (s128) into `ptr addrspace(1) undef`, addrspace 1) ; PEI-GFX908-NEXT: S_ENDPGM 0 ; REGALLOC-GFX90A-LABEL: name: partial_copy @@ -59,15 +59,15 @@ ; REGALLOC-GFX90A-NEXT: {{ $}} ; REGALLOC-GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1703945 /* reguse:AGPR_32 */, undef %5:agpr_32 ; REGALLOC-GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6029322 /* regdef:VReg_128_Align2 */, def %25 - ; REGALLOC-GFX90A-NEXT: [[COPY:%[0-9]+]]:av_128_align2 = COPY %25 + ; REGALLOC-GFX90A-NEXT: [[PRED_COPY:%[0-9]+]]:av_128_align2 = PRED_COPY %25 ; REGALLOC-GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 3407882 /* regdef:VReg_64_Align2 */, def %23 ; REGALLOC-GFX90A-NEXT: SI_SPILL_V64_SAVE %23, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, align 4, addrspace 5) - ; REGALLOC-GFX90A-NEXT: GLOBAL_STORE_DWORDX4 undef %14:vreg_64_align2, [[COPY]], 0, 0, implicit $exec :: (volatile store (s128) into `ptr addrspace(1) undef`, addrspace 1) + ; REGALLOC-GFX90A-NEXT: GLOBAL_STORE_DWORDX4 undef %14:vreg_64_align2, [[PRED_COPY]], 0, 0, implicit $exec :: (volatile store (s128) into `ptr addrspace(1) undef`, addrspace 1) ; REGALLOC-GFX90A-NEXT: renamable $sgpr0_sgpr1_sgpr2_sgpr3 = S_LOAD_DWORDX4_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (s128) from %ir.arg.kernarg.offset1, addrspace 4) - ; REGALLOC-GFX90A-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = COPY killed renamable $sgpr0_sgpr1_sgpr2_sgpr3 + ; REGALLOC-GFX90A-NEXT: [[PRED_COPY1:%[0-9]+]]:areg_128_align2 = PRED_COPY killed renamable $sgpr0_sgpr1_sgpr2_sgpr3 ; REGALLOC-GFX90A-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec ; REGALLOC-GFX90A-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2, implicit $exec - ; 
REGALLOC-GFX90A-NEXT: [[V_MFMA_I32_4X4X4I8_e64_:%[0-9]+]]:areg_128_align2 = V_MFMA_I32_4X4X4I8_e64 [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], [[COPY1]], 0, 0, 0, implicit $mode, implicit $exec + ; REGALLOC-GFX90A-NEXT: [[V_MFMA_I32_4X4X4I8_e64_:%[0-9]+]]:areg_128_align2 = V_MFMA_I32_4X4X4I8_e64 [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], [[PRED_COPY1]], 0, 0, 0, implicit $mode, implicit $exec ; REGALLOC-GFX90A-NEXT: [[SI_SPILL_AV64_RESTORE:%[0-9]+]]:av_64_align2 = SI_SPILL_AV64_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s64) from %stack.0, align 4, addrspace 5) ; REGALLOC-GFX90A-NEXT: GLOBAL_STORE_DWORDX2 undef %16:vreg_64_align2, [[SI_SPILL_AV64_RESTORE]], 0, 0, implicit $exec :: (volatile store (s64) into `ptr addrspace(1) undef`, addrspace 1) ; REGALLOC-GFX90A-NEXT: GLOBAL_STORE_DWORDX4 undef %18:vreg_64_align2, [[V_MFMA_I32_4X4X4I8_e64_]], 0, 0, implicit $exec :: (volatile store (s128) into `ptr addrspace(1) undef`, addrspace 1) @@ -81,13 +81,13 @@ ; PEI-GFX90A-NEXT: $sgpr9 = S_ADDC_U32 $sgpr9, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11 ; PEI-GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1703945 /* reguse:AGPR_32 */, undef renamable $agpr0 ; PEI-GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6029322 /* regdef:VReg_128_Align2 */, def renamable $vgpr0_vgpr1_vgpr2_vgpr3 - ; PEI-GFX90A-NEXT: renamable $agpr0_agpr1_agpr2_agpr3 = COPY killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, implicit $exec + ; PEI-GFX90A-NEXT: renamable $agpr0_agpr1_agpr2_agpr3 = PRED_COPY killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, implicit $exec ; PEI-GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 3407882 /* regdef:VReg_64_Align2 */, def renamable $vgpr0_vgpr1 ; PEI-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr8_sgpr9_sgpr10_sgpr11, 0, 4, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 :: (store (s32) into %stack.0, addrspace 5) ; PEI-GFX90A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit killed $vgpr0_vgpr1 ; PEI-GFX90A-NEXT: GLOBAL_STORE_DWORDX4 undef renamable $vgpr0_vgpr1, killed renamable $agpr0_agpr1_agpr2_agpr3, 0, 0, implicit $exec :: (volatile store (s128) into `ptr addrspace(1) undef`, addrspace 1) ; PEI-GFX90A-NEXT: renamable $sgpr0_sgpr1_sgpr2_sgpr3 = S_LOAD_DWORDX4_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (s128) from %ir.arg.kernarg.offset1, addrspace 4) - ; PEI-GFX90A-NEXT: renamable $agpr0_agpr1_agpr2_agpr3 = COPY killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec + ; PEI-GFX90A-NEXT: renamable $agpr0_agpr1_agpr2_agpr3 = PRED_COPY killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec ; PEI-GFX90A-NEXT: renamable $vgpr0 = V_MOV_B32_e32 1, implicit $exec ; PEI-GFX90A-NEXT: renamable $vgpr1 = V_MOV_B32_e32 2, implicit $exec ; PEI-GFX90A-NEXT: renamable $agpr0_agpr1_agpr2_agpr3 = V_MFMA_I32_4X4X4I8_e64 killed $vgpr0, killed $vgpr1, killed $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec diff --git a/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr-carry-out.mir b/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr-carry-out.mir --- a/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr-carry-out.mir +++ b/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr-carry-out.mir @@ -41,12 +41,12 @@ ; CHECK-NEXT: S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, 
implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr28, implicit-def $sgpr29, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc ; CHECK-NEXT: $sgpr33 = S_LSHR_B32 $sgpr33, 6, implicit-def $scc ; CHECK-NEXT: $sgpr33 = S_ADD_I32 killed $sgpr33, 8192, implicit-def $scc - ; CHECK-NEXT: $vgpr0 = COPY killed $sgpr33 + ; CHECK-NEXT: $vgpr0 = PRED_COPY killed $sgpr33 ; CHECK-NEXT: $sgpr33 = S_ADD_I32 killed $sgpr33, -8192, implicit-def $scc ; CHECK-NEXT: $sgpr33 = S_LSHL_B32 $sgpr33, 6, implicit-def $scc ; CHECK-NEXT: $sgpr33 = S_LSHR_B32 $sgpr33, 6, implicit-def $scc ; CHECK-NEXT: $sgpr33 = S_ADD_I32 killed $sgpr33, 16384, implicit-def $scc - ; CHECK-NEXT: $vgpr3 = COPY killed $sgpr33 + ; CHECK-NEXT: $vgpr3 = PRED_COPY killed $sgpr33 ; CHECK-NEXT: $sgpr33 = S_ADD_I32 killed $sgpr33, -16384, implicit-def $scc ; CHECK-NEXT: $sgpr33 = S_LSHL_B32 $sgpr33, 6, implicit-def $scc ; CHECK-NEXT: $vgpr0 = V_OR_B32_e32 killed $vgpr3, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28, implicit $sgpr29, implicit $sgpr30, implicit $sgpr31 @@ -95,12 +95,12 @@ ; CHECK-NEXT: S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr28, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc ; CHECK-NEXT: $sgpr33 = S_LSHR_B32 $sgpr33, 6, implicit-def $scc ; CHECK-NEXT: $sgpr33 = S_ADD_I32 killed $sgpr33, 8192, implicit-def $scc - ; CHECK-NEXT: $vgpr0 = COPY killed $sgpr33 + ; CHECK-NEXT: $vgpr0 = PRED_COPY killed $sgpr33 ; CHECK-NEXT: $sgpr33 = S_ADD_I32 killed $sgpr33, -8192, implicit-def $scc ; CHECK-NEXT: $sgpr33 = S_LSHL_B32 $sgpr33, 6, implicit-def $scc ; CHECK-NEXT: $sgpr33 = S_LSHR_B32 $sgpr33, 6, implicit-def $scc ; CHECK-NEXT: $sgpr33 = S_ADD_I32 killed $sgpr33, 16384, implicit-def $scc - ; CHECK-NEXT: $vgpr2 = COPY killed $sgpr33 + ; CHECK-NEXT: $vgpr2 = PRED_COPY killed $sgpr33 ; CHECK-NEXT: $sgpr33 = S_ADD_I32 killed $sgpr33, -16384, implicit-def $scc ; CHECK-NEXT: $sgpr33 = S_LSHL_B32 $sgpr33, 6, implicit-def $scc ; CHECK-NEXT: $vgpr0 = V_OR_B32_e32 killed $vgpr2, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, 
implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28, implicit $sgpr31 @@ -144,10 +144,10 @@ ; CHECK-NEXT: S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc ; CHECK-NEXT: $sgpr29 = S_LSHR_B32 $sgpr33, 6, implicit-def $scc ; CHECK-NEXT: $sgpr29 = S_ADD_I32 killed $sgpr29, 8192, implicit-def $scc - ; CHECK-NEXT: $vgpr0 = COPY killed $sgpr29 + ; CHECK-NEXT: $vgpr0 = PRED_COPY killed $sgpr29 ; CHECK-NEXT: $sgpr29 = S_LSHR_B32 $sgpr33, 6, implicit-def $scc ; CHECK-NEXT: $sgpr29 = S_ADD_I32 killed $sgpr29, 16384, implicit-def $scc - ; CHECK-NEXT: $vgpr2 = COPY killed $sgpr29 + ; CHECK-NEXT: $vgpr2 = PRED_COPY killed $sgpr29 ; CHECK-NEXT: $vgpr0 = V_OR_B32_e32 killed $vgpr2, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr31 ; CHECK-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -2097152, implicit-def dead $scc ; CHECK-NEXT: $sgpr33 = frame-destroy COPY $sgpr28 diff --git a/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr.mir b/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr.mir --- a/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr.mir +++ b/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr.mir @@ -36,7 +36,7 @@ ; CHECK-NEXT: S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr28, implicit-def $sgpr29, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc ; CHECK-NEXT: $sgpr33 = S_LSHR_B32 $sgpr33, 6, implicit-def $scc ; CHECK-NEXT: $sgpr33 = S_ADD_I32 killed $sgpr33, 4096, implicit-def $scc - ; CHECK-NEXT: $vgpr3 = COPY killed $sgpr33 + ; CHECK-NEXT: $vgpr3 = PRED_COPY killed $sgpr33 ; CHECK-NEXT: $sgpr33 = S_ADD_I32 killed $sgpr33, -4096, implicit-def $scc ; CHECK-NEXT: $sgpr33 = S_LSHL_B32 $sgpr33, 6, implicit-def $scc ; CHECK-NEXT: $vgpr0 = V_OR_B32_e32 killed $vgpr3, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, 
implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28, implicit $sgpr29, implicit $sgpr30, implicit $sgpr31 diff --git a/llvm/test/CodeGen/AMDGPU/phi-elimination-assertion.mir b/llvm/test/CodeGen/AMDGPU/phi-elimination-assertion.mir --- a/llvm/test/CodeGen/AMDGPU/phi-elimination-assertion.mir +++ b/llvm/test/CodeGen/AMDGPU/phi-elimination-assertion.mir @@ -27,7 +27,7 @@ # CHECK-LABEL: name: foo # CHECK: bb.3: # CHECK-NEXT: dead %2:sreg_32_xm0 = IMPLICIT_DEF -# CHECK-NEXT: %3:sreg_32_xm0 = COPY killed %4 +# CHECK-NEXT: %3:sreg_32_xm0 = PRED_COPY killed %4 # CHECK-NEXT: S_NOP 0, implicit killed %3 @@ -36,8 +36,8 @@ # With this PHI node order we did not hit the assert, but we used to get # # bb.3: -# dead %3:sreg_32_xm0 = COPY killed %4 -# %2:sreg_32_xm0 = COPY %4 +# dead %3:sreg_32_xm0 = PRED_COPY killed %4 +# %2:sreg_32_xm0 = PRED_COPY %4 # S_NOP 0, implicit killed %2 # # which looks weird regarding killed flags for %4. @@ -65,7 +65,7 @@ # CHECK-LABEL: name: bar # CHECK: bb.3: # CHECK-NEXT: dead %3:sreg_32_xm0 = IMPLICIT_DEF -# CHECK-NEXT: %2:sreg_32_xm0 = COPY killed %4 +# CHECK-NEXT: %2:sreg_32_xm0 = PRED_COPY killed %4 # CHECK-NEXT: S_NOP 0, implicit killed %2 @@ -92,4 +92,4 @@ # CHECK-LABEL: name: bax # CHECK: bb.3: -# CHECK-NEXT: %2:sreg_32_xm0 = COPY killed %3 +# CHECK-NEXT: %2:sreg_32_xm0 = PRED_COPY killed %3 diff --git a/llvm/test/CodeGen/AMDGPU/phi-elimination-end-cf.mir b/llvm/test/CodeGen/AMDGPU/phi-elimination-end-cf.mir --- a/llvm/test/CodeGen/AMDGPU/phi-elimination-end-cf.mir +++ b/llvm/test/CodeGen/AMDGPU/phi-elimination-end-cf.mir @@ -7,7 +7,7 @@ # CHECK: [[IF_INPUT_REG:%[0-9]+]]:sreg_64 = S_MOV_B64_term killed [[IF_SOURCE0]], implicit $exec # CHECK: bb.1: -# CHECK: [[END_CF_ARG:%[0-9]+]]:sreg_64 = COPY killed [[IF_INPUT_REG]] +# CHECK: [[END_CF_ARG:%[0-9]+]]:sreg_64 = PRED_COPY killed [[IF_INPUT_REG]] # CHECK: SI_END_CF killed [[END_CF_ARG]], implicit-def dead $exec, implicit-def dead $scc, implicit $exec # CHECK: bb.2: @@ -25,7 +25,7 @@ successors: %bb.3(0x40000000), %bb.2(0x40000000) liveins: $vgpr0 - %5:vgpr_32(s32) = COPY $vgpr0 + %5:vgpr_32(s32) = PRED_COPY $vgpr0 %0:sreg_64 = V_CMP_EQ_U32_e64 0, %5(s32), implicit $exec %18:vgpr_32 = V_MOV_B32_e32 0, implicit $exec %22:sreg_64 = SI_IF %0, %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec @@ -46,8 +46,8 @@ %15:sreg_32_xm0 = S_MOV_B32 61440 %16:sreg_32_xm0 = S_MOV_B32 -1 %17:sgpr_128 = REG_SEQUENCE undef %14:sreg_32_xm0, %subreg.sub0, undef %12:sreg_32_xm0, %subreg.sub1, %16, %subreg.sub2, %15, %subreg.sub3 - BUFFER_STORE_DWORD_OFFSET %4, %17, 0, 0, 0, 0, implicit $exec :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1) - %19:vgpr_32 = COPY %4 + BUFFER_STORE_DWORD_OFFSET %4, %17, 0, 0, 0, 0, implicit $exec :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + %19:vgpr_32 = PRED_COPY %4 %20:sreg_64 = SI_IF %0, %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec S_BRANCH %bb.3 diff --git a/llvm/test/CodeGen/AMDGPU/ran-out-of-sgprs-allocation-failure.mir b/llvm/test/CodeGen/AMDGPU/ran-out-of-sgprs-allocation-failure.mir --- a/llvm/test/CodeGen/AMDGPU/ran-out-of-sgprs-allocation-failure.mir +++ b/llvm/test/CodeGen/AMDGPU/ran-out-of-sgprs-allocation-failure.mir @@ -86,8 +86,8 @@ ; CHECK-NEXT: renamable $sgpr67 = COPY killed renamable $sgpr68 ; CHECK-NEXT: renamable 
$sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95_sgpr96_sgpr97_sgpr98_sgpr99 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5) ; CHECK-NEXT: renamable $sgpr68 = COPY killed renamable $sgpr84 - ; CHECK-NEXT: renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51 = COPY killed renamable $sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67 - ; CHECK-NEXT: renamable $sgpr52 = COPY renamable $sgpr68 + ; CHECK-NEXT: renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51 = PRED_COPY killed renamable $sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67 + ; CHECK-NEXT: renamable $sgpr52 = PRED_COPY renamable $sgpr68 ; CHECK-NEXT: renamable $sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5) ; CHECK-NEXT: renamable $sgpr53 = COPY killed renamable $sgpr72 ; CHECK-NEXT: renamable $sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5) @@ -126,8 +126,8 @@ ; CHECK-NEXT: successors: %bb.4(0x80000000) ; CHECK-NEXT: liveins: $sgpr15, $sgpr16, $sgpr33 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: renamable $sgpr60 = COPY killed renamable $sgpr33 - ; CHECK-NEXT: renamable $sgpr62 = COPY killed renamable $sgpr15 + ; CHECK-NEXT: renamable $sgpr60 = PRED_COPY killed renamable $sgpr33 + ; CHECK-NEXT: renamable $sgpr62 = PRED_COPY killed renamable $sgpr15 ; CHECK-NEXT: SI_SPILL_S32_SAVE killed renamable $sgpr16, %stack.0, implicit $exec, implicit $sgpr32 :: (store (s32) into %stack.0, addrspace 5) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 ; CHECK-NEXT: dead $sgpr30_sgpr31 = SI_CALL undef renamable $sgpr4_sgpr5, 0, CustomRegMask($sgpr60,$sgpr62) @@ -158,7 +158,7 @@ ; CHECK-NEXT: successors: %bb.7(0x80000000) ; CHECK-NEXT: liveins: $sgpr15, $sgpr16, $sgpr33, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr20_sgpr21, $sgpr22_sgpr23, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr88_sgpr89, $sgpr100_sgpr101 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: dead %27:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, $sgpr22_sgpr23, implicit $exec + ; CHECK-NEXT: dead [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, $sgpr22_sgpr23, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.7: ; CHECK-NEXT: successors: %bb.8(0x80000000) @@ -166,7 +166,7 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: renamable $sgpr90_sgpr91 = nofpexcept V_CMP_NLT_F64_e64 0, undef $sgpr4_sgpr5, 0, undef %29:vreg_64_align2, 0, implicit $mode, implicit $exec ; CHECK-NEXT: renamable $sgpr92_sgpr93 = nofpexcept V_CMP_NLT_F64_e64 
0, 4607182418800017408, 0, undef %29:vreg_64_align2, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: dead %30:vgpr_32 = V_INDIRECT_REG_READ_GPR_IDX_B32_V32 [[COPY1]], undef $sgpr33, 11, implicit-def $m0, implicit $m0, implicit $exec + ; CHECK-NEXT: dead [[V_INDIRECT_REG_READ_GPR_IDX_B32_V32_:%[0-9]+]]:vgpr_32 = V_INDIRECT_REG_READ_GPR_IDX_B32_V32 [[COPY1]], undef $sgpr33, 11, implicit-def $m0, implicit $m0, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.8: ; CHECK-NEXT: successors: %bb.10(0x40000000), %bb.9(0x40000000) @@ -182,40 +182,40 @@ ; CHECK-NEXT: renamable $sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5) ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vreg_64_align2 = COPY killed renamable $sgpr68_sgpr69, implicit $exec ; CHECK-NEXT: GLOBAL_STORE_DWORDX2_SADDR undef %18:vgpr_32, [[COPY2]], undef renamable $sgpr4_sgpr5, 0, 0, implicit $exec :: (store (s64), addrspace 1) - ; CHECK-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, $sgpr12_sgpr13, implicit $exec - ; CHECK-NEXT: dead renamable $sgpr4_sgpr5 = V_CMP_NE_U32_e64 1, [[V_CNDMASK_B32_e64_]], implicit $exec + ; CHECK-NEXT: [[V_CNDMASK_B32_e64_1:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, $sgpr12_sgpr13, implicit $exec + ; CHECK-NEXT: dead renamable $sgpr4_sgpr5 = V_CMP_NE_U32_e64 1, [[V_CNDMASK_B32_e64_1]], implicit $exec ; CHECK-NEXT: renamable $sgpr64 = S_ADD_U32 renamable $sgpr8, 32, implicit-def dead $scc ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 ; CHECK-NEXT: $sgpr4_sgpr5 = COPY renamable $sgpr34_sgpr35 - ; CHECK-NEXT: renamable $sgpr52_sgpr53 = COPY killed renamable $sgpr6_sgpr7 + ; CHECK-NEXT: renamable $sgpr52_sgpr53 = PRED_COPY killed renamable $sgpr6_sgpr7 ; CHECK-NEXT: $sgpr6_sgpr7 = COPY renamable $sgpr52_sgpr53 - ; CHECK-NEXT: renamable $sgpr38_sgpr39 = COPY killed renamable $sgpr10_sgpr11 + ; CHECK-NEXT: renamable $sgpr38_sgpr39 = PRED_COPY killed renamable $sgpr10_sgpr11 ; CHECK-NEXT: $sgpr10_sgpr11 = COPY renamable $sgpr38_sgpr39 - ; CHECK-NEXT: renamable $sgpr42_sgpr43 = COPY killed renamable $sgpr12_sgpr13 + ; CHECK-NEXT: renamable $sgpr42_sgpr43 = PRED_COPY killed renamable $sgpr12_sgpr13 ; CHECK-NEXT: $sgpr12 = COPY renamable $sgpr33 ; CHECK-NEXT: $sgpr13 = COPY renamable $sgpr15 - ; CHECK-NEXT: renamable $sgpr36 = COPY killed renamable $sgpr16 - ; CHECK-NEXT: renamable $sgpr37 = COPY killed renamable $sgpr15 - ; CHECK-NEXT: renamable $sgpr40 = COPY killed renamable $sgpr8 - ; CHECK-NEXT: renamable $sgpr44_sgpr45 = COPY killed renamable $sgpr18_sgpr19 - ; CHECK-NEXT: renamable $sgpr46_sgpr47 = COPY killed renamable $sgpr20_sgpr21 - ; CHECK-NEXT: renamable $sgpr48_sgpr49 = COPY killed renamable $sgpr22_sgpr23 - ; CHECK-NEXT: renamable $sgpr50_sgpr51 = COPY killed renamable $sgpr24_sgpr25 + ; CHECK-NEXT: renamable $sgpr36 = PRED_COPY killed renamable $sgpr16 + ; CHECK-NEXT: renamable $sgpr37 = PRED_COPY killed renamable $sgpr15 + ; CHECK-NEXT: renamable $sgpr40 = PRED_COPY killed renamable $sgpr8 + ; CHECK-NEXT: renamable $sgpr44_sgpr45 = PRED_COPY killed renamable $sgpr18_sgpr19 + ; CHECK-NEXT: renamable $sgpr46_sgpr47 = PRED_COPY killed renamable $sgpr20_sgpr21 + ; CHECK-NEXT: renamable $sgpr48_sgpr49 = PRED_COPY killed renamable 
$sgpr22_sgpr23 + ; CHECK-NEXT: renamable $sgpr50_sgpr51 = PRED_COPY killed renamable $sgpr24_sgpr25 ; CHECK-NEXT: dead $sgpr30_sgpr31 = SI_CALL undef renamable $sgpr4_sgpr5, 0, csr_amdgpu_gfx90ainsts, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 ; CHECK-NEXT: $sgpr8_sgpr9 = COPY renamable $sgpr64_sgpr65 ; CHECK-NEXT: dead $sgpr30_sgpr31 = SI_CALL undef renamable $sgpr4_sgpr5, 0, csr_amdgpu_gfx90ainsts, implicit $sgpr8_sgpr9 - ; CHECK-NEXT: renamable $sgpr24_sgpr25 = COPY killed renamable $sgpr50_sgpr51 - ; CHECK-NEXT: renamable $sgpr22_sgpr23 = COPY killed renamable $sgpr48_sgpr49 - ; CHECK-NEXT: renamable $sgpr20_sgpr21 = COPY killed renamable $sgpr46_sgpr47 - ; CHECK-NEXT: renamable $sgpr18_sgpr19 = COPY killed renamable $sgpr44_sgpr45 - ; CHECK-NEXT: renamable $sgpr12_sgpr13 = COPY killed renamable $sgpr42_sgpr43 - ; CHECK-NEXT: renamable $sgpr8 = COPY killed renamable $sgpr40 - ; CHECK-NEXT: renamable $sgpr10_sgpr11 = COPY killed renamable $sgpr38_sgpr39 - ; CHECK-NEXT: renamable $sgpr15 = COPY killed renamable $sgpr37 - ; CHECK-NEXT: renamable $sgpr16 = COPY killed renamable $sgpr36 - ; CHECK-NEXT: renamable $sgpr6_sgpr7 = COPY killed renamable $sgpr52_sgpr53 + ; CHECK-NEXT: renamable $sgpr24_sgpr25 = PRED_COPY killed renamable $sgpr50_sgpr51 + ; CHECK-NEXT: renamable $sgpr22_sgpr23 = PRED_COPY killed renamable $sgpr48_sgpr49 + ; CHECK-NEXT: renamable $sgpr20_sgpr21 = PRED_COPY killed renamable $sgpr46_sgpr47 + ; CHECK-NEXT: renamable $sgpr18_sgpr19 = PRED_COPY killed renamable $sgpr44_sgpr45 + ; CHECK-NEXT: renamable $sgpr12_sgpr13 = PRED_COPY killed renamable $sgpr42_sgpr43 + ; CHECK-NEXT: renamable $sgpr8 = PRED_COPY killed renamable $sgpr40 + ; CHECK-NEXT: renamable $sgpr10_sgpr11 = PRED_COPY killed renamable $sgpr38_sgpr39 + ; CHECK-NEXT: renamable $sgpr15 = PRED_COPY killed renamable $sgpr37 + ; CHECK-NEXT: renamable $sgpr16 = PRED_COPY killed renamable $sgpr36 + ; CHECK-NEXT: renamable $sgpr6_sgpr7 = PRED_COPY killed renamable $sgpr52_sgpr53 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 ; CHECK-NEXT: $exec = S_MOV_B64_term renamable $sgpr92_sgpr93 ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.10, implicit $exec diff --git a/llvm/test/CodeGen/AMDGPU/regalloc-fail-unsatisfiable-overlapping-tuple-hints.mir b/llvm/test/CodeGen/AMDGPU/regalloc-fail-unsatisfiable-overlapping-tuple-hints.mir --- a/llvm/test/CodeGen/AMDGPU/regalloc-fail-unsatisfiable-overlapping-tuple-hints.mir +++ b/llvm/test/CodeGen/AMDGPU/regalloc-fail-unsatisfiable-overlapping-tuple-hints.mir @@ -52,13 +52,13 @@ ; CHECK-NEXT: [[SI_SPILL_V256_RESTORE:%[0-9]+]]:vreg_256 = SI_SPILL_V256_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s256) from %stack.1, align 4, addrspace 5) ; CHECK-NEXT: [[SI_SPILL_V256_RESTORE1:%[0-9]+]]:vreg_256 = SI_SPILL_V256_RESTORE %stack.3, $sgpr32, 0, implicit $exec :: (load (s256) from %stack.3, align 4, addrspace 5) ; CHECK-NEXT: S_NOP 0, implicit [[SI_SPILL_V256_RESTORE]], implicit [[SI_SPILL_V256_RESTORE1]], implicit %4 - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_256 = COPY [[SI_SPILL_V256_RESTORE1]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_256 = PRED_COPY [[SI_SPILL_V256_RESTORE1]] ; CHECK-NEXT: S_CBRANCH_EXECNZ %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: ; CHECK-NEXT: 
successors: %bb.2(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: S_NOP 0, implicit [[COPY]] + ; CHECK-NEXT: S_NOP 0, implicit [[PRED_COPY]] ; CHECK-NEXT: [[SI_SPILL_V256_RESTORE2:%[0-9]+]]:vreg_256 = SI_SPILL_V256_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s256) from %stack.0, align 4, addrspace 5) ; CHECK-NEXT: S_NOP 0, implicit [[SI_SPILL_V256_RESTORE2]] ; CHECK-NEXT: [[SI_SPILL_V256_RESTORE3:%[0-9]+]]:vreg_256 = SI_SPILL_V256_RESTORE %stack.2, $sgpr32, 0, implicit $exec :: (load (s256) from %stack.2, align 4, addrspace 5) diff --git a/llvm/test/CodeGen/AMDGPU/s_add_co_pseudo_lowering.mir b/llvm/test/CodeGen/AMDGPU/s_add_co_pseudo_lowering.mir --- a/llvm/test/CodeGen/AMDGPU/s_add_co_pseudo_lowering.mir +++ b/llvm/test/CodeGen/AMDGPU/s_add_co_pseudo_lowering.mir @@ -23,13 +23,13 @@ ; GCN-NEXT: [[S_MUL_HI_U32_:%[0-9]+]]:sreg_32 = S_MUL_HI_U32 [[COPY3]], [[COPY4]] ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -614296167 ; GCN-NEXT: [[V_MUL_LO_U32_e64_1:%[0-9]+]]:vgpr_32 = V_MUL_LO_U32_e64 [[COPY]], [[COPY2]], implicit $exec - ; GCN-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY killed [[S_MOV_B32_]] - ; GCN-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 killed [[V_MUL_LO_U32_e64_1]], [[COPY6]], [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY killed [[S_MOV_B32_]] + ; GCN-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 killed [[V_MUL_LO_U32_e64_1]], [[PRED_COPY]], [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GCN-NEXT: [[V_MUL_HI_U32_e64_:%[0-9]+]]:vgpr_32 = V_MUL_HI_U32_e64 [[COPY3]], [[V_ADDC_U32_e64_]], implicit $exec ; GCN-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -181084736 ; GCN-NEXT: [[V_MUL_LO_U32_e64_2:%[0-9]+]]:vgpr_32 = V_MUL_LO_U32_e64 [[V_MUL_HI_U32_e64_]], [[S_MOV_B32_1]], implicit $exec - ; GCN-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY killed [[S_MOV_B32_1]] - ; GCN-NEXT: [[V_ADDC_U32_e64_2:%[0-9]+]]:vgpr_32, [[V_ADDC_U32_e64_3:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY7]], killed [[V_MUL_LO_U32_e64_2]], [[V_ADDC_U32_e64_1]], 0, implicit $exec + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY killed [[S_MOV_B32_1]] + ; GCN-NEXT: [[V_ADDC_U32_e64_2:%[0-9]+]]:vgpr_32, [[V_ADDC_U32_e64_3:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY1]], killed [[V_MUL_LO_U32_e64_2]], [[V_ADDC_U32_e64_1]], 0, implicit $exec %0:vgpr_32 = COPY $vgpr0 %6:sreg_32 = COPY %0 %1:vgpr_32 = COPY $vgpr1 diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-regalloc-flags.ll b/llvm/test/CodeGen/AMDGPU/sgpr-regalloc-flags.ll --- a/llvm/test/CodeGen/AMDGPU/sgpr-regalloc-flags.ll +++ b/llvm/test/CodeGen/AMDGPU/sgpr-regalloc-flags.ll @@ -22,13 +22,14 @@ ; DEFAULT-NEXT: Live Register Matrix ; DEFAULT-NEXT: Greedy Register Allocator ; DEFAULT-NEXT: GCN NSA Reassign +; DEFAULT-NEXT: SI Simplify Predicated Copies ; DEFAULT-NEXT: Virtual Register Rewriter ; DEFAULT-NEXT: Stack Slot Coloring ; O0: Fast Register Allocator ; O0-NEXT: SI lower SGPR spill instructions ; O0-NEXT: Fast Register Allocator -; O0-NEXT: SI Fix VGPR copies +; O0-NEXT: SI Simplify Predicated Copies @@ -50,6 +51,7 @@ ; BASIC-DEFAULT-NEXT: Machine Optimization Remark Emitter ; BASIC-DEFAULT-NEXT: Greedy Register Allocator ; BASIC-DEFAULT-NEXT: GCN NSA Reassign +; BASIC-DEFAULT-NEXT: SI Simplify Predicated Copies ; BASIC-DEFAULT-NEXT: Virtual Register Rewriter ; BASIC-DEFAULT-NEXT: Stack Slot Coloring @@ -62,6 +64,7 @@ ; DEFAULT-BASIC-NEXT: Live Register Matrix ; 
DEFAULT-BASIC-NEXT: Basic Register Allocator ; DEFAULT-BASIC-NEXT: GCN NSA Reassign +; DEFAULT-BASIC-NEXT: SI Simplify Predicated Copies ; DEFAULT-BASIC-NEXT: Virtual Register Rewriter ; DEFAULT-BASIC-NEXT: Stack Slot Coloring @@ -80,6 +83,7 @@ ; BASIC-BASIC-NEXT: Live Register Matrix ; BASIC-BASIC-NEXT: Basic Register Allocator ; BASIC-BASIC-NEXT: GCN NSA Reassign +; BASIC-BASIC-NEXT: SI Simplify Predicated Copies ; BASIC-BASIC-NEXT: Virtual Register Rewriter ; BASIC-BASIC-NEXT: Stack Slot Coloring diff --git a/llvm/test/CodeGen/AMDGPU/si-lower-control-flow.mir b/llvm/test/CodeGen/AMDGPU/si-lower-control-flow.mir --- a/llvm/test/CodeGen/AMDGPU/si-lower-control-flow.mir +++ b/llvm/test/CodeGen/AMDGPU/si-lower-control-flow.mir @@ -11,7 +11,7 @@ ; GCN: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; GCN-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 16, 0 ; GCN-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0 = S_AND_B32 [[S_LOAD_DWORD_IMM]], 255, implicit-def $scc - ; GCN-NEXT: dead %3:sreg_32_xm0 = S_AND_B32 65535, [[S_AND_B32_]], implicit-def $scc + ; GCN-NEXT: dead [[S_AND_B32_1:%[0-9]+]]:sreg_32_xm0 = S_AND_B32 65535, [[S_AND_B32_]], implicit-def $scc ; GCN-NEXT: S_ENDPGM 0 %0:sgpr_64 = COPY $sgpr4_sgpr5 %1:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %0, 16, 0 @@ -28,9 +28,9 @@ ; GCN: bb.0: ; GCN-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec - ; GCN-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY]], undef %1:sreg_64, implicit-def dead $scc - ; GCN-NEXT: dead %0:sreg_64 = S_XOR_B64 [[S_AND_B64_]], [[COPY]], implicit-def dead $scc + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $exec, implicit-def $exec + ; GCN-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[PRED_COPY]], undef %1:sreg_64, implicit-def dead $scc + ; GCN-NEXT: dead [[S_XOR_B64_:%[0-9]+]]:sreg_64 = S_XOR_B64 [[S_AND_B64_]], [[PRED_COPY]], implicit-def dead $scc ; GCN-NEXT: $exec = S_MOV_B64_term [[S_AND_B64_]] ; GCN-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec ; GCN-NEXT: S_BRANCH %bb.1 @@ -68,9 +68,9 @@ ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GCN-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[COPY]], [[COPY1]], implicit $exec - ; GCN-NEXT: [[COPY2:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec - ; GCN-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY2]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc - ; GCN-NEXT: [[S_XOR_B64_:%[0-9]+]]:sreg_64_xexec = S_XOR_B64 [[S_AND_B64_]], [[COPY2]], implicit-def dead $scc + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $exec, implicit-def $exec + ; GCN-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[PRED_COPY]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc + ; GCN-NEXT: [[S_XOR_B64_:%[0-9]+]]:sreg_64_xexec = S_XOR_B64 [[S_AND_B64_]], [[PRED_COPY]], implicit-def dead $scc ; GCN-NEXT: $exec = S_MOV_B64_term [[S_AND_B64_]] ; GCN-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term [[S_XOR_B64_]], implicit $exec ; GCN-NEXT: S_CBRANCH_EXECZ %bb.1, implicit $exec @@ -79,8 +79,8 @@ ; GCN-NEXT: bb.1: ; GCN-NEXT: successors: %bb.2(0x80000000) ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sreg_64_xexec = COPY [[S_MOV_B64_term]] - ; GCN-NEXT: $exec = S_OR_B64_term $exec, [[COPY3]], implicit-def $scc + ; GCN-NEXT: [[COPY2:%[0-9]+]]:sreg_64_xexec = COPY [[S_MOV_B64_term]] + ; GCN-NEXT: $exec = S_OR_B64_term $exec, [[COPY2]], implicit-def 
$scc ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.2: ; GCN-NEXT: S_ENDPGM 0 @@ -117,9 +117,9 @@ ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GCN-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[COPY]], [[COPY1]], implicit $exec - ; GCN-NEXT: [[COPY2:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec - ; GCN-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY2]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc - ; GCN-NEXT: [[S_XOR_B64_:%[0-9]+]]:sreg_64_xexec = S_XOR_B64 [[S_AND_B64_]], [[COPY2]], implicit-def dead $scc + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $exec, implicit-def $exec + ; GCN-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[PRED_COPY]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc + ; GCN-NEXT: [[S_XOR_B64_:%[0-9]+]]:sreg_64_xexec = S_XOR_B64 [[S_AND_B64_]], [[PRED_COPY]], implicit-def dead $scc ; GCN-NEXT: $exec = S_MOV_B64_term [[S_AND_B64_]] ; GCN-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term [[S_XOR_B64_]], implicit $exec ; GCN-NEXT: S_CBRANCH_EXECZ %bb.1, implicit $exec @@ -129,9 +129,9 @@ ; GCN-NEXT: successors: %bb.3(0x80000000) ; GCN-NEXT: liveins: $vgpr0, $sgpr4_sgpr5 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sreg_64_xexec = COPY [[S_MOV_B64_term]] + ; GCN-NEXT: [[COPY2:%[0-9]+]]:sreg_64_xexec = COPY [[S_MOV_B64_term]] ; GCN-NEXT: S_NOP 0 - ; GCN-NEXT: $exec = S_OR_B64_term $exec, [[COPY3]], implicit-def $scc + ; GCN-NEXT: $exec = S_OR_B64_term $exec, [[COPY2]], implicit-def $scc ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.3: ; GCN-NEXT: successors: %bb.2(0x80000000) @@ -179,9 +179,9 @@ ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GCN-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[COPY]], [[COPY1]], implicit $exec - ; GCN-NEXT: [[COPY2:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec - ; GCN-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY2]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc - ; GCN-NEXT: [[S_XOR_B64_:%[0-9]+]]:sreg_64_xexec = S_XOR_B64 [[S_AND_B64_]], [[COPY2]], implicit-def dead $scc + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $exec, implicit-def $exec + ; GCN-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[PRED_COPY]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc + ; GCN-NEXT: [[S_XOR_B64_:%[0-9]+]]:sreg_64_xexec = S_XOR_B64 [[S_AND_B64_]], [[PRED_COPY]], implicit-def dead $scc ; GCN-NEXT: $exec = S_MOV_B64_term [[S_AND_B64_]] ; GCN-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term [[S_XOR_B64_]], implicit $exec ; GCN-NEXT: S_CBRANCH_EXECZ %bb.1, implicit $exec @@ -191,8 +191,8 @@ ; GCN-NEXT: successors: %bb.3(0x80000000) ; GCN-NEXT: liveins: $vgpr0, $sgpr4_sgpr5, $sgpr8_sgpr9_sgpr10_sgpr11:0x0000000000000003 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sreg_64_xexec = COPY [[S_MOV_B64_term]] - ; GCN-NEXT: $exec = S_OR_B64_term $exec, [[COPY3]], implicit-def $scc + ; GCN-NEXT: [[COPY2:%[0-9]+]]:sreg_64_xexec = COPY [[S_MOV_B64_term]] + ; GCN-NEXT: $exec = S_OR_B64_term $exec, [[COPY2]], implicit-def $scc ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.3: ; GCN-NEXT: successors: %bb.2(0x80000000) @@ -242,9 +242,9 @@ ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GCN-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[COPY]], [[COPY1]], implicit $exec - ; GCN-NEXT: [[COPY2:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec - ; GCN-NEXT: 
[[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY2]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc - ; GCN-NEXT: [[S_XOR_B64_:%[0-9]+]]:sreg_64_xexec = S_XOR_B64 [[S_AND_B64_]], [[COPY2]], implicit-def dead $scc + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $exec, implicit-def $exec + ; GCN-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[PRED_COPY]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc + ; GCN-NEXT: [[S_XOR_B64_:%[0-9]+]]:sreg_64_xexec = S_XOR_B64 [[S_AND_B64_]], [[PRED_COPY]], implicit-def dead $scc ; GCN-NEXT: $exec = S_MOV_B64_term [[S_AND_B64_]] ; GCN-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term [[S_XOR_B64_]], implicit $exec ; GCN-NEXT: S_CBRANCH_EXECZ %bb.1, implicit $exec @@ -253,8 +253,8 @@ ; GCN-NEXT: bb.1: ; GCN-NEXT: successors: %bb.3(0x80000000) ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sreg_64_xexec = COPY [[S_MOV_B64_term]] - ; GCN-NEXT: $exec = S_OR_B64_term $exec, [[COPY3]], implicit-def $scc + ; GCN-NEXT: [[COPY2:%[0-9]+]]:sreg_64_xexec = COPY [[S_MOV_B64_term]] + ; GCN-NEXT: $exec = S_OR_B64_term $exec, [[COPY2]], implicit-def $scc ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.3: ; GCN-NEXT: successors: %bb.2(0x80000000) @@ -302,9 +302,9 @@ ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GCN-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[COPY]], [[COPY1]], implicit $exec - ; GCN-NEXT: [[COPY2:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec - ; GCN-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY2]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc - ; GCN-NEXT: [[S_XOR_B64_:%[0-9]+]]:sreg_64_xexec = S_XOR_B64 [[S_AND_B64_]], [[COPY2]], implicit-def dead $scc + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $exec, implicit-def $exec + ; GCN-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[PRED_COPY]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc + ; GCN-NEXT: [[S_XOR_B64_:%[0-9]+]]:sreg_64_xexec = S_XOR_B64 [[S_AND_B64_]], [[PRED_COPY]], implicit-def dead $scc ; GCN-NEXT: $exec = S_MOV_B64_term [[S_AND_B64_]] ; GCN-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term [[S_XOR_B64_]], implicit $exec ; GCN-NEXT: S_CBRANCH_EXECZ %bb.1, implicit $exec @@ -314,9 +314,9 @@ ; GCN-NEXT: successors: %bb.3(0x80000000) ; GCN-NEXT: liveins: $vgpr0, $sgpr4_sgpr5 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sreg_64_xexec = COPY [[S_MOV_B64_term]] + ; GCN-NEXT: [[COPY2:%[0-9]+]]:sreg_64_xexec = COPY [[S_MOV_B64_term]] ; GCN-NEXT: $sgpr4_sgpr5 = S_MOV_B64 32 - ; GCN-NEXT: $exec = S_OR_B64_term $exec, [[COPY3]], implicit-def $scc + ; GCN-NEXT: $exec = S_OR_B64_term $exec, [[COPY2]], implicit-def $scc ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.3: ; GCN-NEXT: successors: %bb.2(0x80000000) @@ -369,7 +369,7 @@ ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GCN-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[COPY]], [[COPY1]], implicit $exec ; GCN-NEXT: [[V_CMP_EQ_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[COPY]], [[COPY2]], implicit $exec - ; GCN-NEXT: dead %5:sreg_64_xexec = S_MOV_B64 0 + ; GCN-NEXT: dead [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 0 ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.1: ; GCN-NEXT: successors: %bb.3(0x80000000) @@ -380,11 +380,11 @@ ; GCN-NEXT: bb.3: ; GCN-NEXT: successors: %bb.2(0x80000000) ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec - ; GCN-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY4]], [[V_CMP_EQ_U32_e64_1]], implicit-def 
dead $scc - ; GCN-NEXT: [[S_XOR_B64_:%[0-9]+]]:sreg_64_xexec = S_XOR_B64 [[S_AND_B64_]], [[COPY4]], implicit-def dead $scc + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $exec, implicit-def $exec + ; GCN-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[PRED_COPY]], [[V_CMP_EQ_U32_e64_1]], implicit-def dead $scc + ; GCN-NEXT: [[S_XOR_B64_:%[0-9]+]]:sreg_64_xexec = S_XOR_B64 [[S_AND_B64_]], [[PRED_COPY]], implicit-def dead $scc ; GCN-NEXT: $exec = S_MOV_B64_term [[S_AND_B64_]] - ; GCN-NEXT: dead %8:sreg_64_xexec = S_MOV_B64_term [[S_XOR_B64_]], implicit $exec + ; GCN-NEXT: dead [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term [[S_XOR_B64_]], implicit $exec ; GCN-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.2: diff --git a/llvm/test/CodeGen/AMDGPU/spill-vector-superclass.ll b/llvm/test/CodeGen/AMDGPU/spill-vector-superclass.ll --- a/llvm/test/CodeGen/AMDGPU/spill-vector-superclass.ll +++ b/llvm/test/CodeGen/AMDGPU/spill-vector-superclass.ll @@ -8,17 +8,17 @@ ; GCN-NEXT: liveins: $sgpr4_sgpr5 ; GCN-NEXT: {{ $}} ; GCN-NEXT: renamable $sgpr0_sgpr1_sgpr2_sgpr3 = S_LOAD_DWORDX4_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (s128) from %ir.arg.kernarg.offset1, addrspace 4) - ; GCN-NEXT: [[COPY:%[0-9]+]]:areg_128 = COPY killed renamable $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:areg_128 = PRED_COPY killed renamable $sgpr0_sgpr1_sgpr2_sgpr3 ; GCN-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec ; GCN-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2, implicit $exec - ; GCN-NEXT: [[V_MFMA_I32_4X4X4I8_e64_:%[0-9]+]]:areg_128 = V_MFMA_I32_4X4X4I8_e64 [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], [[COPY]], 0, 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[V_MFMA_I32_4X4X4I8_e64_:%[0-9]+]]:areg_128 = V_MFMA_I32_4X4X4I8_e64 [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], [[PRED_COPY]], 0, 0, 0, implicit $mode, implicit $exec ; GCN-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 1769482 /* regdef:VGPR_32 */, def undef %22.sub0 - ; GCN-NEXT: undef %24.sub0:av_64 = COPY %22.sub0 + ; GCN-NEXT: undef %24.sub0:av_64 = PRED_COPY %22.sub0 ; GCN-NEXT: SI_SPILL_AV64_SAVE %24, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, align 4, addrspace 5) - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY [[V_MFMA_I32_4X4X4I8_e64_]] - ; GCN-NEXT: GLOBAL_STORE_DWORDX4 undef %16:vreg_64, [[COPY1]], 0, 0, implicit $exec :: (volatile store (s128) into `ptr addrspace(1) undef`, addrspace 1) + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_128 = PRED_COPY [[V_MFMA_I32_4X4X4I8_e64_]] + ; GCN-NEXT: GLOBAL_STORE_DWORDX4 undef %16:vreg_64, [[PRED_COPY1]], 0, 0, implicit $exec :: (volatile store (s128) into `ptr addrspace(1) undef`, addrspace 1) ; GCN-NEXT: [[SI_SPILL_AV64_RESTORE:%[0-9]+]]:av_64 = SI_SPILL_AV64_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s64) from %stack.0, align 4, addrspace 5) - ; GCN-NEXT: undef %23.sub0:vreg_64 = COPY [[SI_SPILL_AV64_RESTORE]].sub0 + ; GCN-NEXT: undef %23.sub0:vreg_64 = PRED_COPY [[SI_SPILL_AV64_RESTORE]].sub0 ; GCN-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 3080201 /* reguse:VReg_64 */, %23 ; GCN-NEXT: S_ENDPGM 0 %v0 = call i32 asm sideeffect "; def $0", "=v"() diff --git a/llvm/test/CodeGen/AMDGPU/split-liverange-overlapping-copies.mir b/llvm/test/CodeGen/AMDGPU/split-liverange-overlapping-copies.mir --- a/llvm/test/CodeGen/AMDGPU/split-liverange-overlapping-copies.mir +++ 
b/llvm/test/CodeGen/AMDGPU/split-liverange-overlapping-copies.mir @@ -40,9 +40,9 @@ ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: undef %6.sub1_sub2_sub3_sub4_sub5_sub6_sub7_sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15_sub16:av_1024_align2 = COPY [[COPY]].sub1_sub2_sub3_sub4_sub5_sub6_sub7_sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15_sub16 { - ; CHECK-NEXT: internal %6.sub17_lo16_sub17_hi16_sub18_lo16_sub18_hi16_sub19_lo16_sub19_hi16_sub20_lo16_sub20_hi16_sub21_lo16_sub21_hi16_sub22_lo16_sub22_hi16_sub23_lo16_sub23_hi16_sub24_lo16_sub24_hi16_sub25_lo16_sub25_hi16_sub26_lo16_sub26_hi16_sub27_lo16_sub27_hi16_sub28_lo16_sub28_hi16:av_1024_align2 = COPY [[COPY]].sub17_lo16_sub17_hi16_sub18_lo16_sub18_hi16_sub19_lo16_sub19_hi16_sub20_lo16_sub20_hi16_sub21_lo16_sub21_hi16_sub22_lo16_sub22_hi16_sub23_lo16_sub23_hi16_sub24_lo16_sub24_hi16_sub25_lo16_sub25_hi16_sub26_lo16_sub26_hi16_sub27_lo16_sub27_hi16_sub28_lo16_sub28_hi16 - ; CHECK-NEXT: internal %6.sub29_sub30_sub31:av_1024_align2 = COPY [[COPY]].sub29_sub30_sub31 + ; CHECK-NEXT: undef %6.sub1_sub2_sub3_sub4_sub5_sub6_sub7_sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15_sub16:av_1024_align2 = PRED_COPY [[COPY]].sub1_sub2_sub3_sub4_sub5_sub6_sub7_sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15_sub16 { + ; CHECK-NEXT: internal %6.sub17_lo16_sub17_hi16_sub18_lo16_sub18_hi16_sub19_lo16_sub19_hi16_sub20_lo16_sub20_hi16_sub21_lo16_sub21_hi16_sub22_lo16_sub22_hi16_sub23_lo16_sub23_hi16_sub24_lo16_sub24_hi16_sub25_lo16_sub25_hi16_sub26_lo16_sub26_hi16_sub27_lo16_sub27_hi16_sub28_lo16_sub28_hi16:av_1024_align2 = PRED_COPY [[COPY]].sub17_lo16_sub17_hi16_sub18_lo16_sub18_hi16_sub19_lo16_sub19_hi16_sub20_lo16_sub20_hi16_sub21_lo16_sub21_hi16_sub22_lo16_sub22_hi16_sub23_lo16_sub23_hi16_sub24_lo16_sub24_hi16_sub25_lo16_sub25_hi16_sub26_lo16_sub26_hi16_sub27_lo16_sub27_hi16_sub28_lo16_sub28_hi16 + ; CHECK-NEXT: internal %6.sub29_sub30_sub31:av_1024_align2 = PRED_COPY [[COPY]].sub29_sub30_sub31 ; CHECK-NEXT: } ; CHECK-NEXT: %6.sub0:av_1024_align2 = IMPLICIT_DEF ; CHECK-NEXT: S_NOP 0, implicit %6.sub0 @@ -116,9 +116,9 @@ ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: undef %6.sub1_sub2_sub3_sub4_sub5_sub6_sub7_sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15_sub16:av_1024 = COPY [[COPY]].sub1_sub2_sub3_sub4_sub5_sub6_sub7_sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15_sub16 { - ; CHECK-NEXT: internal %6.sub17_lo16_sub17_hi16_sub18_lo16_sub18_hi16_sub19_lo16_sub19_hi16_sub20_lo16_sub20_hi16_sub21_lo16_sub21_hi16_sub22_lo16_sub22_hi16_sub23_lo16_sub23_hi16_sub24_lo16_sub24_hi16_sub25_lo16_sub25_hi16_sub26_lo16_sub26_hi16_sub27_lo16_sub27_hi16_sub28_lo16_sub28_hi16:av_1024 = COPY [[COPY]].sub17_lo16_sub17_hi16_sub18_lo16_sub18_hi16_sub19_lo16_sub19_hi16_sub20_lo16_sub20_hi16_sub21_lo16_sub21_hi16_sub22_lo16_sub22_hi16_sub23_lo16_sub23_hi16_sub24_lo16_sub24_hi16_sub25_lo16_sub25_hi16_sub26_lo16_sub26_hi16_sub27_lo16_sub27_hi16_sub28_lo16_sub28_hi16 - ; CHECK-NEXT: internal %6.sub29_sub30:av_1024 = COPY [[COPY]].sub29_sub30 + ; CHECK-NEXT: undef %6.sub1_sub2_sub3_sub4_sub5_sub6_sub7_sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15_sub16:av_1024 = PRED_COPY [[COPY]].sub1_sub2_sub3_sub4_sub5_sub6_sub7_sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15_sub16 { + ; CHECK-NEXT: internal 
%6.sub17_lo16_sub17_hi16_sub18_lo16_sub18_hi16_sub19_lo16_sub19_hi16_sub20_lo16_sub20_hi16_sub21_lo16_sub21_hi16_sub22_lo16_sub22_hi16_sub23_lo16_sub23_hi16_sub24_lo16_sub24_hi16_sub25_lo16_sub25_hi16_sub26_lo16_sub26_hi16_sub27_lo16_sub27_hi16_sub28_lo16_sub28_hi16:av_1024 = PRED_COPY [[COPY]].sub17_lo16_sub17_hi16_sub18_lo16_sub18_hi16_sub19_lo16_sub19_hi16_sub20_lo16_sub20_hi16_sub21_lo16_sub21_hi16_sub22_lo16_sub22_hi16_sub23_lo16_sub23_hi16_sub24_lo16_sub24_hi16_sub25_lo16_sub25_hi16_sub26_lo16_sub26_hi16_sub27_lo16_sub27_hi16_sub28_lo16_sub28_hi16 + ; CHECK-NEXT: internal %6.sub29_sub30:av_1024 = PRED_COPY [[COPY]].sub29_sub30 ; CHECK-NEXT: } ; CHECK-NEXT: %6.sub0:av_1024 = IMPLICIT_DEF ; CHECK-NEXT: %6.sub31:av_1024 = IMPLICIT_DEF diff --git a/llvm/test/CodeGen/AMDGPU/splitkit-copy-bundle.mir b/llvm/test/CodeGen/AMDGPU/splitkit-copy-bundle.mir --- a/llvm/test/CodeGen/AMDGPU/splitkit-copy-bundle.mir +++ b/llvm/test/CodeGen/AMDGPU/splitkit-copy-bundle.mir @@ -264,22 +264,22 @@ ; RA-NEXT: [[DEF2]].sub8:sgpr_512 = S_MOV_B32 -1 ; RA-NEXT: [[DEF2]].sub13:sgpr_512 = S_MOV_B32 -1 ; RA-NEXT: [[DEF2]].sub14:sgpr_512 = S_MOV_B32 -1 - ; RA-NEXT: undef %15.sub4_sub5:sgpr_512 = COPY [[DEF2]].sub4_sub5 { - ; RA-NEXT: internal %15.sub10_sub11:sgpr_512 = COPY [[DEF2]].sub10_sub11 - ; RA-NEXT: internal %15.sub7:sgpr_512 = COPY [[DEF2]].sub7 - ; RA-NEXT: internal %15.sub8:sgpr_512 = COPY [[DEF2]].sub8 - ; RA-NEXT: internal %15.sub13:sgpr_512 = COPY [[DEF2]].sub13 - ; RA-NEXT: internal %15.sub14:sgpr_512 = COPY [[DEF2]].sub14 + ; RA-NEXT: undef %15.sub4_sub5:sgpr_512 = PRED_COPY [[DEF2]].sub4_sub5 { + ; RA-NEXT: internal %15.sub10_sub11:sgpr_512 = PRED_COPY [[DEF2]].sub10_sub11 + ; RA-NEXT: internal %15.sub7:sgpr_512 = PRED_COPY [[DEF2]].sub7 + ; RA-NEXT: internal %15.sub8:sgpr_512 = PRED_COPY [[DEF2]].sub8 + ; RA-NEXT: internal %15.sub13:sgpr_512 = PRED_COPY [[DEF2]].sub13 + ; RA-NEXT: internal %15.sub14:sgpr_512 = PRED_COPY [[DEF2]].sub14 ; RA-NEXT: } ; RA-NEXT: SI_SPILL_S512_SAVE %15, %stack.0, implicit $exec, implicit $sgpr32 :: (store (s512) into %stack.0, align 4, addrspace 5) ; RA-NEXT: S_NOP 0, implicit-def $sgpr8, implicit-def $sgpr12, implicit-def $sgpr16, implicit-def $sgpr20, implicit-def $sgpr24, implicit-def $sgpr28, implicit-def $sgpr32, implicit-def $sgpr36, implicit-def $sgpr40, implicit-def $sgpr44, implicit-def $sgpr48, implicit-def $sgpr52, implicit-def $sgpr56, implicit-def $sgpr60, implicit-def $sgpr64, implicit-def $sgpr68, implicit-def $sgpr72, implicit-def $sgpr74, implicit-def $sgpr78, implicit-def $sgpr82, implicit-def $sgpr86, implicit-def $sgpr90, implicit-def $sgpr94, implicit-def $sgpr98 ; RA-NEXT: [[SI_SPILL_S512_RESTORE:%[0-9]+]]:sgpr_512 = SI_SPILL_S512_RESTORE %stack.0, implicit $exec, implicit $sgpr32 :: (load (s512) from %stack.0, align 4, addrspace 5) - ; RA-NEXT: undef %14.sub4_sub5:sgpr_512 = COPY [[SI_SPILL_S512_RESTORE]].sub4_sub5 { - ; RA-NEXT: internal %14.sub10_sub11:sgpr_512 = COPY [[SI_SPILL_S512_RESTORE]].sub10_sub11 - ; RA-NEXT: internal %14.sub7:sgpr_512 = COPY [[SI_SPILL_S512_RESTORE]].sub7 - ; RA-NEXT: internal %14.sub8:sgpr_512 = COPY [[SI_SPILL_S512_RESTORE]].sub8 - ; RA-NEXT: internal %14.sub13:sgpr_512 = COPY [[SI_SPILL_S512_RESTORE]].sub13 - ; RA-NEXT: internal %14.sub14:sgpr_512 = COPY [[SI_SPILL_S512_RESTORE]].sub14 + ; RA-NEXT: undef %14.sub4_sub5:sgpr_512 = PRED_COPY [[SI_SPILL_S512_RESTORE]].sub4_sub5 { + ; RA-NEXT: internal %14.sub10_sub11:sgpr_512 = PRED_COPY [[SI_SPILL_S512_RESTORE]].sub10_sub11 + ; RA-NEXT: internal %14.sub7:sgpr_512 = 
PRED_COPY [[SI_SPILL_S512_RESTORE]].sub7 + ; RA-NEXT: internal %14.sub8:sgpr_512 = PRED_COPY [[SI_SPILL_S512_RESTORE]].sub8 + ; RA-NEXT: internal %14.sub13:sgpr_512 = PRED_COPY [[SI_SPILL_S512_RESTORE]].sub13 + ; RA-NEXT: internal %14.sub14:sgpr_512 = PRED_COPY [[SI_SPILL_S512_RESTORE]].sub14 ; RA-NEXT: } ; RA-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[DEF]], %14.sub4, 0 :: (dereferenceable invariant load (s32)) ; RA-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR1:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[DEF]], %14.sub5, 0 :: (dereferenceable invariant load (s32)) @@ -303,12 +303,12 @@ ; VR-NEXT: SI_SPILL_S512_SAVE killed renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27, %stack.0, implicit $exec, implicit $sgpr32 :: (store (s512) into %stack.0, align 4, addrspace 5) ; VR-NEXT: S_NOP 0, implicit-def $sgpr8, implicit-def $sgpr12, implicit-def $sgpr16, implicit-def $sgpr20, implicit-def $sgpr24, implicit-def $sgpr28, implicit-def $sgpr32, implicit-def $sgpr36, implicit-def $sgpr40, implicit-def $sgpr44, implicit-def $sgpr48, implicit-def $sgpr52, implicit-def $sgpr56, implicit-def $sgpr60, implicit-def $sgpr64, implicit-def $sgpr68, implicit-def $sgpr72, implicit-def $sgpr74, implicit-def $sgpr78, implicit-def $sgpr82, implicit-def $sgpr86, implicit-def $sgpr90, implicit-def $sgpr94, implicit-def $sgpr98 ; VR-NEXT: renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 = SI_SPILL_S512_RESTORE %stack.0, implicit $exec, implicit $sgpr32 :: (load (s512) from %stack.0, align 4, addrspace 5) - ; VR-NEXT: renamable $sgpr12_sgpr13 = COPY killed renamable $sgpr16_sgpr17 - ; VR-NEXT: renamable $sgpr15 = COPY killed renamable $sgpr19 - ; VR-NEXT: renamable $sgpr18_sgpr19 = COPY killed renamable $sgpr22_sgpr23 - ; VR-NEXT: renamable $sgpr16 = COPY killed renamable $sgpr20 - ; VR-NEXT: renamable $sgpr21 = COPY killed renamable $sgpr25 - ; VR-NEXT: renamable $sgpr22 = COPY killed renamable $sgpr26 + ; VR-NEXT: renamable $sgpr12_sgpr13 = PRED_COPY killed renamable $sgpr16_sgpr17 + ; VR-NEXT: renamable $sgpr15 = PRED_COPY killed renamable $sgpr19 + ; VR-NEXT: renamable $sgpr18_sgpr19 = PRED_COPY killed renamable $sgpr22_sgpr23 + ; VR-NEXT: renamable $sgpr16 = PRED_COPY killed renamable $sgpr20 + ; VR-NEXT: renamable $sgpr21 = PRED_COPY killed renamable $sgpr25 + ; VR-NEXT: renamable $sgpr22 = PRED_COPY killed renamable $sgpr26 ; VR-NEXT: renamable $sgpr4_sgpr5_sgpr6_sgpr7 = IMPLICIT_DEF ; VR-NEXT: renamable $sgpr8 = S_BUFFER_LOAD_DWORD_SGPR renamable $sgpr4_sgpr5_sgpr6_sgpr7, killed renamable $sgpr12, 0 :: (dereferenceable invariant load (s32)) ; VR-NEXT: renamable $sgpr9 = S_BUFFER_LOAD_DWORD_SGPR renamable $sgpr4_sgpr5_sgpr6_sgpr7, killed renamable $sgpr13, 0 :: (dereferenceable invariant load (s32)) diff --git a/llvm/test/CodeGen/AMDGPU/splitkit-copy-live-lanes.mir b/llvm/test/CodeGen/AMDGPU/splitkit-copy-live-lanes.mir --- a/llvm/test/CodeGen/AMDGPU/splitkit-copy-live-lanes.mir +++ b/llvm/test/CodeGen/AMDGPU/splitkit-copy-live-lanes.mir @@ -62,140 +62,140 @@ ; CHECK-NEXT: undef %42.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET7]].sub3, implicit $exec ; CHECK-NEXT: undef %43.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET7]].sub2, implicit $exec ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65535 - ; CHECK-NEXT: undef %48.sub2:vreg_128 = COPY %47.sub2 + ; CHECK-NEXT: 
undef %48.sub2:vreg_128 = PRED_COPY %47.sub2 ; CHECK-NEXT: %48.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET]].sub1, implicit $exec - ; CHECK-NEXT: undef %50.sub0:vreg_128 = COPY %48.sub0 { - ; CHECK-NEXT: internal %50.sub2:vreg_128 = COPY %48.sub2 + ; CHECK-NEXT: undef %50.sub0:vreg_128 = PRED_COPY %48.sub0 { + ; CHECK-NEXT: internal %50.sub2:vreg_128 = PRED_COPY %48.sub2 ; CHECK-NEXT: } ; CHECK-NEXT: SI_SPILL_V128_SAVE %50, %stack.0, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.0, align 4, addrspace 5) - ; CHECK-NEXT: undef %55.sub2:vreg_128 = COPY %54.sub2 + ; CHECK-NEXT: undef %55.sub2:vreg_128 = PRED_COPY %54.sub2 ; CHECK-NEXT: %55.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET]].sub0, implicit $exec - ; CHECK-NEXT: undef %57.sub0:vreg_128 = COPY %55.sub0 { - ; CHECK-NEXT: internal %57.sub2:vreg_128 = COPY %55.sub2 + ; CHECK-NEXT: undef %57.sub0:vreg_128 = PRED_COPY %55.sub0 { + ; CHECK-NEXT: internal %57.sub2:vreg_128 = PRED_COPY %55.sub2 ; CHECK-NEXT: } ; CHECK-NEXT: SI_SPILL_V128_SAVE %57, %stack.1, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.1, align 4, addrspace 5) - ; CHECK-NEXT: undef %62.sub2:vreg_128 = COPY %61.sub2 + ; CHECK-NEXT: undef %62.sub2:vreg_128 = PRED_COPY %61.sub2 ; CHECK-NEXT: %62.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET]].sub3, implicit $exec - ; CHECK-NEXT: undef %64.sub0:vreg_128 = COPY %62.sub0 { - ; CHECK-NEXT: internal %64.sub2:vreg_128 = COPY %62.sub2 + ; CHECK-NEXT: undef %64.sub0:vreg_128 = PRED_COPY %62.sub0 { + ; CHECK-NEXT: internal %64.sub2:vreg_128 = PRED_COPY %62.sub2 ; CHECK-NEXT: } ; CHECK-NEXT: SI_SPILL_V128_SAVE %64, %stack.2, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.2, align 4, addrspace 5) - ; CHECK-NEXT: undef %69.sub2:vreg_128 = COPY %68.sub2 + ; CHECK-NEXT: undef %69.sub2:vreg_128 = PRED_COPY %68.sub2 ; CHECK-NEXT: %69.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET]].sub2, implicit $exec - ; CHECK-NEXT: undef %71.sub0:vreg_128 = COPY %69.sub0 { - ; CHECK-NEXT: internal %71.sub2:vreg_128 = COPY %69.sub2 + ; CHECK-NEXT: undef %71.sub0:vreg_128 = PRED_COPY %69.sub0 { + ; CHECK-NEXT: internal %71.sub2:vreg_128 = PRED_COPY %69.sub2 ; CHECK-NEXT: } ; CHECK-NEXT: SI_SPILL_V128_SAVE %71, %stack.3, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.3, align 4, addrspace 5) - ; CHECK-NEXT: undef %76.sub2:vreg_128 = COPY %75.sub2 + ; CHECK-NEXT: undef %76.sub2:vreg_128 = PRED_COPY %75.sub2 ; CHECK-NEXT: %76.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET1]].sub1, implicit $exec - ; CHECK-NEXT: undef %78.sub0:vreg_128 = COPY %76.sub0 { - ; CHECK-NEXT: internal %78.sub2:vreg_128 = COPY %76.sub2 + ; CHECK-NEXT: undef %78.sub0:vreg_128 = PRED_COPY %76.sub0 { + ; CHECK-NEXT: internal %78.sub2:vreg_128 = PRED_COPY %76.sub2 ; CHECK-NEXT: } ; CHECK-NEXT: SI_SPILL_V128_SAVE %78, %stack.4, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.4, align 4, addrspace 5) - ; CHECK-NEXT: undef %83.sub2:vreg_128 = COPY %82.sub2 + ; CHECK-NEXT: undef %83.sub2:vreg_128 = PRED_COPY %82.sub2 ; CHECK-NEXT: %83.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET1]].sub0, implicit $exec - ; CHECK-NEXT: undef %85.sub0:vreg_128 = COPY %83.sub0 { - ; CHECK-NEXT: internal %85.sub2:vreg_128 = COPY %83.sub2 + ; CHECK-NEXT: undef %85.sub0:vreg_128 = PRED_COPY %83.sub0 { + ; CHECK-NEXT: internal %85.sub2:vreg_128 = PRED_COPY %83.sub2 ; CHECK-NEXT: } ; CHECK-NEXT: 
SI_SPILL_V128_SAVE %85, %stack.5, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.5, align 4, addrspace 5) - ; CHECK-NEXT: undef %90.sub2:vreg_128 = COPY %89.sub2 + ; CHECK-NEXT: undef %90.sub2:vreg_128 = PRED_COPY %89.sub2 ; CHECK-NEXT: %90.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET1]].sub3, implicit $exec - ; CHECK-NEXT: undef %140.sub0:vreg_128 = COPY %90.sub0 { - ; CHECK-NEXT: internal %140.sub2:vreg_128 = COPY %90.sub2 + ; CHECK-NEXT: undef %140.sub0:vreg_128 = PRED_COPY %90.sub0 { + ; CHECK-NEXT: internal %140.sub2:vreg_128 = PRED_COPY %90.sub2 ; CHECK-NEXT: } ; CHECK-NEXT: SI_SPILL_V128_SAVE %140, %stack.7, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.7, align 4, addrspace 5) - ; CHECK-NEXT: undef %95.sub2:vreg_128 = COPY %94.sub2 + ; CHECK-NEXT: undef %95.sub2:vreg_128 = PRED_COPY %94.sub2 ; CHECK-NEXT: %95.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET1]].sub2, implicit $exec - ; CHECK-NEXT: undef %107.sub0:vreg_128 = COPY %95.sub0 { - ; CHECK-NEXT: internal %107.sub2:vreg_128 = COPY %95.sub2 + ; CHECK-NEXT: undef %107.sub0:vreg_128 = PRED_COPY %95.sub0 { + ; CHECK-NEXT: internal %107.sub2:vreg_128 = PRED_COPY %95.sub2 ; CHECK-NEXT: } ; CHECK-NEXT: SI_SPILL_V128_SAVE %107, %stack.6, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.6, align 4, addrspace 5) - ; CHECK-NEXT: undef %100.sub2:vreg_128 = COPY %99.sub2 + ; CHECK-NEXT: undef %100.sub2:vreg_128 = PRED_COPY %99.sub2 ; CHECK-NEXT: %100.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET2]].sub1, implicit $exec - ; CHECK-NEXT: undef %101.sub0:vreg_128 = COPY %100.sub0 { - ; CHECK-NEXT: internal %101.sub2:vreg_128 = COPY %100.sub2 + ; CHECK-NEXT: undef %101.sub0:vreg_128 = PRED_COPY %100.sub0 { + ; CHECK-NEXT: internal %101.sub2:vreg_128 = PRED_COPY %100.sub2 ; CHECK-NEXT: } - ; CHECK-NEXT: undef %105.sub2:vreg_128 = COPY %104.sub2 + ; CHECK-NEXT: undef %105.sub2:vreg_128 = PRED_COPY %104.sub2 ; CHECK-NEXT: %105.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET2]].sub0, implicit $exec - ; CHECK-NEXT: undef %106.sub0:vreg_128 = COPY %105.sub0 { - ; CHECK-NEXT: internal %106.sub2:vreg_128 = COPY %105.sub2 + ; CHECK-NEXT: undef %106.sub0:vreg_128 = PRED_COPY %105.sub0 { + ; CHECK-NEXT: internal %106.sub2:vreg_128 = PRED_COPY %105.sub2 ; CHECK-NEXT: } ; CHECK-NEXT: %139.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET2]].sub3, implicit $exec - ; CHECK-NEXT: undef %158.sub0:vreg_128 = COPY %139.sub0 { - ; CHECK-NEXT: internal %158.sub2:vreg_128 = COPY %139.sub2 + ; CHECK-NEXT: undef %158.sub0:vreg_128 = PRED_COPY %139.sub0 { + ; CHECK-NEXT: internal %158.sub2:vreg_128 = PRED_COPY %139.sub2 ; CHECK-NEXT: } ; CHECK-NEXT: SI_SPILL_V128_SAVE %158, %stack.8, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.8, align 4, addrspace 5) - ; CHECK-NEXT: undef %186.sub2:vreg_128 = COPY %185.sub2 + ; CHECK-NEXT: undef %186.sub2:vreg_128 = PRED_COPY %185.sub2 ; CHECK-NEXT: %186.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET2]].sub2, implicit $exec - ; CHECK-NEXT: undef %188.sub0:vreg_128 = COPY %186.sub0 { - ; CHECK-NEXT: internal %188.sub2:vreg_128 = COPY %186.sub2 + ; CHECK-NEXT: undef %188.sub0:vreg_128 = PRED_COPY %186.sub0 { + ; CHECK-NEXT: internal %188.sub2:vreg_128 = PRED_COPY %186.sub2 ; CHECK-NEXT: } ; CHECK-NEXT: SI_SPILL_V128_SAVE %188, %stack.11, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.11, align 4, addrspace 5) - ; CHECK-NEXT: 
undef %167.sub2:vreg_128 = COPY %166.sub2 + ; CHECK-NEXT: undef %167.sub2:vreg_128 = PRED_COPY %166.sub2 ; CHECK-NEXT: %167.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET3]].sub1, implicit $exec - ; CHECK-NEXT: undef %169.sub0:vreg_128 = COPY %167.sub0 { - ; CHECK-NEXT: internal %169.sub2:vreg_128 = COPY %167.sub2 + ; CHECK-NEXT: undef %169.sub0:vreg_128 = PRED_COPY %167.sub0 { + ; CHECK-NEXT: internal %169.sub2:vreg_128 = PRED_COPY %167.sub2 ; CHECK-NEXT: } ; CHECK-NEXT: SI_SPILL_V128_SAVE %169, %stack.9, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.9, align 4, addrspace 5) - ; CHECK-NEXT: undef %114.sub2:vreg_128 = COPY %113.sub2 + ; CHECK-NEXT: undef %114.sub2:vreg_128 = PRED_COPY %113.sub2 ; CHECK-NEXT: %114.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET3]].sub0, implicit $exec - ; CHECK-NEXT: undef %115.sub0:vreg_128 = COPY %114.sub0 { - ; CHECK-NEXT: internal %115.sub2:vreg_128 = COPY %114.sub2 + ; CHECK-NEXT: undef %115.sub0:vreg_128 = PRED_COPY %114.sub0 { + ; CHECK-NEXT: internal %115.sub2:vreg_128 = PRED_COPY %114.sub2 ; CHECK-NEXT: } - ; CHECK-NEXT: undef %119.sub2:vreg_128 = COPY %118.sub2 + ; CHECK-NEXT: undef %119.sub2:vreg_128 = PRED_COPY %118.sub2 ; CHECK-NEXT: %119.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET3]].sub3, implicit $exec - ; CHECK-NEXT: undef %181.sub0:vreg_128 = COPY %119.sub0 { - ; CHECK-NEXT: internal %181.sub2:vreg_128 = COPY %119.sub2 + ; CHECK-NEXT: undef %181.sub0:vreg_128 = PRED_COPY %119.sub0 { + ; CHECK-NEXT: internal %181.sub2:vreg_128 = PRED_COPY %119.sub2 ; CHECK-NEXT: } ; CHECK-NEXT: SI_SPILL_V128_SAVE %181, %stack.10, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.10, align 4, addrspace 5) - ; CHECK-NEXT: undef %124.sub2:vreg_128 = COPY %123.sub2 + ; CHECK-NEXT: undef %124.sub2:vreg_128 = PRED_COPY %123.sub2 ; CHECK-NEXT: %124.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET3]].sub2, implicit $exec - ; CHECK-NEXT: undef %125.sub0:vreg_128 = COPY %124.sub0 { - ; CHECK-NEXT: internal %125.sub2:vreg_128 = COPY %124.sub2 + ; CHECK-NEXT: undef %125.sub0:vreg_128 = PRED_COPY %124.sub0 { + ; CHECK-NEXT: internal %125.sub2:vreg_128 = PRED_COPY %124.sub2 ; CHECK-NEXT: } - ; CHECK-NEXT: undef %129.sub2:vreg_128 = COPY %128.sub2 + ; CHECK-NEXT: undef %129.sub2:vreg_128 = PRED_COPY %128.sub2 ; CHECK-NEXT: %129.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET4]].sub1, implicit $exec - ; CHECK-NEXT: undef %130.sub0:vreg_128 = COPY %129.sub0 { - ; CHECK-NEXT: internal %130.sub2:vreg_128 = COPY %129.sub2 + ; CHECK-NEXT: undef %130.sub0:vreg_128 = PRED_COPY %129.sub0 { + ; CHECK-NEXT: internal %130.sub2:vreg_128 = PRED_COPY %129.sub2 ; CHECK-NEXT: } - ; CHECK-NEXT: undef %134.sub2:vreg_128 = COPY %133.sub2 + ; CHECK-NEXT: undef %134.sub2:vreg_128 = PRED_COPY %133.sub2 ; CHECK-NEXT: %134.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET4]].sub0, implicit $exec - ; CHECK-NEXT: undef %135.sub0:vreg_128 = COPY %134.sub0 { - ; CHECK-NEXT: internal %135.sub2:vreg_128 = COPY %134.sub2 + ; CHECK-NEXT: undef %135.sub0:vreg_128 = PRED_COPY %134.sub0 { + ; CHECK-NEXT: internal %135.sub2:vreg_128 = PRED_COPY %134.sub2 ; CHECK-NEXT: } - ; CHECK-NEXT: undef %145.sub2:vreg_128 = COPY %144.sub2 + ; CHECK-NEXT: undef %145.sub2:vreg_128 = PRED_COPY %144.sub2 ; CHECK-NEXT: %145.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET4]].sub3, implicit $exec - ; CHECK-NEXT: undef 
%146.sub0:vreg_128 = COPY %145.sub0 { - ; CHECK-NEXT: internal %146.sub2:vreg_128 = COPY %145.sub2 + ; CHECK-NEXT: undef %146.sub0:vreg_128 = PRED_COPY %145.sub0 { + ; CHECK-NEXT: internal %146.sub2:vreg_128 = PRED_COPY %145.sub2 ; CHECK-NEXT: } - ; CHECK-NEXT: undef %150.sub2:vreg_128 = COPY %149.sub2 + ; CHECK-NEXT: undef %150.sub2:vreg_128 = PRED_COPY %149.sub2 ; CHECK-NEXT: %150.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET4]].sub2, implicit $exec - ; CHECK-NEXT: undef %151.sub0:vreg_128 = COPY %150.sub0 { - ; CHECK-NEXT: internal %151.sub2:vreg_128 = COPY %150.sub2 + ; CHECK-NEXT: undef %151.sub0:vreg_128 = PRED_COPY %150.sub0 { + ; CHECK-NEXT: internal %151.sub2:vreg_128 = PRED_COPY %150.sub2 ; CHECK-NEXT: } ; CHECK-NEXT: undef %157.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET5]].sub1, implicit $exec - ; CHECK-NEXT: undef %155.sub2:vreg_128 = COPY %157.sub2 + ; CHECK-NEXT: undef %155.sub2:vreg_128 = PRED_COPY %157.sub2 ; CHECK-NEXT: %155.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET5]].sub1, implicit $exec - ; CHECK-NEXT: undef %156.sub0:vreg_128 = COPY %155.sub0 { - ; CHECK-NEXT: internal %156.sub2:vreg_128 = COPY %155.sub2 + ; CHECK-NEXT: undef %156.sub0:vreg_128 = PRED_COPY %155.sub0 { + ; CHECK-NEXT: internal %156.sub2:vreg_128 = PRED_COPY %155.sub2 ; CHECK-NEXT: } ; CHECK-NEXT: undef %165.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET5]].sub0, implicit $exec - ; CHECK-NEXT: undef %163.sub2:vreg_128 = COPY %165.sub2 + ; CHECK-NEXT: undef %163.sub2:vreg_128 = PRED_COPY %165.sub2 ; CHECK-NEXT: %163.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET5]].sub0, implicit $exec - ; CHECK-NEXT: undef %164.sub0:vreg_128 = COPY %163.sub0 { - ; CHECK-NEXT: internal %164.sub2:vreg_128 = COPY %163.sub2 + ; CHECK-NEXT: undef %164.sub0:vreg_128 = PRED_COPY %163.sub0 { + ; CHECK-NEXT: internal %164.sub2:vreg_128 = PRED_COPY %163.sub2 ; CHECK-NEXT: } ; CHECK-NEXT: undef %176.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET5]].sub3, implicit $exec - ; CHECK-NEXT: undef %174.sub2:vreg_128 = COPY %176.sub2 + ; CHECK-NEXT: undef %174.sub2:vreg_128 = PRED_COPY %176.sub2 ; CHECK-NEXT: %174.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET5]].sub3, implicit $exec - ; CHECK-NEXT: undef %175.sub0:vreg_128 = COPY %174.sub0 { - ; CHECK-NEXT: internal %175.sub2:vreg_128 = COPY %174.sub2 + ; CHECK-NEXT: undef %175.sub0:vreg_128 = PRED_COPY %174.sub0 { + ; CHECK-NEXT: internal %175.sub2:vreg_128 = PRED_COPY %174.sub2 ; CHECK-NEXT: } ; CHECK-NEXT: undef %195.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET5]].sub2, implicit $exec - ; CHECK-NEXT: undef %180.sub2:vreg_128 = COPY %195.sub2 + ; CHECK-NEXT: undef %180.sub2:vreg_128 = PRED_COPY %195.sub2 ; CHECK-NEXT: %180.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET5]].sub2, implicit $exec ; CHECK-NEXT: undef %194.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET6]].sub1, implicit $exec - ; CHECK-NEXT: undef %193.sub2:vreg_128 = COPY %194.sub2 + ; CHECK-NEXT: undef %193.sub2:vreg_128 = PRED_COPY %194.sub2 ; CHECK-NEXT: %193.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET6]].sub1, implicit $exec ; CHECK-NEXT: %36.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET6]].sub0, implicit $exec ; CHECK-NEXT: %37.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET6]].sub3, 
implicit $exec @@ -225,164 +225,164 @@ ; CHECK-NEXT: %36.sub1:vreg_128 = COPY %43.sub1 ; CHECK-NEXT: %36.sub3:vreg_128 = COPY %43.sub1 ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET %36, %2, 0, 384, 0, 0, implicit $exec :: (store (s128), align 128, addrspace 1) - ; CHECK-NEXT: undef %191.sub0:vreg_128 = COPY %193.sub0 { - ; CHECK-NEXT: internal %191.sub2:vreg_128 = COPY %193.sub2 + ; CHECK-NEXT: undef %191.sub0:vreg_128 = PRED_COPY %193.sub0 { + ; CHECK-NEXT: internal %191.sub2:vreg_128 = PRED_COPY %193.sub2 ; CHECK-NEXT: } ; CHECK-NEXT: %191.sub1:vreg_128 = COPY %43.sub1 ; CHECK-NEXT: %191.sub3:vreg_128 = COPY %43.sub1 ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET %191, %2, 0, 400, 0, 0, implicit $exec :: (store (s128), addrspace 1) - ; CHECK-NEXT: undef %178.sub0:vreg_128 = COPY %180.sub0 { - ; CHECK-NEXT: internal %178.sub2:vreg_128 = COPY %180.sub2 + ; CHECK-NEXT: undef %178.sub0:vreg_128 = PRED_COPY %180.sub0 { + ; CHECK-NEXT: internal %178.sub2:vreg_128 = PRED_COPY %180.sub2 ; CHECK-NEXT: } ; CHECK-NEXT: %178.sub1:vreg_128 = COPY %43.sub1 ; CHECK-NEXT: %178.sub3:vreg_128 = COPY %43.sub1 ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET %178, %2, 0, 352, 0, 0, implicit $exec :: (store (s128), align 32, addrspace 1) - ; CHECK-NEXT: undef %172.sub0:vreg_128 = COPY %175.sub0 { - ; CHECK-NEXT: internal %172.sub2:vreg_128 = COPY %175.sub2 + ; CHECK-NEXT: undef %172.sub0:vreg_128 = PRED_COPY %175.sub0 { + ; CHECK-NEXT: internal %172.sub2:vreg_128 = PRED_COPY %175.sub2 ; CHECK-NEXT: } ; CHECK-NEXT: %172.sub1:vreg_128 = COPY %43.sub1 ; CHECK-NEXT: %172.sub3:vreg_128 = COPY %43.sub1 ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET %172, %2, 0, 368, 0, 0, implicit $exec :: (store (s128), addrspace 1) - ; CHECK-NEXT: undef %161.sub0:vreg_128 = COPY %164.sub0 { - ; CHECK-NEXT: internal %161.sub2:vreg_128 = COPY %164.sub2 + ; CHECK-NEXT: undef %161.sub0:vreg_128 = PRED_COPY %164.sub0 { + ; CHECK-NEXT: internal %161.sub2:vreg_128 = PRED_COPY %164.sub2 ; CHECK-NEXT: } ; CHECK-NEXT: %161.sub1:vreg_128 = COPY %43.sub1 ; CHECK-NEXT: %161.sub3:vreg_128 = COPY %43.sub1 ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET %161, %2, 0, 320, 0, 0, implicit $exec :: (store (s128), align 64, addrspace 1) - ; CHECK-NEXT: undef %153.sub0:vreg_128 = COPY %156.sub0 { - ; CHECK-NEXT: internal %153.sub2:vreg_128 = COPY %156.sub2 + ; CHECK-NEXT: undef %153.sub0:vreg_128 = PRED_COPY %156.sub0 { + ; CHECK-NEXT: internal %153.sub2:vreg_128 = PRED_COPY %156.sub2 ; CHECK-NEXT: } ; CHECK-NEXT: %153.sub1:vreg_128 = COPY %43.sub1 ; CHECK-NEXT: %153.sub3:vreg_128 = COPY %43.sub1 ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET %153, %2, 0, 336, 0, 0, implicit $exec :: (store (s128), addrspace 1) - ; CHECK-NEXT: undef %148.sub0:vreg_128 = COPY %151.sub0 { - ; CHECK-NEXT: internal %148.sub2:vreg_128 = COPY %151.sub2 + ; CHECK-NEXT: undef %148.sub0:vreg_128 = PRED_COPY %151.sub0 { + ; CHECK-NEXT: internal %148.sub2:vreg_128 = PRED_COPY %151.sub2 ; CHECK-NEXT: } ; CHECK-NEXT: %148.sub1:vreg_128 = COPY %43.sub1 ; CHECK-NEXT: %148.sub3:vreg_128 = COPY %43.sub1 ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET %148, %2, 0, 288, 0, 0, implicit $exec :: (store (s128), align 32, addrspace 1) - ; CHECK-NEXT: undef %143.sub0:vreg_128 = COPY %146.sub0 { - ; CHECK-NEXT: internal %143.sub2:vreg_128 = COPY %146.sub2 + ; CHECK-NEXT: undef %143.sub0:vreg_128 = PRED_COPY %146.sub0 { + ; CHECK-NEXT: internal %143.sub2:vreg_128 = PRED_COPY %146.sub2 ; CHECK-NEXT: } ; CHECK-NEXT: %143.sub1:vreg_128 = COPY %43.sub1 ; CHECK-NEXT: %143.sub3:vreg_128 = COPY %43.sub1 ; CHECK-NEXT: 
BUFFER_STORE_DWORDX4_OFFSET %143, %2, 0, 304, 0, 0, implicit $exec :: (store (s128), addrspace 1) - ; CHECK-NEXT: undef %132.sub0:vreg_128 = COPY %135.sub0 { - ; CHECK-NEXT: internal %132.sub2:vreg_128 = COPY %135.sub2 + ; CHECK-NEXT: undef %132.sub0:vreg_128 = PRED_COPY %135.sub0 { + ; CHECK-NEXT: internal %132.sub2:vreg_128 = PRED_COPY %135.sub2 ; CHECK-NEXT: } ; CHECK-NEXT: %132.sub1:vreg_128 = COPY %43.sub1 ; CHECK-NEXT: %132.sub3:vreg_128 = COPY %43.sub1 ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET %132, %2, 0, 256, 0, 0, implicit $exec :: (store (s128), align 256, addrspace 1) - ; CHECK-NEXT: undef %127.sub0:vreg_128 = COPY %130.sub0 { - ; CHECK-NEXT: internal %127.sub2:vreg_128 = COPY %130.sub2 + ; CHECK-NEXT: undef %127.sub0:vreg_128 = PRED_COPY %130.sub0 { + ; CHECK-NEXT: internal %127.sub2:vreg_128 = PRED_COPY %130.sub2 ; CHECK-NEXT: } ; CHECK-NEXT: %127.sub1:vreg_128 = COPY %43.sub1 ; CHECK-NEXT: %127.sub3:vreg_128 = COPY %43.sub1 ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET %127, %2, 0, 272, 0, 0, implicit $exec :: (store (s128), addrspace 1) - ; CHECK-NEXT: undef %122.sub0:vreg_128 = COPY %125.sub0 { - ; CHECK-NEXT: internal %122.sub2:vreg_128 = COPY %125.sub2 + ; CHECK-NEXT: undef %122.sub0:vreg_128 = PRED_COPY %125.sub0 { + ; CHECK-NEXT: internal %122.sub2:vreg_128 = PRED_COPY %125.sub2 ; CHECK-NEXT: } ; CHECK-NEXT: %122.sub1:vreg_128 = COPY %43.sub1 ; CHECK-NEXT: %122.sub3:vreg_128 = COPY %43.sub1 ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET %122, %2, 0, 224, 0, 0, implicit $exec :: (store (s128), align 32, addrspace 1) ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.10, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.10, align 4, addrspace 5) - ; CHECK-NEXT: undef %117.sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE]].sub0 { - ; CHECK-NEXT: internal %117.sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE]].sub2 + ; CHECK-NEXT: undef %117.sub0:vreg_128 = PRED_COPY [[SI_SPILL_V128_RESTORE]].sub0 { + ; CHECK-NEXT: internal %117.sub2:vreg_128 = PRED_COPY [[SI_SPILL_V128_RESTORE]].sub2 ; CHECK-NEXT: } ; CHECK-NEXT: %117.sub1:vreg_128 = COPY %43.sub1 ; CHECK-NEXT: %117.sub3:vreg_128 = COPY %43.sub1 ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET %117, %2, 0, 240, 0, 0, implicit $exec :: (store (s128), addrspace 1) - ; CHECK-NEXT: undef %112.sub0:vreg_128 = COPY %115.sub0 { - ; CHECK-NEXT: internal %112.sub2:vreg_128 = COPY %115.sub2 + ; CHECK-NEXT: undef %112.sub0:vreg_128 = PRED_COPY %115.sub0 { + ; CHECK-NEXT: internal %112.sub2:vreg_128 = PRED_COPY %115.sub2 ; CHECK-NEXT: } ; CHECK-NEXT: %112.sub1:vreg_128 = COPY %43.sub1 ; CHECK-NEXT: %112.sub3:vreg_128 = COPY %43.sub1 ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET %112, %2, 0, 192, 0, 0, implicit $exec :: (store (s128), align 64, addrspace 1) ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE1:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.9, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.9, align 4, addrspace 5) - ; CHECK-NEXT: undef %110.sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE1]].sub0 { - ; CHECK-NEXT: internal %110.sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE1]].sub2 + ; CHECK-NEXT: undef %110.sub0:vreg_128 = PRED_COPY [[SI_SPILL_V128_RESTORE1]].sub0 { + ; CHECK-NEXT: internal %110.sub2:vreg_128 = PRED_COPY [[SI_SPILL_V128_RESTORE1]].sub2 ; CHECK-NEXT: } ; CHECK-NEXT: %110.sub1:vreg_128 = COPY %43.sub1 ; CHECK-NEXT: %110.sub3:vreg_128 = COPY %43.sub1 ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET %110, %2, 0, 208, 0, 0, implicit $exec :: (store (s128), addrspace 1) ; CHECK-NEXT: 
[[SI_SPILL_V128_RESTORE2:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.11, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.11, align 4, addrspace 5) - ; CHECK-NEXT: undef %184.sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE2]].sub0 { - ; CHECK-NEXT: internal %184.sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE2]].sub2 + ; CHECK-NEXT: undef %184.sub0:vreg_128 = PRED_COPY [[SI_SPILL_V128_RESTORE2]].sub0 { + ; CHECK-NEXT: internal %184.sub2:vreg_128 = PRED_COPY [[SI_SPILL_V128_RESTORE2]].sub2 ; CHECK-NEXT: } ; CHECK-NEXT: %184.sub1:vreg_128 = COPY %43.sub1 ; CHECK-NEXT: %184.sub3:vreg_128 = COPY %43.sub1 ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET %184, %2, 0, 160, 0, 0, implicit $exec :: (store (s128), align 32, addrspace 1) ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE3:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.8, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.8, align 4, addrspace 5) - ; CHECK-NEXT: undef %137.sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE3]].sub0 { - ; CHECK-NEXT: internal %137.sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE3]].sub2 + ; CHECK-NEXT: undef %137.sub0:vreg_128 = PRED_COPY [[SI_SPILL_V128_RESTORE3]].sub0 { + ; CHECK-NEXT: internal %137.sub2:vreg_128 = PRED_COPY [[SI_SPILL_V128_RESTORE3]].sub2 ; CHECK-NEXT: } ; CHECK-NEXT: %137.sub1:vreg_128 = COPY %43.sub1 ; CHECK-NEXT: %137.sub3:vreg_128 = COPY %43.sub1 ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET %137, %2, 0, 176, 0, 0, implicit $exec :: (store (s128), addrspace 1) - ; CHECK-NEXT: undef %103.sub0:vreg_128 = COPY %106.sub0 { - ; CHECK-NEXT: internal %103.sub2:vreg_128 = COPY %106.sub2 + ; CHECK-NEXT: undef %103.sub0:vreg_128 = PRED_COPY %106.sub0 { + ; CHECK-NEXT: internal %103.sub2:vreg_128 = PRED_COPY %106.sub2 ; CHECK-NEXT: } ; CHECK-NEXT: %103.sub1:vreg_128 = COPY %43.sub1 ; CHECK-NEXT: %103.sub3:vreg_128 = COPY %43.sub1 ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET %103, %2, 0, 128, 0, 0, implicit $exec :: (store (s128), align 128, addrspace 1) - ; CHECK-NEXT: undef %98.sub0:vreg_128 = COPY %101.sub0 { - ; CHECK-NEXT: internal %98.sub2:vreg_128 = COPY %101.sub2 + ; CHECK-NEXT: undef %98.sub0:vreg_128 = PRED_COPY %101.sub0 { + ; CHECK-NEXT: internal %98.sub2:vreg_128 = PRED_COPY %101.sub2 ; CHECK-NEXT: } ; CHECK-NEXT: %98.sub1:vreg_128 = COPY %43.sub1 ; CHECK-NEXT: %98.sub3:vreg_128 = COPY %43.sub1 ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET %98, %2, 0, 144, 0, 0, implicit $exec :: (store (s128), addrspace 1) ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE4:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.6, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.6, align 4, addrspace 5) - ; CHECK-NEXT: undef %93.sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE4]].sub0 { - ; CHECK-NEXT: internal %93.sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE4]].sub2 + ; CHECK-NEXT: undef %93.sub0:vreg_128 = PRED_COPY [[SI_SPILL_V128_RESTORE4]].sub0 { + ; CHECK-NEXT: internal %93.sub2:vreg_128 = PRED_COPY [[SI_SPILL_V128_RESTORE4]].sub2 ; CHECK-NEXT: } ; CHECK-NEXT: %93.sub1:vreg_128 = COPY %43.sub1 ; CHECK-NEXT: %93.sub3:vreg_128 = COPY %43.sub1 ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET %93, %2, 0, 96, 0, 0, implicit $exec :: (store (s128), align 32, addrspace 1) ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE5:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.7, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.7, align 4, addrspace 5) - ; CHECK-NEXT: undef %88.sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE5]].sub0 { - ; CHECK-NEXT: internal %88.sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE5]].sub2 + ; CHECK-NEXT: undef %88.sub0:vreg_128 = 
PRED_COPY [[SI_SPILL_V128_RESTORE5]].sub0 { + ; CHECK-NEXT: internal %88.sub2:vreg_128 = PRED_COPY [[SI_SPILL_V128_RESTORE5]].sub2 ; CHECK-NEXT: } ; CHECK-NEXT: %88.sub1:vreg_128 = COPY %43.sub1 ; CHECK-NEXT: %88.sub3:vreg_128 = COPY %43.sub1 ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET %88, %2, 0, 112, 0, 0, implicit $exec :: (store (s128), addrspace 1) ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE6:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.5, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.5, align 4, addrspace 5) - ; CHECK-NEXT: undef %81.sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE6]].sub0 { - ; CHECK-NEXT: internal %81.sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE6]].sub2 + ; CHECK-NEXT: undef %81.sub0:vreg_128 = PRED_COPY [[SI_SPILL_V128_RESTORE6]].sub0 { + ; CHECK-NEXT: internal %81.sub2:vreg_128 = PRED_COPY [[SI_SPILL_V128_RESTORE6]].sub2 ; CHECK-NEXT: } ; CHECK-NEXT: %81.sub1:vreg_128 = COPY %43.sub1 ; CHECK-NEXT: %81.sub3:vreg_128 = COPY %43.sub1 ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET %81, %2, 0, 64, 0, 0, implicit $exec :: (store (s128), align 64, addrspace 1) ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE7:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.4, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.4, align 4, addrspace 5) - ; CHECK-NEXT: undef %74.sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE7]].sub0 { - ; CHECK-NEXT: internal %74.sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE7]].sub2 + ; CHECK-NEXT: undef %74.sub0:vreg_128 = PRED_COPY [[SI_SPILL_V128_RESTORE7]].sub0 { + ; CHECK-NEXT: internal %74.sub2:vreg_128 = PRED_COPY [[SI_SPILL_V128_RESTORE7]].sub2 ; CHECK-NEXT: } ; CHECK-NEXT: %74.sub1:vreg_128 = COPY %43.sub1 ; CHECK-NEXT: %74.sub3:vreg_128 = COPY %43.sub1 ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET %74, %2, 0, 80, 0, 0, implicit $exec :: (store (s128), addrspace 1) ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE8:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.3, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.3, align 4, addrspace 5) - ; CHECK-NEXT: undef %67.sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE8]].sub0 { - ; CHECK-NEXT: internal %67.sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE8]].sub2 + ; CHECK-NEXT: undef %67.sub0:vreg_128 = PRED_COPY [[SI_SPILL_V128_RESTORE8]].sub0 { + ; CHECK-NEXT: internal %67.sub2:vreg_128 = PRED_COPY [[SI_SPILL_V128_RESTORE8]].sub2 ; CHECK-NEXT: } ; CHECK-NEXT: %67.sub1:vreg_128 = COPY %43.sub1 ; CHECK-NEXT: %67.sub3:vreg_128 = COPY %43.sub1 ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET %67, %2, 0, 32, 0, 0, implicit $exec :: (store (s128), align 32, addrspace 1) ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE9:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.2, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.2, align 4, addrspace 5) - ; CHECK-NEXT: undef %60.sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE9]].sub0 { - ; CHECK-NEXT: internal %60.sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE9]].sub2 + ; CHECK-NEXT: undef %60.sub0:vreg_128 = PRED_COPY [[SI_SPILL_V128_RESTORE9]].sub0 { + ; CHECK-NEXT: internal %60.sub2:vreg_128 = PRED_COPY [[SI_SPILL_V128_RESTORE9]].sub2 ; CHECK-NEXT: } ; CHECK-NEXT: %60.sub1:vreg_128 = COPY %43.sub1 ; CHECK-NEXT: %60.sub3:vreg_128 = COPY %43.sub1 ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET %60, %2, 0, 48, 0, 0, implicit $exec :: (store (s128), addrspace 1) ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE10:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.1, align 4, addrspace 5) - ; CHECK-NEXT: undef %53.sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE10]].sub0 { - ; 
CHECK-NEXT: internal %53.sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE10]].sub2 + ; CHECK-NEXT: undef %53.sub0:vreg_128 = PRED_COPY [[SI_SPILL_V128_RESTORE10]].sub0 { + ; CHECK-NEXT: internal %53.sub2:vreg_128 = PRED_COPY [[SI_SPILL_V128_RESTORE10]].sub2 ; CHECK-NEXT: } ; CHECK-NEXT: %53.sub1:vreg_128 = COPY %43.sub1 ; CHECK-NEXT: %53.sub3:vreg_128 = COPY %43.sub1 ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET %53, %2, 0, 0, 0, 0, implicit $exec :: (store (s128), align 512, addrspace 1) ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE11:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.0, align 4, addrspace 5) - ; CHECK-NEXT: undef %46.sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE11]].sub0 { - ; CHECK-NEXT: internal %46.sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE11]].sub2 + ; CHECK-NEXT: undef %46.sub0:vreg_128 = PRED_COPY [[SI_SPILL_V128_RESTORE11]].sub0 { + ; CHECK-NEXT: internal %46.sub2:vreg_128 = PRED_COPY [[SI_SPILL_V128_RESTORE11]].sub2 ; CHECK-NEXT: } ; CHECK-NEXT: %46.sub1:vreg_128 = COPY %43.sub1 ; CHECK-NEXT: %46.sub3:vreg_128 = COPY %43.sub1 diff --git a/llvm/test/CodeGen/AMDGPU/splitkit-getsubrangeformask.ll b/llvm/test/CodeGen/AMDGPU/splitkit-getsubrangeformask.ll --- a/llvm/test/CodeGen/AMDGPU/splitkit-getsubrangeformask.ll +++ b/llvm/test/CodeGen/AMDGPU/splitkit-getsubrangeformask.ll @@ -15,30 +15,30 @@ ; CHECK: bb.0..expVert: ; CHECK-NEXT: liveins: $sgpr3, $sgpr4, $sgpr5, $sgpr8, $sgpr9, $sgpr10, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr25, $sgpr27, $sgpr31 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: undef %56.sub0:sgpr_64 = COPY $sgpr31 - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr27 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr25 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr5 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr18 - ; CHECK-NEXT: undef %50.sub0:sgpr_64 = COPY $sgpr19 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr20 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_32 = COPY $sgpr21 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_32 = COPY $sgpr22 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:sgpr_32 = COPY $sgpr23 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:sgpr_32 = COPY $sgpr9 - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:sgpr_32 = COPY $sgpr10 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:sgpr_32 = COPY $sgpr8 + ; CHECK-NEXT: undef %56.sub0:sgpr_64 = PRED_COPY $sgpr31 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr27 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr25 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr18 + ; CHECK-NEXT: undef %50.sub0:sgpr_64 = PRED_COPY $sgpr19 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr20 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr21 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr22 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr23 + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr9 + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr10 + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr8 ; CHECK-NEXT: undef %71.sub0_sub1:sgpr_128 = S_LOAD_DWORDX2_IMM %56, 232, 0 :: (invariant load (s64) from %ir.39, addrspace 4) - ; CHECK-NEXT: 
[[S_LSHL_B32_:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY4]], 4, implicit-def dead $scc - ; CHECK-NEXT: [[S_LSHL_B32_1:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY3]], 4, implicit-def dead $scc - ; CHECK-NEXT: [[S_LSHL_B32_2:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY2]], 4, implicit-def dead $scc + ; CHECK-NEXT: [[S_LSHL_B32_:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[PRED_COPY4]], 4, implicit-def dead $scc + ; CHECK-NEXT: [[S_LSHL_B32_1:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[PRED_COPY3]], 4, implicit-def dead $scc + ; CHECK-NEXT: [[S_LSHL_B32_2:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[PRED_COPY2]], 4, implicit-def dead $scc ; CHECK-NEXT: [[S_ASHR_I32_:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 [[S_LSHL_B32_]], 31, implicit-def dead $scc ; CHECK-NEXT: [[S_ASHR_I32_1:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 [[S_LSHL_B32_1]], 31, implicit-def dead $scc ; CHECK-NEXT: [[S_ASHR_I32_2:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 [[S_LSHL_B32_2]], 31, implicit-def dead $scc ; CHECK-NEXT: %71.sub1:sgpr_128 = S_AND_B32 %71.sub1, 65535, implicit-def dead $scc - ; CHECK-NEXT: undef %130.sub0:sreg_64 = S_ADD_U32 [[COPY5]], [[S_LSHL_B32_2]], implicit-def $scc + ; CHECK-NEXT: undef %130.sub0:sreg_64 = S_ADD_U32 [[PRED_COPY5]], [[S_LSHL_B32_2]], implicit-def $scc ; CHECK-NEXT: %130.sub1:sreg_64 = S_ADDC_U32 undef %54:sreg_32, [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %130, 16, 0 :: (invariant load (s128) from %ir.81, addrspace 4) ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM1:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM undef %74:sreg_64, 0, 0 :: (invariant load (s128) from `ptr addrspace(4) undef`, addrspace 4) @@ -55,7 +55,7 @@ ; CHECK-NEXT: [[S_SUB_I32_:%[0-9]+]]:sreg_32 = S_SUB_I32 [[S_BUFFER_LOAD_DWORD_IMM]], 29, implicit-def dead $scc ; CHECK-NEXT: [[S_SUB_I32_1:%[0-9]+]]:sreg_32 = S_SUB_I32 [[S_BUFFER_LOAD_DWORD_IMM]], 30, implicit-def dead $scc ; CHECK-NEXT: [[S_SUB_I32_2:%[0-9]+]]:sreg_32 = S_SUB_I32 [[S_BUFFER_LOAD_DWORD_IMM1]], 31, implicit-def dead $scc - ; CHECK-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY5]], 64, implicit-def $scc + ; CHECK-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PRED_COPY5]], 64, implicit-def $scc ; CHECK-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 undef %54:sreg_32, 0, implicit-def dead $scc, implicit $scc ; CHECK-NEXT: undef %149.sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_]], [[S_LSHL_B32_]], implicit-def $scc ; CHECK-NEXT: %149.sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_]], [[S_ASHR_I32_]], implicit-def dead $scc, implicit $scc @@ -89,11 +89,11 @@ ; CHECK-NEXT: %253.sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_2]], [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc ; CHECK-NEXT: undef %261.sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_2]], undef %171:sreg_32, implicit-def $scc ; CHECK-NEXT: %261.sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_2]], [[S_ASHR_I32_3]], implicit-def dead $scc, implicit $scc - ; CHECK-NEXT: undef %273.sub0:sreg_64 = S_ADD_U32 [[COPY6]], [[S_LSHL_B32_]], implicit-def $scc + ; CHECK-NEXT: undef %273.sub0:sreg_64 = S_ADD_U32 [[PRED_COPY6]], [[S_LSHL_B32_]], implicit-def $scc ; CHECK-NEXT: %273.sub1:sreg_64 = S_ADDC_U32 undef %48:sreg_32, [[S_ASHR_I32_]], implicit-def dead $scc, implicit $scc - ; CHECK-NEXT: undef %286.sub0:sreg_64 = S_ADD_U32 [[COPY7]], [[S_LSHL_B32_1]], implicit-def $scc + ; CHECK-NEXT: undef %286.sub0:sreg_64 = S_ADD_U32 [[PRED_COPY7]], [[S_LSHL_B32_1]], implicit-def $scc ; CHECK-NEXT: %286.sub1:sreg_64 = S_ADDC_U32 undef %45:sreg_32, [[S_ASHR_I32_1]], implicit-def dead $scc, implicit $scc - ; CHECK-NEXT: undef 
%293.sub0:sreg_64 = S_ADD_U32 [[COPY7]], [[S_LSHL_B32_2]], implicit-def $scc + ; CHECK-NEXT: undef %293.sub0:sreg_64 = S_ADD_U32 [[PRED_COPY7]], [[S_LSHL_B32_2]], implicit-def $scc ; CHECK-NEXT: %293.sub1:sreg_64 = S_ADDC_U32 undef %45:sreg_32, [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc ; CHECK-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_LSHL_B32_]], 16, implicit-def dead $scc ; CHECK-NEXT: [[S_ADD_I32_1:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_LSHL_B32_2]], 16, implicit-def dead $scc @@ -116,16 +116,16 @@ ; CHECK-NEXT: [[S_ADD_I32_3:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR1]], -114, implicit-def dead $scc ; CHECK-NEXT: [[S_ADD_I32_4:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR2]], -130, implicit-def dead $scc ; CHECK-NEXT: [[S_ADD_I32_5:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_IMM2]], -178, implicit-def dead $scc - ; CHECK-NEXT: undef %327.sub0:sreg_64 = S_ADD_U32 [[COPY8]], [[S_LSHL_B32_]], implicit-def $scc + ; CHECK-NEXT: undef %327.sub0:sreg_64 = S_ADD_U32 [[PRED_COPY8]], [[S_LSHL_B32_]], implicit-def $scc ; CHECK-NEXT: %327.sub1:sreg_64 = S_ADDC_U32 undef %42:sreg_32, [[S_ASHR_I32_]], implicit-def dead $scc, implicit $scc - ; CHECK-NEXT: undef %335.sub0:sreg_64 = S_ADD_U32 [[COPY9]], [[S_LSHL_B32_]], implicit-def $scc + ; CHECK-NEXT: undef %335.sub0:sreg_64 = S_ADD_U32 [[PRED_COPY9]], [[S_LSHL_B32_]], implicit-def $scc ; CHECK-NEXT: %335.sub1:sreg_64 = S_ADDC_U32 undef %39:sreg_32, [[S_ASHR_I32_]], implicit-def dead $scc, implicit $scc - ; CHECK-NEXT: undef %343.sub0:sreg_64 = S_ADD_U32 [[COPY9]], [[S_LSHL_B32_1]], implicit-def $scc + ; CHECK-NEXT: undef %343.sub0:sreg_64 = S_ADD_U32 [[PRED_COPY9]], [[S_LSHL_B32_1]], implicit-def $scc ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM8:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %200, 0, 0 :: (invariant load (s128) from %ir.121, addrspace 4) ; CHECK-NEXT: %343.sub1:sreg_64 = S_ADDC_U32 undef %39:sreg_32, [[S_ASHR_I32_1]], implicit-def dead $scc, implicit $scc - ; CHECK-NEXT: undef %351.sub0:sreg_64 = S_ADD_U32 [[COPY9]], [[S_LSHL_B32_2]], implicit-def $scc + ; CHECK-NEXT: undef %351.sub0:sreg_64 = S_ADD_U32 [[PRED_COPY9]], [[S_LSHL_B32_2]], implicit-def $scc ; CHECK-NEXT: %351.sub1:sreg_64 = S_ADDC_U32 undef %39:sreg_32, [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc - ; CHECK-NEXT: [[S_LSHL_B32_3:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY10]], 4, implicit-def dead $scc + ; CHECK-NEXT: [[S_LSHL_B32_3:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[PRED_COPY10]], 4, implicit-def dead $scc ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN3:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM3]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7) ; CHECK-NEXT: [[S_ADD_I32_6:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_LSHL_B32_3]], 16, implicit-def dead $scc ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR6:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR undef %396:sgpr_128, [[S_ADD_I32_6]], 0 :: (dereferenceable invariant load (s32)) @@ -146,11 +146,11 @@ ; CHECK-NEXT: [[S_ADD_I32_12:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR3]], -329, implicit-def dead $scc ; CHECK-NEXT: [[S_ADD_I32_13:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR3]], -345, implicit-def dead $scc ; CHECK-NEXT: [[S_ADD_I32_14:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR6]], -441, implicit-def dead $scc - ; CHECK-NEXT: [[S_ADD_U32_3:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], 160, implicit-def $scc + ; CHECK-NEXT: [[S_ADD_U32_3:%[0-9]+]]:sreg_32 = S_ADD_U32 
[[PRED_COPY1]], 160, implicit-def $scc ; CHECK-NEXT: [[S_ADDC_U32_3:%[0-9]+]]:sreg_32 = S_ADDC_U32 undef %36:sreg_32, 0, implicit-def dead $scc, implicit $scc ; CHECK-NEXT: undef %411.sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_3]], [[S_LSHL_B32_2]], implicit-def $scc ; CHECK-NEXT: %411.sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_3]], [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc - ; CHECK-NEXT: [[S_LSHL_B32_4:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY11]], 4, implicit-def dead $scc + ; CHECK-NEXT: [[S_LSHL_B32_4:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[PRED_COPY11]], 4, implicit-def dead $scc ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN9:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM10]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7) ; CHECK-NEXT: [[S_ASHR_I32_4:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 [[S_LSHL_B32_4]], 31, implicit-def dead $scc ; CHECK-NEXT: undef %425.sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_3]], [[S_LSHL_B32_4]], implicit-def $scc @@ -158,7 +158,7 @@ ; CHECK-NEXT: [[S_ADD_U32_4:%[0-9]+]]:sreg_32 = S_ADD_U32 %56.sub0, 168, implicit-def $scc ; CHECK-NEXT: [[S_ADDC_U32_4:%[0-9]+]]:sreg_32 = S_ADDC_U32 undef %57:sreg_32, 0, implicit-def dead $scc, implicit $scc ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM13:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %241, 0, 0 :: (invariant load (s128) from %ir.147, addrspace 4) - ; CHECK-NEXT: [[S_LSHL_B32_5:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY4]], 3, implicit-def dead $scc + ; CHECK-NEXT: [[S_LSHL_B32_5:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[PRED_COPY4]], 3, implicit-def dead $scc ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN10:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM11]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7) ; CHECK-NEXT: [[S_ASHR_I32_5:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 [[S_LSHL_B32_5]], 31, implicit-def dead $scc ; CHECK-NEXT: undef %441.sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_4]], [[S_LSHL_B32_5]], implicit-def $scc @@ -171,17 +171,17 @@ ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN13:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM13]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7) ; CHECK-NEXT: %71.sub3:sgpr_128 = S_MOV_B32 553734060 ; CHECK-NEXT: %71.sub2:sgpr_128 = S_MOV_B32 -1 - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:sgpr_128 = COPY %71 + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:sgpr_128 = PRED_COPY %71 ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM16:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %273, 0, 0 :: (invariant load (s128) from %ir.167, addrspace 4) - ; CHECK-NEXT: [[COPY13]].sub1:sgpr_128 = COPY %302.sub1 - ; CHECK-NEXT: [[COPY13]].sub0:sgpr_128 = COPY [[S_LOAD_DWORD_IMM]] - ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_IMM4:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[COPY13]], 0, 0 :: (dereferenceable invariant load (s32)) + ; CHECK-NEXT: [[PRED_COPY13]].sub1:sgpr_128 = PRED_COPY %302.sub1 + ; CHECK-NEXT: [[PRED_COPY13]].sub0:sgpr_128 = PRED_COPY [[S_LOAD_DWORD_IMM]] + ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_IMM4:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[PRED_COPY13]], 0, 0 :: (dereferenceable invariant load (s32)) ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN14:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM14]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7) ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN15:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], 
[[S_LOAD_DWORDX4_IMM15]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7) ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM17:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %286, 0, 0 :: (invariant load (s128) from %ir.175, addrspace 4) ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM18:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %293, 0, 0 :: (invariant load (s128) from %ir.180, addrspace 4) ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN16:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM16]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7) - ; CHECK-NEXT: [[S_LSHL_B32_6:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY3]], 3, implicit-def dead $scc + ; CHECK-NEXT: [[S_LSHL_B32_6:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[PRED_COPY3]], 3, implicit-def dead $scc ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFSET1:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[S_LOAD_DWORDX4_IMM1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7) ; CHECK-NEXT: [[S_ASHR_I32_6:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 [[S_LSHL_B32_6]], 31, implicit-def dead $scc ; CHECK-NEXT: [[S_ADD_I32_15:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_IMM4]], -467, implicit-def dead $scc @@ -192,16 +192,16 @@ ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFSET3:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[S_LOAD_DWORDX4_IMM18]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7) ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM19:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %327, 0, 0 :: (invariant load (s128) from %ir.202, addrspace 4) ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM20:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %335, 0, 0 :: (invariant load (s128) from %ir.208, addrspace 4) - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:sgpr_128 = COPY %71 + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:sgpr_128 = PRED_COPY %71 ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM21:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %343, 0, 0 :: (invariant load (s128) from %ir.213, addrspace 4) ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[S_LOAD_DWORDX2_IMM]].sub1, 65535, implicit-def dead $scc - ; CHECK-NEXT: [[COPY14]].sub0:sgpr_128 = COPY [[S_LOAD_DWORDX2_IMM]].sub0 - ; CHECK-NEXT: [[COPY14]].sub1:sgpr_128 = COPY [[S_AND_B32_]] - ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_IMM5:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[COPY14]], 0, 0 :: (dereferenceable invariant load (s32)) + ; CHECK-NEXT: [[PRED_COPY14]].sub0:sgpr_128 = PRED_COPY [[S_LOAD_DWORDX2_IMM]].sub0 + ; CHECK-NEXT: [[PRED_COPY14]].sub1:sgpr_128 = PRED_COPY [[S_AND_B32_]] + ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_IMM5:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[PRED_COPY14]], 0, 0 :: (dereferenceable invariant load (s32)) ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM22:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %351, 0, 0 :: (invariant load (s128) from %ir.218, addrspace 4) ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN17:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM19]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7) ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN18:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM20]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7) - ; CHECK-NEXT: [[S_LSHL_B32_7:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY2]], 3, implicit-def dead $scc + ; CHECK-NEXT: [[S_LSHL_B32_7:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[PRED_COPY2]], 3, implicit-def dead $scc ; CHECK-NEXT: 
[[BUFFER_LOAD_FORMAT_X_IDXEN19:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM21]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7) ; CHECK-NEXT: [[S_ASHR_I32_7:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 [[S_LSHL_B32_7]], 31, implicit-def dead $scc ; CHECK-NEXT: [[S_ADD_I32_16:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_IMM5]], -468, implicit-def dead $scc @@ -209,17 +209,17 @@ ; CHECK-NEXT: %468.sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_4]], [[S_ASHR_I32_7]], implicit-def dead $scc, implicit $scc ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN20:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM22]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7) ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM1:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM %468, 0, 0 :: (invariant load (s64) from %ir.287, addrspace 4) - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:sgpr_128 = COPY %71 + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:sgpr_128 = PRED_COPY %71 ; CHECK-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32 = S_AND_B32 [[S_LOAD_DWORDX2_IMM1]].sub1, 65535, implicit-def dead $scc - ; CHECK-NEXT: [[COPY15]].sub0:sgpr_128 = COPY [[S_LOAD_DWORDX2_IMM1]].sub0 - ; CHECK-NEXT: [[COPY15]].sub1:sgpr_128 = COPY [[S_AND_B32_1]] - ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_IMM6:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[COPY15]], 0, 0 :: (dereferenceable invariant load (s32)) + ; CHECK-NEXT: [[PRED_COPY15]].sub0:sgpr_128 = PRED_COPY [[S_LOAD_DWORDX2_IMM1]].sub0 + ; CHECK-NEXT: [[PRED_COPY15]].sub1:sgpr_128 = PRED_COPY [[S_AND_B32_1]] + ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_IMM6:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[PRED_COPY15]], 0, 0 :: (dereferenceable invariant load (s32)) ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM23:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %411, 0, 0 :: (invariant load (s128) from %ir.253, addrspace 4) ; CHECK-NEXT: [[S_LOAD_DWORD_IMM1:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM undef %488:sreg_64, 0, 0 :: (invariant load (s32) from `ptr addrspace(4) undef`, addrspace 4) ; CHECK-NEXT: KILL %411.sub0, %411.sub1 ; CHECK-NEXT: KILL undef %488:sreg_64 - ; CHECK-NEXT: KILL [[COPY15]].sub0_sub1, [[COPY15]].sub2_sub3 - ; CHECK-NEXT: [[S_LSHL_B32_8:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY12]], 3, implicit-def dead $scc + ; CHECK-NEXT: KILL [[PRED_COPY15]].sub0_sub1, [[PRED_COPY15]].sub2_sub3 + ; CHECK-NEXT: [[S_LSHL_B32_8:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[PRED_COPY12]], 3, implicit-def dead $scc ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM24:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %425, 0, 0 :: (invariant load (s128) from %ir.261, addrspace 4) ; CHECK-NEXT: [[S_ASHR_I32_8:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 [[S_LSHL_B32_8]], 31, implicit-def dead $scc ; CHECK-NEXT: [[S_ADD_I32_17:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_IMM6]], -469, implicit-def dead $scc @@ -231,17 +231,17 @@ ; CHECK-NEXT: KILL [[S_LOAD_DWORDX4_IMM24]] ; CHECK-NEXT: KILL [[S_LOAD_DWORDX4_IMM23]] ; CHECK-NEXT: [[S_AND_B32_2:%[0-9]+]]:sreg_32 = S_AND_B32 [[S_LOAD_DWORD_IMM1]], 65535, implicit-def dead $scc - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:sgpr_128 = COPY %71 - ; CHECK-NEXT: [[COPY16]].sub1:sgpr_128 = COPY [[S_AND_B32_2]] - ; CHECK-NEXT: [[COPY16]].sub0:sgpr_128 = COPY [[S_LOAD_DWORD_IMM2]] - ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_IMM7:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[COPY16]], 0, 0 :: (dereferenceable invariant load (s32)) + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:sgpr_128 = PRED_COPY %71 + ; CHECK-NEXT: 
[[PRED_COPY16]].sub1:sgpr_128 = PRED_COPY [[S_AND_B32_2]] + ; CHECK-NEXT: [[PRED_COPY16]].sub0:sgpr_128 = PRED_COPY [[S_LOAD_DWORD_IMM2]] + ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_IMM7:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[PRED_COPY16]], 0, 0 :: (dereferenceable invariant load (s32)) ; CHECK-NEXT: [[S_ADD_I32_18:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_IMM]], -474, implicit-def dead $scc ; CHECK-NEXT: [[S_ADD_I32_19:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR3]], -475, implicit-def dead $scc ; CHECK-NEXT: [[S_ADD_I32_20:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR3]], -491, implicit-def dead $scc ; CHECK-NEXT: [[S_ADD_I32_21:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR3]], -507, implicit-def dead $scc ; CHECK-NEXT: [[S_ADD_I32_22:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR3]], -539, implicit-def dead $scc ; CHECK-NEXT: [[S_ADD_I32_23:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_IMM7]], -473, implicit-def dead $scc - ; CHECK-NEXT: [[S_ADD_U32_5:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY]], 96, implicit-def $scc + ; CHECK-NEXT: [[S_ADD_U32_5:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PRED_COPY]], 96, implicit-def $scc ; CHECK-NEXT: [[S_ADDC_U32_5:%[0-9]+]]:sreg_32 = S_ADDC_U32 undef %33:sreg_32, 0, implicit-def dead $scc, implicit $scc ; CHECK-NEXT: undef %514.sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_5]], [[S_LSHL_B32_]], implicit-def $scc ; CHECK-NEXT: %514.sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_5]], [[S_ASHR_I32_]], implicit-def dead $scc, implicit $scc diff --git a/llvm/test/CodeGen/AMDGPU/splitkit.mir b/llvm/test/CodeGen/AMDGPU/splitkit.mir --- a/llvm/test/CodeGen/AMDGPU/splitkit.mir +++ b/llvm/test/CodeGen/AMDGPU/splitkit.mir @@ -34,13 +34,13 @@ # allocated to sgpr0_sgpr1 and the first to something else so we see two copies # in between for the two subregisters that are alive. 
# CHECK-LABEL: name: func1 -# CHECK: [[REG0:\$sgpr[0-9]+]] = COPY $sgpr0 -# CHECK: [[REG1:\$sgpr[0-9]+]] = COPY $sgpr2 +# CHECK: [[REG0:\$sgpr[0-9]+]] = PRED_COPY $sgpr0 +# CHECK: [[REG1:\$sgpr[0-9]+]] = PRED_COPY $sgpr2 # CHECK: S_NOP 0 # CHECK: S_NOP 0, implicit renamable [[REG0]] # CHECK: S_NOP 0, implicit renamable [[REG1]] -# CHECK: $sgpr0 = COPY killed renamable [[REG0]] -# CHECK: $sgpr2 = COPY renamable [[REG1]] +# CHECK: $sgpr0 = PRED_COPY killed renamable [[REG0]] +# CHECK: $sgpr2 = PRED_COPY renamable [[REG1]] # CHECK: S_NOP # CHECK: S_NOP 0, implicit renamable $sgpr0 # CHECK: S_NOP 0, implicit killed renamable $sgpr2 @@ -49,8 +49,8 @@ body: | bb.0: liveins: $sgpr0, $sgpr1, $sgpr2 - undef %0.sub0 : sgpr_128 = COPY $sgpr0 - %0.sub2 = COPY $sgpr2 + undef %0.sub0 : sgpr_128 = PRED_COPY $sgpr0 + %0.sub2 = PRED_COPY $sgpr2 S_NOP 0, implicit-def dead $sgpr0, implicit-def dead $sgpr1 diff --git a/llvm/test/CodeGen/AMDGPU/twoaddr-constrain.ll b/llvm/test/CodeGen/AMDGPU/twoaddr-constrain.ll --- a/llvm/test/CodeGen/AMDGPU/twoaddr-constrain.ll +++ b/llvm/test/CodeGen/AMDGPU/twoaddr-constrain.ll @@ -7,18 +7,18 @@ ; CHECK: bb.0 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr0, $sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY killed $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY killed $sgpr1 - ; CHECK-NEXT: undef %0.sub0:sreg_64 = COPY killed [[COPY]] - ; CHECK-NEXT: %0.sub1:sreg_64 = COPY killed [[COPY1]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY killed $sgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY killed $sgpr1 + ; CHECK-NEXT: undef %0.sub0:sreg_64 = PRED_COPY killed [[PRED_COPY]] + ; CHECK-NEXT: %0.sub1:sreg_64 = PRED_COPY killed [[PRED_COPY1]] ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 0, 0 :: (invariant load (<2 x s32>) from %ir.ptr, align 4, addrspace 4) ; CHECK-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM killed %0, 8, 0 :: (invariant load (s32) from %ir.ptr + 8, addrspace 4) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[S_LOAD_DWORDX2_IMM]].sub0 - ; CHECK-NEXT: $sgpr0 = COPY killed [[COPY2]] - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY killed [[S_LOAD_DWORDX2_IMM]].sub1 - ; CHECK-NEXT: $sgpr1 = COPY killed [[COPY3]] - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY killed [[S_LOAD_DWORD_IMM]] - ; CHECK-NEXT: $sgpr2 = COPY killed [[COPY4]] + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY [[S_LOAD_DWORDX2_IMM]].sub0 + ; CHECK-NEXT: $sgpr0 = PRED_COPY killed [[PRED_COPY2]] + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY killed [[S_LOAD_DWORDX2_IMM]].sub1 + ; CHECK-NEXT: $sgpr1 = PRED_COPY killed [[PRED_COPY3]] + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY killed [[S_LOAD_DWORD_IMM]] + ; CHECK-NEXT: $sgpr2 = PRED_COPY killed [[PRED_COPY4]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit killed $sgpr0, implicit killed $sgpr1, implicit killed $sgpr2 %load = load <3 x i32>, ptr addrspace(4) %ptr, align 4 ret <3 x i32> %load diff --git a/llvm/test/CodeGen/AMDGPU/twoaddr-fma.mir b/llvm/test/CodeGen/AMDGPU/twoaddr-fma.mir --- a/llvm/test/CodeGen/AMDGPU/twoaddr-fma.mir +++ b/llvm/test/CodeGen/AMDGPU/twoaddr-fma.mir @@ -16,7 +16,7 @@ bb.0: %0 = IMPLICIT_DEF - %1 = COPY %0.sub1 + %1 = PRED_COPY %0.sub1 %2 = V_MOV_B32_e32 1078523331, implicit $exec %3 = V_FMAC_F32_e32 killed %0.sub0, %2, killed %1, implicit $mode, implicit $exec @@ -37,7 +37,7 @@ bb.0: %0 = IMPLICIT_DEF - %1 = COPY %0.sub1 + %1 = PRED_COPY %0.sub1 %2 = 
V_MOV_B32_e32 1078523331, implicit $exec %3 = V_FMAC_F32_e32 %2, killed %0.sub0, killed %1, implicit $mode, implicit $exec @@ -124,7 +124,7 @@ ... # GCN-LABEL: name: test_fmaak_other_constantlike_src0_f32 -# GCN: %2:vgpr_32 = COPY %1 +# GCN: %2:vgpr_32 = PRED_COPY %1 # GCN: %2:vgpr_32 = V_FMAC_F32_e32 %stack.0, %0, %2, implicit $mode, implicit $exec --- name: test_fmaak_other_constantlike_src0_f32 @@ -166,7 +166,7 @@ bb.0: %0 = IMPLICIT_DEF - %1 = COPY %0.sub1 + %1 = PRED_COPY %0.sub1 %2 = V_MOV_B32_e32 1078523331, implicit $exec %3 = V_FMAC_F32_e32 %0.sub0, %2, %1, implicit $mode, implicit $exec %4 = V_FMAC_F32_e32 killed %0.sub0, %2, killed %1, implicit $mode, implicit $exec @@ -187,7 +187,7 @@ bb.0: %0 = IMPLICIT_DEF - %1 = COPY %0.sub1 + %1 = PRED_COPY %0.sub1 %2 = V_MOV_B32_e32 1078523331, implicit $exec %3 = V_FMAC_F32_e32 killed %0.sub0, %2, killed %1, implicit $mode, implicit $exec S_NOP 0, implicit %2 diff --git a/llvm/test/CodeGen/AMDGPU/undefined-physreg-sgpr-spill.mir b/llvm/test/CodeGen/AMDGPU/undefined-physreg-sgpr-spill.mir --- a/llvm/test/CodeGen/AMDGPU/undefined-physreg-sgpr-spill.mir +++ b/llvm/test/CodeGen/AMDGPU/undefined-physreg-sgpr-spill.mir @@ -17,10 +17,10 @@ # leaving a spill of the undefined register. # CHECK-LABEL: name: undefined_physreg_sgpr_spill -# CHECK: $sgpr0_sgpr1 = COPY $exec, implicit-def $exec +# CHECK: $sgpr0_sgpr1 = PRED_COPY $exec, implicit-def $exec # CHECK-NEXT: SI_SPILL_S64_SAVE $sgpr0_sgpr1, # CHECK-NEXT: $sgpr2_sgpr3 = S_AND_B64 killed $sgpr0_sgpr1, killed $vcc, implicit-def dead $scc -# CHECK: $exec = COPY killed $sgpr2_sgpr3 +# CHECK: $exec = PRED_COPY killed $sgpr2_sgpr3 name: undefined_physreg_sgpr_spill alignment: 1 exposesReturnsTwice: false @@ -43,12 +43,12 @@ successors: %bb.1, %bb.2 liveins: $vgpr0, $sgpr4_sgpr5, $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr13 - $vgpr1_vgpr2 = COPY killed $sgpr4_sgpr5, implicit $exec - $vgpr1 = GLOBAL_LOAD_UBYTE killed $vgpr1_vgpr2, 0, 0, implicit $exec :: (non-temporal dereferenceable invariant load (s8) from `ptr addrspace(4) undef`) + $vgpr1_vgpr2 = PRED_COPY killed $sgpr4_sgpr5, implicit $exec + $vgpr1 = GLOBAL_LOAD_UBYTE killed $vgpr1_vgpr2, 0, 0, implicit $exec :: (non-temporal dereferenceable invariant load (s8) from `i1 addrspace(4)* undef`) $vcc = V_CMP_NE_U32_e64 0, $vgpr0, implicit $exec $sgpr0_sgpr1 = V_CMP_EQ_U32_e64 1, killed $vgpr1, implicit $exec $vgpr1 = V_CNDMASK_B32_e64 0, 0, 0, -1, killed $sgpr0_sgpr1, implicit $exec - $sgpr0_sgpr1 = COPY $exec, implicit-def $exec + $sgpr0_sgpr1 = PRED_COPY $exec, implicit-def $exec SI_SPILL_S64_SAVE $sgpr0_sgpr1, %stack.0, implicit $exec, implicit $sgpr8_sgpr9_sgpr10_sgpr11, implicit $sgpr13, implicit-def dead $m0 :: (store (s64) into %stack.0, align 4, addrspace 5) $sgpr2_sgpr3 = S_AND_B64 killed $sgpr0_sgpr1, killed $vcc, implicit-def dead $scc $exec = S_MOV_B64_term killed $sgpr2_sgpr3 @@ -74,7 +74,7 @@ bb.3: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr2_sgpr3, $sgpr4_sgpr5 - $vcc = COPY $vgpr1 + $vcc = PRED_COPY $vgpr1 S_ENDPGM 0 ... 
@@ -82,10 +82,10 @@ # Move spill to after future save instruction # CHECK-LABEL: {{^}}name: undefined_physreg_sgpr_spill_reorder -# CHECK: $sgpr0_sgpr1 = COPY $exec, implicit-def $exec +# CHECK: $sgpr0_sgpr1 = PRED_COPY $exec, implicit-def $exec # CHECK: $sgpr2_sgpr3 = S_AND_B64 $sgpr0_sgpr1, killed $vcc, implicit-def dead $scc # CHECK: SI_SPILL_S64_SAVE killed $sgpr0_sgpr1, %stack.0, implicit $exec, implicit $sgpr8_sgpr9_sgpr10_sgpr11, implicit $sgpr13, implicit-def dead $m0 :: (store (s64) into %stack.0, align 4, addrspace 5) -# CHECK: $exec = COPY killed $sgpr2_sgpr3 +# CHECK: $exec = PRED_COPY killed $sgpr2_sgpr3 name: undefined_physreg_sgpr_spill_reorder alignment: 1 exposesReturnsTwice: false @@ -108,12 +108,12 @@ successors: %bb.1, %bb.2 liveins: $vgpr0, $sgpr4_sgpr5, $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr13 - $vgpr1_vgpr2 = COPY killed $sgpr4_sgpr5, implicit $exec - $vgpr1 = GLOBAL_LOAD_UBYTE killed $vgpr1_vgpr2, 0, 0, implicit $exec :: (non-temporal dereferenceable invariant load (s8) from `ptr addrspace(4) undef`) + $vgpr1_vgpr2 = PRED_COPY killed $sgpr4_sgpr5, implicit $exec + $vgpr1 = GLOBAL_LOAD_UBYTE killed $vgpr1_vgpr2, 0, 0, implicit $exec :: (non-temporal dereferenceable invariant load (s8) from `i1 addrspace(4)* undef`) $vcc = V_CMP_NE_U32_e64 0, $vgpr0, implicit $exec $sgpr0_sgpr1 = V_CMP_EQ_U32_e64 1, killed $vgpr1, implicit $exec $vgpr1 = V_CNDMASK_B32_e64 0, 0, 0, -1, killed $sgpr0_sgpr1, implicit $exec - $sgpr0_sgpr1 = COPY $exec, implicit-def $exec + $sgpr0_sgpr1 = PRED_COPY $exec, implicit-def $exec $sgpr2_sgpr3 = S_AND_B64 $sgpr0_sgpr1, killed $vcc, implicit-def dead $scc SI_SPILL_S64_SAVE killed $sgpr0_sgpr1, %stack.0, implicit $exec, implicit $sgpr8_sgpr9_sgpr10_sgpr11, implicit $sgpr13, implicit-def dead $m0 :: (store (s64) into %stack.0, align 4, addrspace 5) $exec = S_MOV_B64_term killed $sgpr2_sgpr3 @@ -139,7 +139,7 @@ bb.3: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr2_sgpr3, $sgpr4_sgpr5 - $vcc = COPY $vgpr1 + $vcc = PRED_COPY $vgpr1 S_ENDPGM 0 ... 
diff --git a/llvm/test/CodeGen/AMDGPU/vector-spill-restore-to-other-vector-type.mir b/llvm/test/CodeGen/AMDGPU/vector-spill-restore-to-other-vector-type.mir --- a/llvm/test/CodeGen/AMDGPU/vector-spill-restore-to-other-vector-type.mir +++ b/llvm/test/CodeGen/AMDGPU/vector-spill-restore-to-other-vector-type.mir @@ -21,7 +21,7 @@ ; GCN-NEXT: {{ $}} ; GCN-NEXT: $vgpr55 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3, implicit $agpr0_agpr1_agpr2_agpr3 ; GCN-NEXT: SCRATCH_STORE_DWORDX3_SADDR killed $agpr0_agpr1_agpr2, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3 :: (store (s96) into %stack.0, align 4, addrspace 5) - ; GCN-NEXT: $vgpr51 = COPY $vgpr55, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51 + ; GCN-NEXT: $vgpr51 = PRED_COPY $vgpr55, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51 ; GCN-NEXT: $vgpr48_vgpr49_vgpr50 = SCRATCH_LOAD_DWORDX3_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51 :: (load (s96) from %stack.0, align 4, addrspace 5) ; GCN-NEXT: S_ENDPGM 0, implicit $vgpr52, implicit $vgpr53, implicit $vgpr54, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47 SI_SPILL_A128_SAVE killed $agpr0_agpr1_agpr2_agpr3, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, addrspace 5) @@ -47,8 +47,8 @@ ; GCN-NEXT: $vgpr54 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3, implicit $agpr0_agpr1_agpr2_agpr3 ; GCN-NEXT: $vgpr55 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 ; GCN-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed $agpr0_agpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3 :: (store (s64) into %stack.0, align 4, addrspace 5) - ; GCN-NEXT: $vgpr51 = COPY $vgpr54, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51 - ; GCN-NEXT: $vgpr50 = COPY $vgpr55, implicit $vgpr48_vgpr49_vgpr50_vgpr51 + ; GCN-NEXT: $vgpr51 = PRED_COPY $vgpr54, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51 + ; GCN-NEXT: $vgpr50 = PRED_COPY $vgpr55, implicit $vgpr48_vgpr49_vgpr50_vgpr51 ; GCN-NEXT: $vgpr48_vgpr49 = SCRATCH_LOAD_DWORDX2_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51 :: (load (s64) from %stack.0, align 4, addrspace 5) ; GCN-NEXT: S_ENDPGM 0, implicit $vgpr52, implicit $vgpr53, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47 SI_SPILL_A128_SAVE killed $agpr0_agpr1_agpr2_agpr3, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, addrspace 5) @@ -75,9 +75,9 @@ ; GCN-NEXT: $vgpr54 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 ; GCN-NEXT: $vgpr55 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 ; GCN-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, 
implicit killed $agpr0_agpr1_agpr2_agpr3 :: (store (s32) into %stack.0, addrspace 5) - ; GCN-NEXT: $vgpr51 = COPY $vgpr53, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51 - ; GCN-NEXT: $vgpr50 = COPY $vgpr54, implicit $vgpr48_vgpr49_vgpr50_vgpr51 - ; GCN-NEXT: $vgpr49 = COPY $vgpr55, implicit $vgpr48_vgpr49_vgpr50_vgpr51 + ; GCN-NEXT: $vgpr51 = PRED_COPY $vgpr53, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51 + ; GCN-NEXT: $vgpr50 = PRED_COPY $vgpr54, implicit $vgpr48_vgpr49_vgpr50_vgpr51 + ; GCN-NEXT: $vgpr49 = PRED_COPY $vgpr55, implicit $vgpr48_vgpr49_vgpr50_vgpr51 ; GCN-NEXT: $vgpr48 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51 :: (load (s32) from %stack.0, addrspace 5) ; GCN-NEXT: S_ENDPGM 0, implicit $vgpr52, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47 SI_SPILL_A128_SAVE killed $agpr0_agpr1_agpr2_agpr3, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, addrspace 5) @@ -104,10 +104,10 @@ ; GCN-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 ; GCN-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 ; GCN-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3 - ; GCN-NEXT: $vgpr55 = COPY $vgpr0, implicit-def $vgpr52_vgpr53_vgpr54_vgpr55 - ; GCN-NEXT: $vgpr54 = COPY $vgpr1, implicit $vgpr52_vgpr53_vgpr54_vgpr55 - ; GCN-NEXT: $vgpr53 = COPY $vgpr2, implicit $vgpr52_vgpr53_vgpr54_vgpr55 - ; GCN-NEXT: $vgpr52 = COPY $vgpr3, implicit $vgpr52_vgpr53_vgpr54_vgpr55 + ; GCN-NEXT: $vgpr55 = PRED_COPY $vgpr0, implicit-def $vgpr52_vgpr53_vgpr54_vgpr55 + ; GCN-NEXT: $vgpr54 = PRED_COPY $vgpr1, implicit $vgpr52_vgpr53_vgpr54_vgpr55 + ; GCN-NEXT: $vgpr53 = PRED_COPY $vgpr2, implicit $vgpr52_vgpr53_vgpr54_vgpr55 + ; GCN-NEXT: $vgpr52 = PRED_COPY $vgpr3, implicit $vgpr52_vgpr53_vgpr54_vgpr55 ; GCN-NEXT: S_ENDPGM 0 SI_SPILL_A128_SAVE killed $agpr0_agpr1_agpr2_agpr3, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, addrspace 5) $vgpr52_vgpr53_vgpr54_vgpr55 = SI_SPILL_V128_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.0, align 4, addrspace 5) @@ -131,7 +131,7 @@ ; GCN-NEXT: {{ $}} ; GCN-NEXT: $agpr30 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 ; GCN-NEXT: SCRATCH_STORE_DWORDX3_SADDR killed $vgpr0_vgpr1_vgpr2, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3 :: (store (s96) into %stack.0, align 4, addrspace 5) - ; GCN-NEXT: $agpr29 = COPY $agpr30, implicit-def $agpr26_agpr27_agpr28_agpr29 + ; GCN-NEXT: $agpr29 = PRED_COPY $agpr30, implicit-def $agpr26_agpr27_agpr28_agpr29 ; GCN-NEXT: $agpr26_agpr27_agpr28 = SCRATCH_LOAD_DWORDX3_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr26_agpr27_agpr28_agpr29 :: (load (s96) from %stack.0, align 4, addrspace 5) ; GCN-NEXT: S_ENDPGM 0, implicit $agpr31, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit 
$agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23, implicit $agpr24_agpr25 SI_SPILL_V128_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, addrspace 5) @@ -157,8 +157,8 @@ ; GCN-NEXT: $agpr30 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 ; GCN-NEXT: $agpr31 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 ; GCN-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed $vgpr0_vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3 :: (store (s64) into %stack.0, align 4, addrspace 5) - ; GCN-NEXT: $agpr29 = COPY $agpr30, implicit-def $agpr26_agpr27_agpr28_agpr29 - ; GCN-NEXT: $agpr28 = COPY $agpr31, implicit $agpr26_agpr27_agpr28_agpr29 + ; GCN-NEXT: $agpr29 = PRED_COPY $agpr30, implicit-def $agpr26_agpr27_agpr28_agpr29 + ; GCN-NEXT: $agpr28 = PRED_COPY $agpr31, implicit $agpr26_agpr27_agpr28_agpr29 ; GCN-NEXT: $agpr26_agpr27 = SCRATCH_LOAD_DWORDX2_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr26_agpr27_agpr28_agpr29 :: (load (s64) from %stack.0, align 4, addrspace 5) ; GCN-NEXT: S_ENDPGM 0, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23, implicit $agpr24_agpr25 SI_SPILL_V128_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, addrspace 5) @@ -185,9 +185,9 @@ ; GCN-NEXT: $agpr30 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 ; GCN-NEXT: $agpr31 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 ; GCN-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3 :: (store (s32) into %stack.0, addrspace 5) - ; GCN-NEXT: $agpr29 = COPY $agpr25, implicit-def $agpr26_agpr27_agpr28_agpr29 - ; GCN-NEXT: $agpr28 = COPY $agpr30, implicit $agpr26_agpr27_agpr28_agpr29 - ; GCN-NEXT: $agpr27 = COPY $agpr31, implicit $agpr26_agpr27_agpr28_agpr29 + ; GCN-NEXT: $agpr29 = PRED_COPY $agpr25, implicit-def $agpr26_agpr27_agpr28_agpr29 + ; GCN-NEXT: $agpr28 = PRED_COPY $agpr30, implicit $agpr26_agpr27_agpr28_agpr29 + ; GCN-NEXT: $agpr27 = PRED_COPY $agpr31, implicit $agpr26_agpr27_agpr28_agpr29 ; GCN-NEXT: $agpr26 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr26_agpr27_agpr28_agpr29 :: (load (s32) from %stack.0, addrspace 5) ; GCN-NEXT: S_ENDPGM 0, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23, implicit $agpr24 SI_SPILL_V128_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, addrspace 5) @@ -214,10 +214,10 @@ ; GCN-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 ; GCN-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 ; GCN-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3 - ; GCN-NEXT: $agpr3 = COPY $agpr4, implicit-def $agpr0_agpr1_agpr2_agpr3 - ; GCN-NEXT: $agpr2 = COPY $agpr5, implicit $agpr0_agpr1_agpr2_agpr3 - ; GCN-NEXT: 
$agpr1 = COPY $agpr6, implicit $agpr0_agpr1_agpr2_agpr3 - ; GCN-NEXT: $agpr0 = COPY $agpr7, implicit $agpr0_agpr1_agpr2_agpr3 + ; GCN-NEXT: $agpr3 = PRED_COPY $agpr4, implicit-def $agpr0_agpr1_agpr2_agpr3 + ; GCN-NEXT: $agpr2 = PRED_COPY $agpr5, implicit $agpr0_agpr1_agpr2_agpr3 + ; GCN-NEXT: $agpr1 = PRED_COPY $agpr6, implicit $agpr0_agpr1_agpr2_agpr3 + ; GCN-NEXT: $agpr0 = PRED_COPY $agpr7, implicit $agpr0_agpr1_agpr2_agpr3 ; GCN-NEXT: S_ENDPGM 0 SI_SPILL_V128_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, addrspace 5) $agpr0_agpr1_agpr2_agpr3 = SI_SPILL_A128_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.0, align 4, addrspace 5) diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-liverange-ir.ll b/llvm/test/CodeGen/AMDGPU/vgpr-liverange-ir.ll --- a/llvm/test/CodeGen/AMDGPU/vgpr-liverange-ir.ll +++ b/llvm/test/CodeGen/AMDGPU/vgpr-liverange-ir.ll @@ -8,9 +8,9 @@ ; SI-NEXT: successors: %bb.3(0x40000000), %bb.1(0x40000000) ; SI-NEXT: liveins: $vgpr0, $vgpr1 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY killed $vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY killed $vgpr0 - ; SI-NEXT: [[V_CMP_GT_I32_e64_:%[0-9]+]]:sreg_32 = V_CMP_GT_I32_e64 6, killed [[COPY1]], implicit $exec + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY killed $vgpr1 + ; SI-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY killed $vgpr0 + ; SI-NEXT: [[V_CMP_GT_I32_e64_:%[0-9]+]]:sreg_32 = V_CMP_GT_I32_e64 6, killed [[PRED_COPY1]], implicit $exec ; SI-NEXT: [[SI_IF:%[0-9]+]]:sreg_32 = SI_IF killed [[V_CMP_GT_I32_e64_]], %bb.1, implicit-def dead $exec, implicit-def dead $scc, implicit $exec ; SI-NEXT: S_BRANCH %bb.3 ; SI-NEXT: {{ $}} @@ -18,7 +18,7 @@ ; SI-NEXT: successors: %bb.2(0x40000000), %bb.4(0x40000000) ; SI-NEXT: {{ $}} ; SI-NEXT: [[PHI:%[0-9]+]]:vgpr_32 = PHI undef %13:vgpr_32, %bb.0, %4, %bb.3 - ; SI-NEXT: [[PHI1:%[0-9]+]]:vgpr_32 = PHI [[COPY]], %bb.0, undef %15:vgpr_32, %bb.3 + ; SI-NEXT: [[PHI1:%[0-9]+]]:vgpr_32 = PHI [[PRED_COPY]], %bb.0, undef %15:vgpr_32, %bb.3 ; SI-NEXT: [[SI_ELSE:%[0-9]+]]:sreg_32 = SI_ELSE killed [[SI_IF]], %bb.4, implicit-def dead $exec, implicit-def dead $scc, implicit $exec ; SI-NEXT: S_BRANCH %bb.2 ; SI-NEXT: {{ $}} @@ -31,13 +31,13 @@ ; SI-NEXT: bb.3.else: ; SI-NEXT: successors: %bb.1(0x80000000) ; SI-NEXT: {{ $}} - ; SI-NEXT: [[V_MUL_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MUL_F32_e64 0, 1077936128, 0, killed [[COPY]], 0, 0, implicit $mode, implicit $exec + ; SI-NEXT: [[V_MUL_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MUL_F32_e64 0, 1077936128, 0, killed [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec ; SI-NEXT: S_BRANCH %bb.1 ; SI-NEXT: {{ $}} ; SI-NEXT: bb.4.end: ; SI-NEXT: [[PHI2:%[0-9]+]]:vgpr_32 = PHI [[PHI]], %bb.1, [[V_ADD_F32_e64_]], %bb.2 ; SI-NEXT: SI_END_CF killed [[SI_ELSE]], implicit-def dead $exec, implicit-def dead $scc, implicit $exec - ; SI-NEXT: $vgpr0 = COPY killed [[PHI2]] + ; SI-NEXT: $vgpr0 = PRED_COPY killed [[PHI2]] ; SI-NEXT: SI_RETURN_TO_EPILOG killed $vgpr0 main_body: %cc = icmp sgt i32 %z, 5 @@ -64,9 +64,9 @@ ; SI-NEXT: successors: %bb.3(0x40000000), %bb.1(0x40000000) ; SI-NEXT: liveins: $vgpr0, $vgpr1 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY killed $vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY killed $vgpr0 - ; SI-NEXT: [[V_CMP_GT_I32_e64_:%[0-9]+]]:sreg_32 = V_CMP_GT_I32_e64 6, killed [[COPY1]], implicit $exec + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY killed $vgpr1 + ; SI-NEXT: 
[[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY killed $vgpr0 + ; SI-NEXT: [[V_CMP_GT_I32_e64_:%[0-9]+]]:sreg_32 = V_CMP_GT_I32_e64 6, killed [[PRED_COPY1]], implicit $exec ; SI-NEXT: [[SI_IF:%[0-9]+]]:sreg_32 = SI_IF killed [[V_CMP_GT_I32_e64_]], %bb.1, implicit-def dead $exec, implicit-def dead $scc, implicit $exec ; SI-NEXT: S_BRANCH %bb.3 ; SI-NEXT: {{ $}} @@ -74,20 +74,20 @@ ; SI-NEXT: successors: %bb.2(0x40000000), %bb.4(0x40000000) ; SI-NEXT: {{ $}} ; SI-NEXT: [[PHI:%[0-9]+]]:vgpr_32 = PHI undef %16:vgpr_32, %bb.0, %5, %bb.3 - ; SI-NEXT: [[PHI1:%[0-9]+]]:vgpr_32 = PHI undef %16:vgpr_32, %bb.0, [[COPY]], %bb.3 + ; SI-NEXT: [[PHI1:%[0-9]+]]:vgpr_32 = PHI undef %16:vgpr_32, %bb.0, [[PRED_COPY]], %bb.3 ; SI-NEXT: [[SI_ELSE:%[0-9]+]]:sreg_32 = SI_ELSE killed [[SI_IF]], %bb.4, implicit-def dead $exec, implicit-def dead $scc, implicit $exec ; SI-NEXT: S_BRANCH %bb.2 ; SI-NEXT: {{ $}} ; SI-NEXT: bb.2.if: ; SI-NEXT: successors: %bb.4(0x80000000) ; SI-NEXT: {{ $}} - ; SI-NEXT: [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, killed [[COPY]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; SI-NEXT: [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, killed [[PRED_COPY]], 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec ; SI-NEXT: S_BRANCH %bb.4 ; SI-NEXT: {{ $}} ; SI-NEXT: bb.3.else: ; SI-NEXT: successors: %bb.1(0x80000000) ; SI-NEXT: {{ $}} - ; SI-NEXT: [[V_MUL_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MUL_F32_e64 0, 1077936128, 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; SI-NEXT: [[V_MUL_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MUL_F32_e64 0, 1077936128, 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec ; SI-NEXT: S_BRANCH %bb.1 ; SI-NEXT: {{ $}} ; SI-NEXT: bb.4.end: @@ -95,7 +95,7 @@ ; SI-NEXT: [[PHI3:%[0-9]+]]:vgpr_32 = PHI [[PHI]], %bb.1, [[V_ADD_F32_e64_]], %bb.2 ; SI-NEXT: SI_END_CF killed [[SI_ELSE]], implicit-def dead $exec, implicit-def dead $scc, implicit $exec ; SI-NEXT: [[V_ADD_F32_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, killed [[PHI2]], 0, killed [[PHI3]], 0, 0, implicit $mode, implicit $exec - ; SI-NEXT: $vgpr0 = COPY killed [[V_ADD_F32_e64_1]] + ; SI-NEXT: $vgpr0 = PRED_COPY killed [[V_ADD_F32_e64_1]] ; SI-NEXT: SI_RETURN_TO_EPILOG killed $vgpr0 main_body: %cc = icmp sgt i32 %z, 5 @@ -123,18 +123,18 @@ ; SI-NEXT: successors: %bb.1(0x80000000) ; SI-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0, $vgpr2 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY killed $vgpr2 - ; SI-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY killed $sgpr0 - ; SI-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY killed $vgpr1 - ; SI-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed $vgpr0 - ; SI-NEXT: [[V_CMP_GT_I32_e64_:%[0-9]+]]:sreg_32 = V_CMP_GT_I32_e64 6, killed [[COPY3]], implicit $exec + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY killed $vgpr2 + ; SI-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_32 = PRED_COPY killed $sgpr0 + ; SI-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY killed $vgpr1 + ; SI-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY killed $vgpr0 + ; SI-NEXT: [[V_CMP_GT_I32_e64_:%[0-9]+]]:sreg_32 = V_CMP_GT_I32_e64 6, killed [[PRED_COPY3]], implicit $exec ; SI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; SI-NEXT: {{ $}} ; SI-NEXT: bb.1.for.body: ; SI-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; SI-NEXT: {{ $}} ; SI-NEXT: [[PHI:%[0-9]+]]:sreg_32 = PHI [[S_MOV_B32_]], %bb.0, %14, %bb.5 - ; SI-NEXT: [[PHI1:%[0-9]+]]:vgpr_32 = PHI [[COPY]], %bb.0, %13, %bb.5 + ; SI-NEXT: [[PHI1:%[0-9]+]]:vgpr_32 = PHI [[PRED_COPY]], 
%bb.0, %13, %bb.5 ; SI-NEXT: [[SI_IF:%[0-9]+]]:sreg_32 = SI_IF [[V_CMP_GT_I32_e64_]], %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec ; SI-NEXT: S_BRANCH %bb.4 ; SI-NEXT: {{ $}} @@ -150,14 +150,14 @@ ; SI-NEXT: bb.3.if: ; SI-NEXT: successors: %bb.5(0x80000000) ; SI-NEXT: {{ $}} - ; SI-NEXT: [[V_MUL_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MUL_F32_e64 0, [[PHI]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec + ; SI-NEXT: [[V_MUL_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MUL_F32_e64 0, [[PHI]], 0, [[PRED_COPY2]], 0, 0, implicit $mode, implicit $exec ; SI-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 1, killed [[PHI4]], 0, implicit $exec ; SI-NEXT: S_BRANCH %bb.5 ; SI-NEXT: {{ $}} ; SI-NEXT: bb.4.else: ; SI-NEXT: successors: %bb.2(0x80000000) ; SI-NEXT: {{ $}} - ; SI-NEXT: [[V_MUL_F32_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_MUL_F32_e64 0, [[COPY2]], 0, [[PHI1]], 0, 0, implicit $mode, implicit $exec + ; SI-NEXT: [[V_MUL_F32_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_MUL_F32_e64 0, [[PRED_COPY2]], 0, [[PHI1]], 0, 0, implicit $mode, implicit $exec ; SI-NEXT: [[V_MUL_LO_U32_e64_:%[0-9]+]]:vgpr_32 = V_MUL_LO_U32_e64 killed [[PHI1]], 3, implicit $exec ; SI-NEXT: S_BRANCH %bb.2 ; SI-NEXT: {{ $}} @@ -169,13 +169,13 @@ ; SI-NEXT: SI_END_CF killed [[SI_ELSE]], implicit-def dead $exec, implicit-def dead $scc, implicit $exec ; SI-NEXT: [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 1, [[PHI6]], 0, implicit $exec ; SI-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 killed [[PHI]], 1, implicit-def dead $scc - ; SI-NEXT: S_CMP_LT_I32 [[S_ADD_I32_]], [[COPY1]], implicit-def $scc + ; SI-NEXT: S_CMP_LT_I32 [[S_ADD_I32_]], [[PRED_COPY1]], implicit-def $scc ; SI-NEXT: S_CBRANCH_SCC1 %bb.1, implicit killed $scc ; SI-NEXT: S_BRANCH %bb.6 ; SI-NEXT: {{ $}} ; SI-NEXT: bb.6.for.end: ; SI-NEXT: [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, killed [[PHI6]], 0, killed [[PHI5]], 0, 0, implicit $mode, implicit $exec - ; SI-NEXT: $vgpr0 = COPY killed [[V_ADD_F32_e64_]] + ; SI-NEXT: $vgpr0 = PRED_COPY killed [[V_ADD_F32_e64_]] ; SI-NEXT: SI_RETURN_TO_EPILOG killed $vgpr0 entry: ; %break = icmp sgt i32 %bound, 0 @@ -222,13 +222,13 @@ ; SI-NEXT: successors: %bb.6(0x40000000), %bb.1(0x40000000) ; SI-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY killed $vgpr5 - ; SI-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY killed $vgpr4 - ; SI-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY killed $vgpr3 - ; SI-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed $vgpr2 - ; SI-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY killed $vgpr1 - ; SI-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY killed $vgpr0 - ; SI-NEXT: [[V_CMP_GT_I32_e64_:%[0-9]+]]:sreg_32 = V_CMP_GT_I32_e64 6, killed [[COPY5]], implicit $exec + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY killed $vgpr5 + ; SI-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY killed $vgpr4 + ; SI-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY killed $vgpr3 + ; SI-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY killed $vgpr2 + ; SI-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY killed $vgpr1 + ; SI-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY killed $vgpr0 + ; SI-NEXT: [[V_CMP_GT_I32_e64_:%[0-9]+]]:sreg_32 = V_CMP_GT_I32_e64 6, killed [[PRED_COPY5]], implicit $exec ; SI-NEXT: [[SI_IF:%[0-9]+]]:sreg_32 = SI_IF killed [[V_CMP_GT_I32_e64_]], %bb.1, implicit-def dead $exec, implicit-def dead $scc, implicit $exec ; SI-NEXT: S_BRANCH %bb.6 ; SI-NEXT: {{ $}} @@ -236,9 +236,9 @@ ; 
SI-NEXT: successors: %bb.2(0x40000000), %bb.10(0x40000000) ; SI-NEXT: {{ $}} ; SI-NEXT: [[PHI:%[0-9]+]]:vgpr_32 = PHI undef %49:vgpr_32, %bb.0, %4, %bb.9 - ; SI-NEXT: [[PHI1:%[0-9]+]]:vgpr_32 = PHI [[COPY4]], %bb.0, undef %51:vgpr_32, %bb.9 - ; SI-NEXT: [[PHI2:%[0-9]+]]:vgpr_32 = PHI [[COPY3]], %bb.0, undef %53:vgpr_32, %bb.9 - ; SI-NEXT: [[PHI3:%[0-9]+]]:vgpr_32 = PHI [[COPY2]], %bb.0, undef %55:vgpr_32, %bb.9 + ; SI-NEXT: [[PHI1:%[0-9]+]]:vgpr_32 = PHI [[PRED_COPY4]], %bb.0, undef %51:vgpr_32, %bb.9 + ; SI-NEXT: [[PHI2:%[0-9]+]]:vgpr_32 = PHI [[PRED_COPY3]], %bb.0, undef %53:vgpr_32, %bb.9 + ; SI-NEXT: [[PHI3:%[0-9]+]]:vgpr_32 = PHI [[PRED_COPY2]], %bb.0, undef %55:vgpr_32, %bb.9 ; SI-NEXT: [[SI_ELSE:%[0-9]+]]:sreg_32 = SI_ELSE killed [[SI_IF]], %bb.10, implicit-def dead $exec, implicit-def dead $scc, implicit $exec ; SI-NEXT: S_BRANCH %bb.2 ; SI-NEXT: {{ $}} @@ -263,12 +263,12 @@ ; SI-NEXT: successors: %bb.3(0x40000000), %bb.5(0x40000000) ; SI-NEXT: {{ $}} ; SI-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 - ; SI-NEXT: [[COPY6:%[0-9]+]]:sgpr_128 = COPY $sgpr100_sgpr101_sgpr102_sgpr103 - ; SI-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY killed [[COPY6]] - ; SI-NEXT: $vgpr0 = COPY killed [[PHI5]] + ; SI-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_128 = PRED_COPY $sgpr100_sgpr101_sgpr102_sgpr103 + ; SI-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY killed [[PRED_COPY6]] + ; SI-NEXT: $vgpr0 = PRED_COPY killed [[PHI5]] ; SI-NEXT: dead $sgpr30_sgpr31 = SI_CALL killed [[REG_SEQUENCE1]], 0, csr_amdgpu_si_gfx, implicit killed $sgpr0_sgpr1_sgpr2_sgpr3, implicit killed $vgpr0, implicit-def $vgpr0 ; SI-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 - ; SI-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY killed $vgpr0 + ; SI-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY killed $vgpr0 ; SI-NEXT: $exec_lo = S_XOR_B32_term $exec_lo, killed [[S_AND_SAVEEXEC_B32_]], implicit-def dead $scc ; SI-NEXT: SI_WATERFALL_LOOP %bb.3, implicit $exec ; SI-NEXT: {{ $}} @@ -276,20 +276,20 @@ ; SI-NEXT: successors: %bb.10(0x80000000) ; SI-NEXT: {{ $}} ; SI-NEXT: $exec_lo = S_MOV_B32 killed [[S_MOV_B32_]] - ; SI-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY killed [[COPY7]] + ; SI-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY killed [[PRED_COPY7]] ; SI-NEXT: S_BRANCH %bb.10 ; SI-NEXT: {{ $}} ; SI-NEXT: bb.6.else: ; SI-NEXT: successors: %bb.7(0x80000000) ; SI-NEXT: {{ $}} - ; SI-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[COPY1]], %subreg.sub0, killed [[COPY]], %subreg.sub1 + ; SI-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[PRED_COPY1]], %subreg.sub0, killed [[PRED_COPY]], %subreg.sub1 ; SI-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32_xm0_xexec = S_MOV_B32 $exec_lo ; SI-NEXT: {{ $}} ; SI-NEXT: bb.7: ; SI-NEXT: successors: %bb.8(0x80000000) ; SI-NEXT: {{ $}} ; SI-NEXT: [[PHI6:%[0-9]+]]:vreg_64 = PHI undef %61:vreg_64, %bb.8, [[REG_SEQUENCE2]], %bb.6 - ; SI-NEXT: [[PHI7:%[0-9]+]]:vgpr_32 = PHI undef %63:vgpr_32, %bb.8, [[COPY4]], %bb.6 + ; SI-NEXT: [[PHI7:%[0-9]+]]:vgpr_32 = PHI undef %63:vgpr_32, %bb.8, [[PRED_COPY4]], %bb.6 ; SI-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[PHI6]].sub0, implicit $exec ; SI-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[PHI6]].sub1, implicit $exec ; SI-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_64 = REG_SEQUENCE killed [[V_READFIRSTLANE_B32_2]], %subreg.sub0, killed [[V_READFIRSTLANE_B32_3]], %subreg.sub1 @@ -300,12 +300,12 @@ ; SI-NEXT: successors: 
%bb.7(0x40000000), %bb.9(0x40000000) ; SI-NEXT: {{ $}} ; SI-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 - ; SI-NEXT: [[COPY9:%[0-9]+]]:sgpr_128 = COPY $sgpr100_sgpr101_sgpr102_sgpr103 - ; SI-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY killed [[COPY9]] - ; SI-NEXT: $vgpr0 = COPY killed [[PHI7]] + ; SI-NEXT: [[PRED_COPY9:%[0-9]+]]:sgpr_128 = PRED_COPY $sgpr100_sgpr101_sgpr102_sgpr103 + ; SI-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY killed [[PRED_COPY9]] + ; SI-NEXT: $vgpr0 = PRED_COPY killed [[PHI7]] ; SI-NEXT: dead $sgpr30_sgpr31 = SI_CALL killed [[REG_SEQUENCE3]], 0, csr_amdgpu_si_gfx, implicit killed $sgpr0_sgpr1_sgpr2_sgpr3, implicit killed $vgpr0, implicit-def $vgpr0 ; SI-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 - ; SI-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY killed $vgpr0 + ; SI-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY killed $vgpr0 ; SI-NEXT: $exec_lo = S_XOR_B32_term $exec_lo, killed [[S_AND_SAVEEXEC_B32_1]], implicit-def dead $scc ; SI-NEXT: SI_WATERFALL_LOOP %bb.7, implicit $exec ; SI-NEXT: {{ $}} @@ -313,13 +313,13 @@ ; SI-NEXT: successors: %bb.1(0x80000000) ; SI-NEXT: {{ $}} ; SI-NEXT: $exec_lo = S_MOV_B32 killed [[S_MOV_B32_1]] - ; SI-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY killed [[COPY10]] + ; SI-NEXT: [[PRED_COPY11:%[0-9]+]]:vgpr_32 = PRED_COPY killed [[PRED_COPY10]] ; SI-NEXT: S_BRANCH %bb.1 ; SI-NEXT: {{ $}} ; SI-NEXT: bb.10.end: - ; SI-NEXT: [[PHI8:%[0-9]+]]:vgpr_32 = PHI [[PHI]], %bb.1, [[COPY8]], %bb.5 + ; SI-NEXT: [[PHI8:%[0-9]+]]:vgpr_32 = PHI [[PHI]], %bb.1, [[PRED_COPY8]], %bb.5 ; SI-NEXT: SI_END_CF killed [[SI_ELSE]], implicit-def dead $exec, implicit-def dead $scc, implicit $exec - ; SI-NEXT: $vgpr0 = COPY killed [[PHI8]] + ; SI-NEXT: $vgpr0 = PRED_COPY killed [[PHI8]] ; SI-NEXT: SI_RETURN_TO_EPILOG killed $vgpr0 main_body: %cc = icmp sgt i32 %z, 5 @@ -345,13 +345,13 @@ ; SI-NEXT: successors: %bb.6(0x40000000), %bb.1(0x40000000) ; SI-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY killed $vgpr5 - ; SI-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY killed $vgpr4 - ; SI-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY killed $vgpr3 - ; SI-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed $vgpr2 - ; SI-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY killed $vgpr1 - ; SI-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY killed $vgpr0 - ; SI-NEXT: [[V_CMP_GT_I32_e64_:%[0-9]+]]:sreg_32 = V_CMP_GT_I32_e64 6, killed [[COPY5]], implicit $exec + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY killed $vgpr5 + ; SI-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY killed $vgpr4 + ; SI-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY killed $vgpr3 + ; SI-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY killed $vgpr2 + ; SI-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY killed $vgpr1 + ; SI-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY killed $vgpr0 + ; SI-NEXT: [[V_CMP_GT_I32_e64_:%[0-9]+]]:sreg_32 = V_CMP_GT_I32_e64 6, killed [[PRED_COPY5]], implicit $exec ; SI-NEXT: [[SI_IF:%[0-9]+]]:sreg_32 = SI_IF killed [[V_CMP_GT_I32_e64_]], %bb.1, implicit-def dead $exec, implicit-def dead $scc, implicit $exec ; SI-NEXT: S_BRANCH %bb.6 ; SI-NEXT: {{ $}} @@ -359,8 +359,8 @@ ; SI-NEXT: successors: %bb.2(0x40000000), %bb.10(0x40000000) ; SI-NEXT: {{ $}} ; SI-NEXT: [[PHI:%[0-9]+]]:vgpr_32 = PHI undef %50:vgpr_32, %bb.0, %4, %bb.9 - ; SI-NEXT: [[PHI1:%[0-9]+]]:vgpr_32 = PHI [[COPY3]], %bb.0, undef %52:vgpr_32, %bb.9 - ; SI-NEXT: 
[[PHI2:%[0-9]+]]:vgpr_32 = PHI [[COPY2]], %bb.0, undef %54:vgpr_32, %bb.9 + ; SI-NEXT: [[PHI1:%[0-9]+]]:vgpr_32 = PHI [[PRED_COPY3]], %bb.0, undef %52:vgpr_32, %bb.9 + ; SI-NEXT: [[PHI2:%[0-9]+]]:vgpr_32 = PHI [[PRED_COPY2]], %bb.0, undef %54:vgpr_32, %bb.9 ; SI-NEXT: [[SI_ELSE:%[0-9]+]]:sreg_32 = SI_ELSE killed [[SI_IF]], %bb.10, implicit-def dead $exec, implicit-def dead $scc, implicit $exec ; SI-NEXT: S_BRANCH %bb.2 ; SI-NEXT: {{ $}} @@ -384,12 +384,12 @@ ; SI-NEXT: successors: %bb.3(0x40000000), %bb.5(0x40000000) ; SI-NEXT: {{ $}} ; SI-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 - ; SI-NEXT: [[COPY6:%[0-9]+]]:sgpr_128 = COPY $sgpr100_sgpr101_sgpr102_sgpr103 - ; SI-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY killed [[COPY6]] - ; SI-NEXT: $vgpr0 = COPY [[COPY4]] + ; SI-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_128 = PRED_COPY $sgpr100_sgpr101_sgpr102_sgpr103 + ; SI-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY killed [[PRED_COPY6]] + ; SI-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY4]] ; SI-NEXT: dead $sgpr30_sgpr31 = SI_CALL killed [[REG_SEQUENCE1]], 0, csr_amdgpu_si_gfx, implicit killed $sgpr0_sgpr1_sgpr2_sgpr3, implicit killed $vgpr0, implicit-def $vgpr0 ; SI-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 - ; SI-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY killed $vgpr0 + ; SI-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY killed $vgpr0 ; SI-NEXT: $exec_lo = S_XOR_B32_term $exec_lo, killed [[S_AND_SAVEEXEC_B32_]], implicit-def dead $scc ; SI-NEXT: SI_WATERFALL_LOOP %bb.3, implicit $exec ; SI-NEXT: {{ $}} @@ -397,13 +397,13 @@ ; SI-NEXT: successors: %bb.10(0x80000000) ; SI-NEXT: {{ $}} ; SI-NEXT: $exec_lo = S_MOV_B32 killed [[S_MOV_B32_]] - ; SI-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY killed [[COPY7]] + ; SI-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY killed [[PRED_COPY7]] ; SI-NEXT: S_BRANCH %bb.10 ; SI-NEXT: {{ $}} ; SI-NEXT: bb.6.else: ; SI-NEXT: successors: %bb.7(0x80000000) ; SI-NEXT: {{ $}} - ; SI-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[COPY1]], %subreg.sub0, killed [[COPY]], %subreg.sub1 + ; SI-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[PRED_COPY1]], %subreg.sub0, killed [[PRED_COPY]], %subreg.sub1 ; SI-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32_xm0_xexec = S_MOV_B32 $exec_lo ; SI-NEXT: {{ $}} ; SI-NEXT: bb.7: @@ -420,12 +420,12 @@ ; SI-NEXT: successors: %bb.7(0x40000000), %bb.9(0x40000000) ; SI-NEXT: {{ $}} ; SI-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 - ; SI-NEXT: [[COPY9:%[0-9]+]]:sgpr_128 = COPY $sgpr100_sgpr101_sgpr102_sgpr103 - ; SI-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY killed [[COPY9]] - ; SI-NEXT: $vgpr0 = COPY [[COPY4]] + ; SI-NEXT: [[PRED_COPY9:%[0-9]+]]:sgpr_128 = PRED_COPY $sgpr100_sgpr101_sgpr102_sgpr103 + ; SI-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY killed [[PRED_COPY9]] + ; SI-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY4]] ; SI-NEXT: dead $sgpr30_sgpr31 = SI_CALL killed [[REG_SEQUENCE3]], 0, csr_amdgpu_si_gfx, implicit killed $sgpr0_sgpr1_sgpr2_sgpr3, implicit killed $vgpr0, implicit-def $vgpr0 ; SI-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 - ; SI-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY killed $vgpr0 + ; SI-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY killed $vgpr0 ; SI-NEXT: $exec_lo = S_XOR_B32_term $exec_lo, killed [[S_AND_SAVEEXEC_B32_1]], implicit-def dead $scc ; SI-NEXT: SI_WATERFALL_LOOP %bb.7, implicit $exec ; SI-NEXT: {{ $}} @@ -433,14 +433,14 @@ ; 
SI-NEXT: successors: %bb.1(0x80000000) ; SI-NEXT: {{ $}} ; SI-NEXT: $exec_lo = S_MOV_B32 killed [[S_MOV_B32_1]] - ; SI-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY killed [[COPY10]] + ; SI-NEXT: [[PRED_COPY11:%[0-9]+]]:vgpr_32 = PRED_COPY killed [[PRED_COPY10]] ; SI-NEXT: S_BRANCH %bb.1 ; SI-NEXT: {{ $}} ; SI-NEXT: bb.10.end: - ; SI-NEXT: [[PHI5:%[0-9]+]]:vgpr_32 = PHI [[PHI]], %bb.1, [[COPY8]], %bb.5 + ; SI-NEXT: [[PHI5:%[0-9]+]]:vgpr_32 = PHI [[PHI]], %bb.1, [[PRED_COPY8]], %bb.5 ; SI-NEXT: SI_END_CF killed [[SI_ELSE]], implicit-def dead $exec, implicit-def dead $scc, implicit $exec - ; SI-NEXT: [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, killed [[PHI5]], 0, killed [[COPY4]], 0, 0, implicit $mode, implicit $exec - ; SI-NEXT: $vgpr0 = COPY killed [[V_ADD_F32_e64_]] + ; SI-NEXT: [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, killed [[PHI5]], 0, killed [[PRED_COPY4]], 0, 0, implicit $mode, implicit $exec + ; SI-NEXT: $vgpr0 = PRED_COPY killed [[V_ADD_F32_e64_]] ; SI-NEXT: SI_RETURN_TO_EPILOG killed $vgpr0 main_body: %cc = icmp sgt i32 %z, 5 @@ -466,16 +466,16 @@ ; SI-NEXT: successors: %bb.2(0x40000000), %bb.5(0x40000000) ; SI-NEXT: liveins: $vgpr0, $sgpr0_sgpr1 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY killed $sgpr0_sgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY killed $vgpr0 - ; SI-NEXT: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_32 = V_CMP_NE_U32_e64 0, [[COPY1]](s32), implicit $exec + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_64(p4) = PRED_COPY killed $sgpr0_sgpr1 + ; SI-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY killed $vgpr0 + ; SI-NEXT: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_32 = V_CMP_NE_U32_e64 0, [[PRED_COPY1]](s32), implicit $exec ; SI-NEXT: [[SI_IF:%[0-9]+]]:sreg_32 = SI_IF killed [[V_CMP_NE_U32_e64_]], %bb.5, implicit-def dead $exec, implicit-def dead $scc, implicit $exec ; SI-NEXT: S_BRANCH %bb.2 ; SI-NEXT: {{ $}} ; SI-NEXT: bb.1.if.then: ; SI-NEXT: successors: %bb.7(0x80000000) ; SI-NEXT: {{ $}} - ; SI-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM killed [[COPY]](p4), 36, 0 :: (dereferenceable invariant load (s64) from %ir.src1.kernarg.offset, align 4, addrspace 4) + ; SI-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM killed [[PRED_COPY]](p4), 36, 0 :: (dereferenceable invariant load (s64) from %ir.src1.kernarg.offset, align 4, addrspace 4) ; SI-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[S_LOAD_DWORDX2_IMM]].sub0, killed %51, 0, implicit $exec ; SI-NEXT: %44:vgpr_32, dead %46:sreg_32_xm0_xexec = V_ADDC_U32_e64 0, killed [[S_LOAD_DWORDX2_IMM]].sub1, killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; SI-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[V_ADD_CO_U32_e64_]], %subreg.sub0, killed %44, %subreg.sub1 @@ -504,7 +504,7 @@ ; SI-NEXT: bb.5.Flow: ; SI-NEXT: successors: %bb.1(0x40000000), %bb.7(0x40000000) ; SI-NEXT: {{ $}} - ; SI-NEXT: [[PHI:%[0-9]+]]:vgpr_32 = PHI [[COPY1]](s32), %bb.0, undef %52:vgpr_32, %bb.6 + ; SI-NEXT: [[PHI:%[0-9]+]]:vgpr_32 = PHI [[PRED_COPY1]](s32), %bb.0, undef %52:vgpr_32, %bb.6 ; SI-NEXT: [[SI_ELSE:%[0-9]+]]:sreg_32 = SI_ELSE killed [[SI_IF]], %bb.7, implicit-def dead $exec, implicit-def dead $scc, implicit $exec ; SI-NEXT: S_BRANCH %bb.1 ; SI-NEXT: {{ $}} @@ -566,26 +566,26 @@ ; SI-NEXT: successors: %bb.1(0x80000000) ; SI-NEXT: liveins: $vgpr0, $sgpr0_sgpr1 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY killed $sgpr0_sgpr1 - ; 
SI-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY killed $vgpr0 + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_64(p4) = PRED_COPY killed $sgpr0_sgpr1 + ; SI-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY killed $vgpr0 ; SI-NEXT: {{ $}} ; SI-NEXT: bb.1.if.then: ; SI-NEXT: successors: %bb.2(0x80000000) ; SI-NEXT: {{ $}} - ; SI-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM killed [[COPY]](p4), 36, 0 :: (dereferenceable invariant load (s64) from %ir.tex.coerce.kernarg.offset, align 4, addrspace 4) - ; SI-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 3, killed [[COPY1]](s32), implicit $exec + ; SI-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM killed [[PRED_COPY]](p4), 36, 0 :: (dereferenceable invariant load (s64) from %ir.tex.coerce.kernarg.offset, align 4, addrspace 4) + ; SI-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 3, killed [[PRED_COPY1]](s32), implicit $exec ; SI-NEXT: [[GLOBAL_LOAD_DWORDX2_SADDR:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2_SADDR killed [[S_LOAD_DWORDX2_IMM]], killed [[V_LSHLREV_B32_e64_]], 0, 0, implicit $exec :: (load (s64) from %ir.idx, addrspace 1) ; SI-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[GLOBAL_LOAD_DWORDX2_SADDR]], 16, 0, implicit $exec :: (invariant load (s128) from %ir.3 + 16, addrspace 4) - ; SI-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_LOAD_DWORDX4_]].sub3 - ; SI-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_LOAD_DWORDX4_]].sub2 - ; SI-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_LOAD_DWORDX4_]].sub1 - ; SI-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY killed [[GLOBAL_LOAD_DWORDX4_]].sub0 + ; SI-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[GLOBAL_LOAD_DWORDX4_]].sub3 + ; SI-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[GLOBAL_LOAD_DWORDX4_]].sub2 + ; SI-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[GLOBAL_LOAD_DWORDX4_]].sub1 + ; SI-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY killed [[GLOBAL_LOAD_DWORDX4_]].sub0 ; SI-NEXT: [[GLOBAL_LOAD_DWORDX4_1:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[GLOBAL_LOAD_DWORDX2_SADDR]], 0, 0, implicit $exec :: (invariant load (s128) from %ir.3, align 32, addrspace 4) - ; SI-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_LOAD_DWORDX4_1]].sub3 - ; SI-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_LOAD_DWORDX4_1]].sub2 - ; SI-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_LOAD_DWORDX4_1]].sub1 - ; SI-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY killed [[GLOBAL_LOAD_DWORDX4_1]].sub0 - ; SI-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_256 = REG_SEQUENCE killed [[COPY9]], %subreg.sub0, killed [[COPY8]], %subreg.sub1, killed [[COPY7]], %subreg.sub2, killed [[COPY6]], %subreg.sub3, killed [[COPY5]], %subreg.sub4, killed [[COPY4]], %subreg.sub5, killed [[COPY3]], %subreg.sub6, killed [[COPY2]], %subreg.sub7 + ; SI-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[GLOBAL_LOAD_DWORDX4_1]].sub3 + ; SI-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[GLOBAL_LOAD_DWORDX4_1]].sub2 + ; SI-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[GLOBAL_LOAD_DWORDX4_1]].sub1 + ; SI-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY killed [[GLOBAL_LOAD_DWORDX4_1]].sub0 + ; SI-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_256 = REG_SEQUENCE killed [[PRED_COPY9]], %subreg.sub0, killed [[PRED_COPY8]], %subreg.sub1, killed [[PRED_COPY7]], %subreg.sub2, killed [[PRED_COPY6]], %subreg.sub3, killed [[PRED_COPY5]], %subreg.sub4, killed [[PRED_COPY4]], %subreg.sub5, killed [[PRED_COPY3]], %subreg.sub6, killed [[PRED_COPY2]], %subreg.sub7 ; 
SI-NEXT: [[GLOBAL_LOAD_DWORDX4_2:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 killed [[GLOBAL_LOAD_DWORDX2_SADDR]], 48, 0, implicit $exec :: (invariant load (s128) from %ir.add.ptr.i, addrspace 4) ; SI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_MOV_B32 $exec_lo ; SI-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/wqm.mir b/llvm/test/CodeGen/AMDGPU/wqm.mir --- a/llvm/test/CodeGen/AMDGPU/wqm.mir +++ b/llvm/test/CodeGen/AMDGPU/wqm.mir @@ -79,10 +79,10 @@ bb.0: liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr0 - %3 = COPY $vgpr0 - %2 = COPY $sgpr2 - %1 = COPY $sgpr1 - %0 = COPY $sgpr0 + %3 = PRED_COPY $vgpr0 + %2 = PRED_COPY $sgpr2 + %1 = PRED_COPY $sgpr1 + %0 = PRED_COPY $sgpr0 S_CMP_LT_I32 0, %0, implicit-def $scc %12 = V_ADD_CO_U32_e32 %3, %3, implicit-def $vcc, implicit $exec %5 = S_CSELECT_B32 %2, %1, implicit $scc @@ -98,9 +98,9 @@ # #CHECK: %bb.1 #CHECK: S_CMP_LT_I32 -#CHECK: COPY $scc +#CHECK: PRED_COPY $scc #CHECK: ENTER_STRICT_WWM -#CHECK: $scc = COPY +#CHECK: $scc = PRED_COPY #CHECK: S_CSELECT_B32 name: test_strict_wwm_scc2 tracksRegLiveness: true @@ -108,10 +108,10 @@ bb.0: liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr0 - %3:vgpr_32 = COPY $vgpr0 - %2:sgpr_32 = COPY $sgpr2 - %1:sgpr_32 = COPY $sgpr1 - %0:sgpr_32 = COPY $sgpr0 + %3:vgpr_32 = PRED_COPY $vgpr0 + %2:sgpr_32 = PRED_COPY $sgpr2 + %1:sgpr_32 = PRED_COPY $sgpr1 + %0:sgpr_32 = PRED_COPY $sgpr0 %13:sgpr_128 = IMPLICIT_DEF bb.1: @@ -121,14 +121,14 @@ %5:sgpr_32 = S_CSELECT_B32 %2:sgpr_32, %1:sgpr_32, implicit $scc %11:vgpr_32 = V_ADD_CO_U32_e32 %5:sgpr_32, %12:vgpr_32, implicit-def $vcc, implicit $exec $vgpr0 = STRICT_WWM %11:vgpr_32, implicit $exec - $vgpr1 = COPY %10:vgpr_32 + $vgpr1 = PRED_COPY %10:vgpr_32 SI_RETURN_TO_EPILOG $vgpr0, $vgpr1 ... --- # V_SET_INACTIVE, when its second operand is undef, is replaced by a -# COPY by si-wqm. Ensure the instruction is removed. +# PRED_COPY by si-wqm. Ensure the instruction is removed. 
#CHECK-NOT: V_SET_INACTIVE name: no_cfg alignment: 1 @@ -166,18 +166,18 @@ bb.0: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3 - %3:sgpr_32 = COPY $sgpr3 - %2:sgpr_32 = COPY $sgpr2 - %1:sgpr_32 = COPY $sgpr1 - %0:sgpr_32 = COPY $sgpr0 + %3:sgpr_32 = PRED_COPY $sgpr3 + %2:sgpr_32 = PRED_COPY $sgpr2 + %1:sgpr_32 = PRED_COPY $sgpr1 + %0:sgpr_32 = PRED_COPY $sgpr0 %6:sgpr_128 = REG_SEQUENCE %0, %subreg.sub0, %1, %subreg.sub1, %2, %subreg.sub2, %3, %subreg.sub3 - %5:sgpr_128 = COPY %6 + %5:sgpr_128 = PRED_COPY %6 %7:sreg_32 = S_MOV_B32 0 %8:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %6, %7, 0, 0, 0, implicit $exec - %16:vgpr_32 = COPY %8.sub1 - %11:vgpr_32 = COPY %16 + %16:vgpr_32 = PRED_COPY %8.sub1 + %11:vgpr_32 = PRED_COPY %16 %10:vgpr_32 = V_SET_INACTIVE_B32 %11, undef %12:sreg_32, implicit $exec, implicit-def $scc - %14:vgpr_32 = COPY %7 + %14:vgpr_32 = PRED_COPY %7 %13:vgpr_32 = V_MOV_B32_dpp %14, killed %10, 323, 12, 15, 0, implicit $exec early-clobber %15:vgpr_32 = STRICT_WWM killed %13, implicit $exec BUFFER_STORE_DWORD_OFFSET_exact killed %15, %6, %7, 4, 0, 0, implicit $exec @@ -188,7 +188,7 @@ --- # Ensure that strict_wwm is not put around an EXEC copy #CHECK-LABEL: name: copy_exec -#CHECK: %7:sreg_64 = COPY $exec +#CHECK: %7:sreg_64 = PRED_COPY $exec #CHECK-NEXT: %14:sreg_64 = ENTER_STRICT_WWM -1, implicit-def $exec, implicit-def $scc, implicit $exec #CHECK-NEXT: %8:vgpr_32 = V_MOV_B32_e32 0, implicit $exec #CHECK-NEXT: $exec = EXIT_STRICT_WWM %14 @@ -199,22 +199,22 @@ bb.0: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3 - %3:sgpr_32 = COPY $sgpr3 - %2:sgpr_32 = COPY $sgpr2 - %1:sgpr_32 = COPY $sgpr1 - %0:sgpr_32 = COPY $sgpr0 + %3:sgpr_32 = PRED_COPY $sgpr3 + %2:sgpr_32 = PRED_COPY $sgpr2 + %1:sgpr_32 = PRED_COPY $sgpr1 + %0:sgpr_32 = PRED_COPY $sgpr0 %4:sgpr_128 = REG_SEQUENCE %0, %subreg.sub0, %1, %subreg.sub1, %2, %subreg.sub2, %3, %subreg.sub3 %5:sreg_32 = S_MOV_B32 0 %6:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %4, %5, 0, 0, 0, implicit $exec - %8:sreg_64 = COPY $exec + %8:sreg_64 = PRED_COPY $exec %9:vgpr_32 = V_MOV_B32_e32 0, implicit $exec %10:vgpr_32 = V_MBCNT_LO_U32_B32_e64 %8.sub0:sreg_64, 0, implicit $exec %11:vgpr_32 = V_MOV_B32_dpp %9:vgpr_32, %10:vgpr_32, 312, 15, 15, 0, implicit $exec %12:sreg_32 = V_READLANE_B32 %11:vgpr_32, 63 early-clobber %13:sreg_32 = STRICT_WWM %9:vgpr_32, implicit $exec - %14:vgpr_32 = COPY %13 + %14:vgpr_32 = PRED_COPY %13 BUFFER_STORE_DWORD_OFFSET_exact killed %14, %4, %5, 4, 0, 0, implicit $exec S_ENDPGM 0 @@ -228,9 +228,9 @@ #CHECK-NEXT: S_CMP_EQ_U32 %2, 0, implicit-def $scc #CHECK-NEXT: undef %9.sub0:vreg_64 = nsz arcp nofpexcept V_ADD_F32_e64 #CHECK-NEXT: %9.sub1:vreg_64 = nsz arcp nofpexcept V_MUL_F32_e32 -#CHECK-NEXT: %14:sreg_32_xm0 = COPY $scc +#CHECK-NEXT: %14:sreg_32_xm0 = PRED_COPY $scc #CHECK-NEXT: $exec = S_AND_B64 $exec, %13, implicit-def $scc -#CHECK-NEXT: $scc = COPY %14 +#CHECK-NEXT: $scc = PRED_COPY %14 #CHECK-NEXT: %10:vgpr_32 = nsz arcp nofpexcept V_ADD_F32_e64 #CHECK-NEXT: %11:vreg_128 = IMAGE_SAMPLE_V4_V2 #CHECK-NEXT: S_CBRANCH_SCC0 %bb.2 @@ -240,18 +240,18 @@ bb.0: liveins: $sgpr1, $sgpr2, $vgpr1, $vgpr2 - $m0 = COPY $sgpr1 - %0:vgpr_32 = COPY $vgpr1 - %1:vgpr_32 = COPY $vgpr2 - %8:sgpr_32 = COPY $sgpr2 + $m0 = PRED_COPY $sgpr1 + %0:vgpr_32 = PRED_COPY $vgpr1 + %1:vgpr_32 = PRED_COPY $vgpr2 + %8:sgpr_32 = PRED_COPY $sgpr2 %100:sgpr_256 = IMPLICIT_DEF %101:sgpr_128 = IMPLICIT_DEF %2:vgpr_32 = V_INTERP_P1_F32 %0:vgpr_32, 3, 2, implicit $mode, implicit $m0, implicit $exec %3:vgpr_32 = V_INTERP_P1_F32 %1:vgpr_32, 3, 2, implicit $mode, implicit 
$m0, implicit $exec - undef %7.sub0:vreg_64 = COPY %2:vgpr_32 - %7.sub1:vreg_64 = COPY %3:vgpr_32 + undef %7.sub0:vreg_64 = PRED_COPY %2:vgpr_32 + %7.sub1:vreg_64 = PRED_COPY %3:vgpr_32 %4:vreg_128 = IMAGE_SAMPLE_V4_V2 %7:vreg_64, %100:sgpr_256, %101:sgpr_128, 15, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) S_CMP_EQ_U32 %8:sgpr_32, 0, implicit-def $scc @@ -270,10 +270,10 @@ S_ENDPGM 0 bb.2: - $vgpr0 = COPY %4.sub0:vreg_128 - $vgpr1 = COPY %4.sub1:vreg_128 - $vgpr2 = COPY %9.sub0:vreg_128 - $vgpr3 = COPY %9.sub1:vreg_128 + $vgpr0 = PRED_COPY %4.sub0:vreg_128 + $vgpr1 = PRED_COPY %4.sub1:vreg_128 + $vgpr2 = PRED_COPY %9.sub0:vreg_128 + $vgpr3 = PRED_COPY %9.sub1:vreg_128 SI_RETURN_TO_EPILOG $vgpr0, $vgpr1, $vgpr2, $vgpr3 ... @@ -292,8 +292,8 @@ body: | bb.0: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $vgpr0 - %0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - %1:vgpr_32 = COPY $vgpr0 + %0:sgpr_128 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + %1:vgpr_32 = PRED_COPY $vgpr0 %2:vreg_64 = BUFFER_LOAD_DWORDX2_OFFEN %1:vgpr_32, %0:sgpr_128, 0, 0, 0, 0, implicit $exec %2.sub0:vreg_64 = V_SET_INACTIVE_B32 %2.sub0:vreg_64, 0, implicit $exec, implicit-def $scc %2.sub1:vreg_64 = V_SET_INACTIVE_B32 %2.sub1:vreg_64, 0, implicit $exec, implicit-def $scc @@ -308,7 +308,7 @@ # If not then initial V_MOV will not be in WQM. # #CHECK-LABEL: name: test_wqm_lr_phi -#CHECK: COPY $exec +#CHECK: PRED_COPY $exec #CHECK-NEXT: S_WQM #CHECK-NEXT: V_MOV_B32_e32 -10 #CHECK-NEXT: V_MOV_B32_e32 0 @@ -337,8 +337,8 @@ bb.4: %3:sgpr_128 = IMPLICIT_DEF %4:vreg_128 = IMAGE_SAMPLE_V4_V2 %0:vreg_64, %2:sgpr_256, %3:sgpr_128, 15, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), addrspace 7) - $vgpr0 = COPY %4.sub0:vreg_128 - $vgpr1 = COPY %4.sub1:vreg_128 + $vgpr0 = PRED_COPY %4.sub0:vreg_128 + $vgpr1 = PRED_COPY %4.sub1:vreg_128 SI_RETURN_TO_EPILOG $vgpr0, $vgpr1 ... @@ -351,8 +351,8 @@ bb.0: liveins: $vgpr1, $vgpr2 - undef %0.sub0:vreg_64 = COPY $vgpr1 - %0.sub1:vreg_64 = COPY $vgpr2 + undef %0.sub0:vreg_64 = PRED_COPY $vgpr1 + %0.sub1:vreg_64 = PRED_COPY $vgpr2 %100:sgpr_256 = IMPLICIT_DEF %101:sgpr_128 = IMPLICIT_DEF @@ -368,8 +368,8 @@ bb.0: liveins: $vgpr1, $vgpr2 - undef %0.sub0:vreg_64 = COPY $vgpr1 - %0.sub1:vreg_64 = COPY $vgpr2 + undef %0.sub0:vreg_64 = PRED_COPY $vgpr1 + %0.sub1:vreg_64 = PRED_COPY $vgpr2 %100:sgpr_256 = IMPLICIT_DEF %101:sgpr_128 = IMPLICIT_DEF @@ -385,8 +385,8 @@ bb.0: liveins: $vgpr1, $vgpr2 - undef %0.sub0:vreg_64 = COPY $vgpr1 - %0.sub1:vreg_64 = COPY $vgpr2 + undef %0.sub0:vreg_64 = PRED_COPY $vgpr1 + %0.sub1:vreg_64 = PRED_COPY $vgpr2 %100:sgpr_256 = IMPLICIT_DEF %101:sgpr_128 = IMPLICIT_DEF @@ -402,8 +402,8 @@ bb.0: liveins: $vgpr1, $vgpr2 - undef %0.sub0:vreg_64 = COPY $vgpr1 - %0.sub1:vreg_64 = COPY $vgpr2 + undef %0.sub0:vreg_64 = PRED_COPY $vgpr1 + %0.sub1:vreg_64 = PRED_COPY $vgpr2 %100:sgpr_256 = IMPLICIT_DEF %101:sgpr_128 = IMPLICIT_DEF @@ -419,8 +419,8 @@ bb.0: liveins: $vgpr1, $vgpr2 - undef %0.sub0:vreg_64 = COPY $vgpr1 - %0.sub1:vreg_64 = COPY $vgpr2 + undef %0.sub0:vreg_64 = PRED_COPY $vgpr1 + %0.sub1:vreg_64 = PRED_COPY $vgpr2 %100:sgpr_256 = IMPLICIT_DEF %101:sgpr_128 = IMPLICIT_DEF @@ -436,8 +436,8 @@ bb.0: liveins: $vgpr1, $vgpr2 - undef %0.sub0:vreg_64 = COPY $vgpr1 - %0.sub1:vreg_64 = COPY $vgpr2 + undef %0.sub0:vreg_64 = PRED_COPY $vgpr1 + %0.sub1:vreg_64 = PRED_COPY $vgpr2 %100:sgpr_256 = IMPLICIT_DEF %101:sgpr_128 = IMPLICIT_DEF